# Main training loop: one pass over the shuffled training data per epoch,
# followed by an evaluation on the dev and test datasets.
for epoch in range(number_of_epochs):
    print("--------- Epoch %d -----------" % epoch)
    random.shuffle(train_data)
    start_time = time.time()

    # Train one sentence at a time (i.e. online training) to avoid padding of sentences
    for cnt, (labels, tokens, casing) in enumerate(iterate_minibatches(train_data), start=1):
        model.train_on_batch([tokens, casing], labels)

        # Progress indicator; '\r' makes each update overwrite the previous one
        if cnt % 100 == 0:
            print('Sentence: %d / %d' % (cnt, len(train_data)), end='\r')
    # Trailing spaces blank out what is left of the progress line
    print("%.2f sec for training                 " % (time.time() - start_time))

    # Restart the clock so the figure reported below covers the evaluation
    # only, instead of training plus evaluation.
    start_time = time.time()

    # Performance on dev dataset
    predLabels, correctLabels = tag_dataset(dev_data)
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(predLabels, correctLabels, idx2Label)
    print("Dev-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_dev, rec_dev, f1_dev))

    # Performance on test dataset
    predLabels, correctLabels = tag_dataset(test_data)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(predLabels, correctLabels, idx2Label)
    print("Test-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_test, rec_test, f1_test))

    print("%.2f sec for evaluation" % (time.time() - start_time))
    print("")
        
#


##################################
#
# Training of the Network
#
##################################


# Training configuration. The print calls use the single-argument
# parenthesised form so they behave the same on Python 2 and Python 3.
number_of_epochs = 10
minibatch_size = 64
print("%d epochs" % number_of_epochs)
# Floor division keeps the batch count an integer on Python 3 as well
print("%d mini batches" % (len(train_x) // minibatch_size))


for epoch in range(number_of_epochs):
    start_time = time.time()

    # Train for 1 epoch
    model.fit(train_x, train_y_cat, nb_epoch=1, batch_size=minibatch_size, verbose=False, shuffle=True)
    print("%.2f sec for training" % (time.time() - start_time))

    # Compute precision, recall, F1 on dev & test data
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(model.predict_classes(dev_x, verbose=0), dev_y, idx2Label)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(
        model.predict_classes(test_x, verbose=0), test_y, idx2Label
    )

    # Epochs are reported 1-based
    print("%d epoch: F1 on dev: %f, F1 on test: %f" % (epoch + 1, f1_dev, f1_test))
# Training loop for the model that takes five input matrices
# (sim, pos, token, case, freq). Written so that it runs unchanged on
# Python 2 and Python 3 (range instead of xrange, print as a call).
for epoch in range(number_of_epochs):
    start_time = time.time()

    # One epoch per outer iteration; shuffle=False is passed explicitly here
    model.fit([train_sim_x, train_pos_x, train_x, train_case_x, train_freq_x],
              train_y_cat,
              nb_epoch=1,
              batch_size=minibatch_size,
              verbose=0,
              shuffle=False)
    #for batch in iterate_minibatches(train_x, train_y_cat, minibatch_size, shuffle=False):
    #    inputs, targets = batch
    #    model.train_on_batch(inputs, targets)

    print("%.2f sec for training" % (time.time() - start_time))

    # Precision, recall and F1 on the dev data ...
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(
        model.predict_classes(
            [dev_sim_x, dev_pos_x, dev_x, dev_case_x, dev_freq_x], verbose=0),
        dev_y, idx2Label)
    # ... and on the test data (this call keeps verbose=1, as before)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(
        model.predict_classes(
            [test_sim_x, test_pos_x, test_x, test_case_x, test_freq_x],
            verbose=1), test_y, idx2Label)
    # Size of the first dimension of test_y
    print(test_y.shape[0])
    print("%d epoch: prec, rec, F1 on dev: %f %f %f, prec, rec, F1 on test: %f %f %f" % (
        epoch + 1, pre_dev, rec_dev, f1_dev, pre_test, rec_test, f1_test))
    #if epoch==stop_epoch:
    #for i in range(0, test_y.shape[0]):
    #print i, idx2Label[model.predict_classes([test_x, test_pos_x, test_case_x], verbose=0)[i]], idx2Label[test_y[i]]
Ejemplo n.º 4
0
for epoch in xrange(number_of_epochs):
    print "--------- Epoch %d -----------" % epoch
    random.shuffle(train_data)
    for startIdx in xrange(0, len(train_data), stepsize):
        start_time = time.time()
        for batch in iterate_minibatches(train_data, startIdx,
                                         startIdx + stepsize):
            labels, tokens, casing = batch
            model.train_on_batch([tokens, casing], labels)
        print "%.2f sec for training" % (time.time() - start_time)

        #Train Dataset
        start_time = time.time()
        predLabels, correctLabels = tag_dataset(train_data)
        pre_train, rec_train, f1_train = BIOF1Validation.compute_f1(
            predLabels, correctLabels, idx2Label)
        print "Train-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (
            pre_train, rec_train, f1_train)

        #Dev Dataset
        predLabels, correctLabels = tag_dataset(dev_data)
        pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(
            predLabels, correctLabels, idx2Label)
        print "Dev-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_dev, rec_dev,
                                                             f1_dev)

        #Test Dataset
        predLabels, correctLabels = tag_dataset(test_data)
        pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(
            predLabels, correctLabels, idx2Label)
        print "Test-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]


# Training configuration. The print calls use the single-argument
# parenthesised form so they behave the same on Python 2 and Python 3.
number_of_epochs = 10
minibatch_size = 35
print("%d epochs" % number_of_epochs)
# Floor division keeps the batch count an integer on Python 3 as well
print("%d mini batches" % (len(train_x) // minibatch_size))

for epoch in range(number_of_epochs):
    start_time = time.time()
    # One training step per shuffled mini-batch
    for inputs, targets in iterate_minibatches(train_x,
                                               train_y,
                                               minibatch_size,
                                               shuffle=True):
        train_fn(inputs, targets)

    print("%.2f sec for training" % (time.time() - start_time))

    # Precision, recall and F1 on the dev and test data
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(
        predict_labels(dev_x), dev_y, idx2Label)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(
        predict_labels(test_x), test_y, idx2Label)

    # Epochs are reported 1-based
    print("%d epoch: F1 on dev: %f, F1 on test: %f" % (epoch + 1, f1_dev,
                                                       f1_test))

print("--DONE--")
Ejemplo n.º 6
0
#
# Training of the Network
#
##################################

# Training configuration. The print calls use the single-argument
# parenthesised form so they behave the same on Python 2 and Python 3.
number_of_epochs = 10
minibatch_size = 64
print("%d epochs" % number_of_epochs)
# Floor division keeps the batch count an integer on Python 3 as well
print("%d mini batches" % (len(train_x) // minibatch_size))

for epoch in range(number_of_epochs):
    start_time = time.time()

    # Train for 1 epoch
    model.fit(train_x,
              train_y_cat,
              nb_epoch=1,
              batch_size=minibatch_size,
              verbose=False,
              shuffle=True)
    print("%.2f sec for training" % (time.time() - start_time))

    # Compute precision, recall, F1 on dev & test data
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(
        model.predict_classes(dev_x, verbose=0), dev_y, idx2Label)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(
        model.predict_classes(test_x, verbose=0), test_y, idx2Label)

    # Epochs are reported 1-based
    print("%d epoch: F1 on dev: %f, F1 on test: %f" % (epoch + 1, f1_dev,
                                                       f1_test))
##################################

# Training settings: the batch size handed to model.fit and the number of
# passes made by the epoch loop further down.
minibatch_size = 128
number_of_epochs = 10
print("%d epochs" % (number_of_epochs,))


def predict_classes(prediction):
    """Return ``prediction.argmax(axis=-1)``, i.e. the arg-max over the last axis.

    ``prediction`` must expose an ``argmax(axis=...)`` method (for example a
    NumPy array of per-class scores); the result has one dimension fewer.
    """
    class_indices = prediction.argmax(axis=-1)
    return class_indices


for epoch in range(number_of_epochs):
    epoch_no = epoch + 1  # epochs are reported 1-based
    print("\n------------- Epoch %d ------------" % epoch_no)

    # A single pass over the training data per outer iteration
    model.fit(
        [train_tokens, train_case],
        train_y,
        epochs=1,
        batch_size=minibatch_size,
        verbose=True,
        shuffle=True,
    )

    # Precision, recall and F1 on the dev data, then on the test data
    dev_pred = predict_classes(model.predict([dev_tokens, dev_case]))
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(dev_pred, dev_y, idx2Label)

    test_pred = predict_classes(model.predict([test_tokens, test_case]))
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(test_pred, test_y, idx2Label)

    print("%d. epoch: F1 on dev: %f, F1 on test: %f" % (epoch_no, f1_dev, f1_test))
        yield inputs[excerpt], targets[excerpt]
        

        
# Training configuration. The print calls use the single-argument
# parenthesised form so they behave the same on Python 2 and Python 3.
number_of_epochs = 10
minibatch_size = 35
print("%d epochs" % number_of_epochs)
# Floor division keeps the batch count an integer on Python 3 as well
print("%d mini batches" % (len(train_x) // minibatch_size))

for epoch in range(number_of_epochs):
    start_time = time.time()
    # One training step per shuffled mini-batch
    for inputs, targets in iterate_minibatches(train_x, train_y, minibatch_size, shuffle=True):
        train_fn(inputs, targets)

    print("%.2f sec for training" % (time.time() - start_time))

    # Precision, recall and F1 on the dev and test data
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(predict_labels(dev_x), dev_y, idx2Label)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(predict_labels(test_x), test_y, idx2Label)

    # Epochs are reported 1-based
    print("%d epoch: F1 on dev: %f, F1 on test: %f" % (epoch + 1, f1_dev, f1_test))


print("--DONE--")
            
    
    
    
        
Ejemplo n.º 9
0
# Report the second dimension of the training matrix and the first dimension
# of the test matrix. Formatting into one string keeps the output identical
# to the old two-item print statement on both Python 2 and Python 3.
print("%s %s" % (train_tokens.shape[1], ' train dimension'))
print("%s %s" % (test_tokens.shape[0], ' test samples'))



##################################
#
# Training of the Network
#
##################################


        
# Training configuration. The print calls use the single-argument
# parenthesised form so they behave the same on Python 2 and Python 3.
number_of_epochs = 10
minibatch_size = 128
print("%d epochs" % number_of_epochs)


for epoch in range(number_of_epochs):
    # Epochs are reported 1-based
    print("\n------------- Epoch %d ------------" % (epoch + 1))
    # Train for 1 epoch on the token and casing inputs
    model.fit([train_tokens, train_case], train_y_cat, nb_epoch=1, batch_size=minibatch_size, verbose=True, shuffle=True)

    # Compute precision, recall, F1 on dev & test data
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(model.predict_classes([dev_tokens, dev_case], verbose=0), dev_y, idx2Label)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(model.predict_classes([test_tokens, test_case], verbose=0), test_y, idx2Label)

    print("%d epoch: F1 on dev: %f, F1 on test: %f" % (epoch + 1, f1_dev, f1_test))
    
  
Ejemplo n.º 10
0
# Add the remaining features to both sentence sets before they are converted
# to matrices.
preprocessing_lite.addCasingInformation(trainSentences)
preprocessing_lite.addCharInformation(goldSentences)
preprocessing_lite.addCasingInformation(goldSentences)

guessedMatrirx = preprocessing_lite.createMatrices(trainSentences, mappings,
                                                   False)
goldMatrirx = preprocessing_lite.createMatrices(goldSentences, mappings, False)

# Take the labelKey entry of every item, iterating the matrices directly
# instead of indexing through range(len(...)).
# NOTE(review): assumes createMatrices returns a list-like sequence - confirm.
correctLabels = [sentence[labelKey] for sentence in goldMatrirx]
predictLabels = [sentence[labelKey] for sentence in guessedMatrirx]

# Everything after the first '_' in labelKey is passed on as the encoding
# scheme; str.index raises ValueError if labelKey contains no '_'.
encodingScheme = labelKey[labelKey.index('_') + 1:]

pre, rec, f1 = BIOF1Validation.compute_f1(predictLabels, correctLabels,
                                          idx2Labels, 'B', encodingScheme)

print("Test-Data: Prec: %.4f, Rec: %.4f, F1: %.4f" % (pre, rec, f1))

# From indices to labels: flatten the per-sentence index sequences into one
# list, mapping each index through idx2Labels (sentence order is preserved).
label_pred = [
    idx2Labels[element] for sentence in predictLabels for element in sentence
]

label_correct = [
    idx2Labels[element] for sentence in correctLabels for element in sentence
]

# use functions from conll script