# -------------------------------------------------------------------
# Online training loop: one sentence (mini-batch) at a time so that
# sentences never need to be padded to a common length.  After each
# epoch, precision / recall / F1 are reported on dev and test.
# -------------------------------------------------------------------
for epoch in range(number_of_epochs):
    print("--------- Epoch %d -----------" % epoch)
    random.shuffle(train_data)
    start_time = time.time()

    # Train one sentence at a time (i.e. online training) to avoid
    # padding of sentences.
    cnt = 0
    for batch in iterate_minibatches(train_data):
        labels, tokens, casing = batch
        model.train_on_batch([tokens, casing], labels)
        cnt += 1
        if cnt % 100 == 0:
            # '\r' keeps the progress counter on a single console line.
            print('Sentence: %d / %d' % (cnt, len(train_data)), end='\r')
    print("%.2f sec for training " % (time.time() - start_time))

    # BUG FIX: restart the clock before evaluation.  Previously the
    # "sec for evaluation" print reused the pre-training start_time and
    # therefore reported training + evaluation time combined.
    start_time = time.time()

    # Performance on dev dataset
    predLabels, correctLabels = tag_dataset(dev_data)
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(predLabels, correctLabels, idx2Label)
    print("Dev-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_dev, rec_dev, f1_dev))

    # Performance on test dataset
    predLabels, correctLabels = tag_dataset(test_data)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(predLabels, correctLabels, idx2Label)
    print("Test-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_test, rec_test, f1_test))

    print("%.2f sec for evaluation" % (time.time() - start_time))
    print("")
# ################################## # # Training of the Network # ################################## number_of_epochs = 10 minibatch_size = 64 print "%d epochs" % number_of_epochs print "%d mini batches" % (len(train_x) / minibatch_size) for epoch in xrange(number_of_epochs): start_time = time.time() # Train for 1 epoch model.fit(train_x, train_y_cat, nb_epoch=1, batch_size=minibatch_size, verbose=False, shuffle=True) print "%.2f sec for training" % (time.time() - start_time) # Compute precision, recall, F1 on dev & test data pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(model.predict_classes(dev_x, verbose=0), dev_y, idx2Label) pre_test, rec_test, f1_test = BIOF1Validation.compute_f1( model.predict_classes(test_x, verbose=0), test_y, idx2Label ) print "%d epoch: F1 on dev: %f, F1 on test: %f" % (epoch + 1, f1_dev, f1_test)
for epoch in xrange(number_of_epochs): start_time = time.time() model.fit([train_sim_x, train_pos_x, train_x, train_case_x, train_freq_x], train_y_cat, nb_epoch=1, batch_size=minibatch_size, verbose=0, shuffle=False) #for batch in iterate_minibatches(train_x, train_y_cat, minibatch_size, shuffle=False): # inputs, targets = batch # model.train_on_batch(inputs, targets) print "%.2f sec for training" % (time.time() - start_time) pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1( model.predict_classes( [dev_sim_x, dev_pos_x, dev_x, dev_case_x, dev_freq_x], verbose=0), dev_y, idx2Label) pre_test, rec_test, f1_test = BIOF1Validation.compute_f1( model.predict_classes( [test_sim_x, test_pos_x, test_x, test_case_x, test_freq_x], verbose=1), test_y, idx2Label) print test_y.shape[0] print "%d epoch: prec, rec, F1 on dev: %f %f %f, prec, rec, F1 on test: %f %f %f" % ( epoch + 1, pre_dev, rec_dev, f1_dev, pre_test, rec_test, f1_test) #if epoch==stop_epoch: #for i in range(0, test_y.shape[0]): #print i, idx2Label[model.predict_classes([test_x, test_pos_x, test_case_x], verbose=0)[i]], idx2Label[test_y[i]]
# Epoch loop: shuffle the training sentences each epoch, then train
# online in chunks of `stepsize` sentences, re-evaluating on the
# train / dev / test splits after every chunk.
for epoch in xrange(number_of_epochs):
    print "--------- Epoch %d -----------" % epoch
    random.shuffle(train_data)
    for startIdx in xrange(0, len(train_data), stepsize):
        start_time = time.time()
        # Train on the slice [startIdx, startIdx + stepsize) of the
        # shuffled data, one mini-batch (labels, tokens, casing) at a time.
        for batch in iterate_minibatches(train_data, startIdx, startIdx + stepsize):
            labels, tokens, casing = batch
            model.train_on_batch([tokens, casing], labels)
        print "%.2f sec for training" % (time.time() - start_time)

        #Train Dataset
        start_time = time.time()
        predLabels, correctLabels = tag_dataset(train_data)
        pre_train, rec_train, f1_train = BIOF1Validation.compute_f1(
            predLabels, correctLabels, idx2Label)
        print "Train-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (
            pre_train, rec_train, f1_train)

        #Dev Dataset
        predLabels, correctLabels = tag_dataset(dev_data)
        pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(
            predLabels, correctLabels, idx2Label)
        print "Dev-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (pre_dev, rec_dev, f1_dev)

        #Test Dataset
        predLabels, correctLabels = tag_dataset(test_data)
        pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(
            predLabels, correctLabels, idx2Label)
        # NOTE(review): this print statement is truncated in the visible
        # chunk -- its argument tuple continues past this excerpt.
        print "Test-Data: Prec: %.3f, Rec: %.3f, F1: %.3f" % (
        # Tail of iterate_minibatches (its `if shuffle:` branch is above
        # this excerpt): the non-shuffled branch yields contiguous slices
        # of the inputs/targets arrays.
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]


number_of_epochs = 10
minibatch_size = 35
print "%d epochs" % number_of_epochs
print "%d mini batches" % (len(train_x) / minibatch_size)

for epoch in xrange(number_of_epochs):
    start_time = time.time()
    # One training epoch: feed shuffled mini-batches to the training
    # function.  NOTE(review): train_fn is defined elsewhere -- presumably
    # a compiled Theano/Lasagne update function; confirm against caller.
    for batch in iterate_minibatches(train_x, train_y, minibatch_size, shuffle=True):
        inputs, targets = batch
        train_fn(inputs, targets)
    print "%.2f sec for training" % (time.time() - start_time)

    # Precision / recall / F1 on dev and test after every epoch.
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(
        predict_labels(dev_x), dev_y, idx2Label)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(
        predict_labels(test_x), test_y, idx2Label)
    print "%d epoch: F1 on dev: %f, F1 on test: %f" % (epoch + 1, f1_dev, f1_test)

print "--DONE--"
# # Training of the Network # ################################## number_of_epochs = 10 minibatch_size = 64 print "%d epochs" % number_of_epochs print "%d mini batches" % (len(train_x) / minibatch_size) for epoch in xrange(number_of_epochs): start_time = time.time() #Train for 1 epoch model.fit(train_x, train_y_cat, nb_epoch=1, batch_size=minibatch_size, verbose=False, shuffle=True) print "%.2f sec for training" % (time.time() - start_time) # Compute precision, recall, F1 on dev & test data pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1( model.predict_classes(dev_x, verbose=0), dev_y, idx2Label) pre_test, rec_test, f1_test = BIOF1Validation.compute_f1( model.predict_classes(test_x, verbose=0), test_y, idx2Label) print "%d epoch: F1 on dev: %f, F1 on test: %f" % (epoch + 1, f1_dev, f1_test)
##################################
number_of_epochs = 10
minibatch_size = 128

print("%d epochs" % number_of_epochs)


def predict_classes(prediction):
    """Collapse a softmax output to the index of the best-scoring class."""
    return prediction.argmax(axis=-1)


for epoch in range(number_of_epochs):
    print("\n------------- Epoch %d ------------" % (epoch + 1))

    # Train for one epoch on the token + casing inputs.
    model.fit([train_tokens, train_case], train_y,
              epochs=1, batch_size=minibatch_size,
              verbose=True, shuffle=True)

    # Compute precision, recall, F1 on dev & test data.
    dev_pred = predict_classes(model.predict([dev_tokens, dev_case]))
    test_pred = predict_classes(model.predict([test_tokens, test_case]))
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(dev_pred, dev_y, idx2Label)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(test_pred, test_y, idx2Label)
    print("%d. epoch: F1 on dev: %f, F1 on test: %f" % (epoch + 1, f1_dev, f1_test))
        # Tail of iterate_minibatches (the slicing logic is above this
        # excerpt): emit the current mini-batch slice.
        yield inputs[excerpt], targets[excerpt]


number_of_epochs = 10
minibatch_size = 35
print "%d epochs" % number_of_epochs
print "%d mini batches" % (len(train_x)/minibatch_size)

for epoch in xrange(number_of_epochs):
    start_time = time.time()
    # One training epoch: feed shuffled mini-batches to the training
    # function.  NOTE(review): train_fn is defined elsewhere -- presumably
    # a compiled Theano/Lasagne update function; confirm against caller.
    for batch in iterate_minibatches(train_x, train_y, minibatch_size, shuffle=True):
        inputs, targets = batch
        train_fn(inputs, targets)
    print "%.2f sec for training" % (time.time() - start_time)

    # Precision / recall / F1 on dev and test after every epoch.
    pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(predict_labels(dev_x), dev_y, idx2Label)
    pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(predict_labels(test_x), test_y, idx2Label)
    print "%d epoch: F1 on dev: %f, F1 on test: %f" % (epoch+1, f1_dev, f1_test)

print "--DONE--"
print train_tokens.shape[1], ' train dimension' print test_tokens.shape[0], ' test samples' ################################## # # Training of the Network # ################################## number_of_epochs = 10 minibatch_size = 128 print "%d epochs" % number_of_epochs for epoch in xrange(number_of_epochs): print "\n------------- Epoch %d ------------" % (epoch+1) model.fit([train_tokens, train_case], train_y_cat, nb_epoch=1, batch_size=minibatch_size, verbose=True, shuffle=True) # Compute precision, recall, F1 on dev & test data pre_dev, rec_dev, f1_dev = BIOF1Validation.compute_f1(model.predict_classes([dev_tokens, dev_case], verbose=0), dev_y, idx2Label) pre_test, rec_test, f1_test = BIOF1Validation.compute_f1(model.predict_classes([test_tokens, test_case], verbose=0), test_y, idx2Label) print "%d epoch: F1 on dev: %f, F1 on test: %f" % (epoch+1, f1_dev, f1_test)
# Add derived features to the sentences before building the index
# matrices.  NOTE(review): trainSentences appears to receive only
# casing info here; its char info may be added above this excerpt.
preprocessing_lite.addCasingInformation(trainSentences)
preprocessing_lite.addCharInformation(goldSentences)
preprocessing_lite.addCasingInformation(goldSentences)

# NOTE(review): the "Matrirx" names are misspelled, but they are
# module-level and may be referenced later in the file, so they are
# kept for compatibility.
guessedMatrirx = preprocessing_lite.createMatrices(trainSentences, mappings, False)
goldMatrirx = preprocessing_lite.createMatrices(goldSentences, mappings, False)

# Collect the per-sentence label-index sequences.  Iterate the
# matrices directly instead of indexing via range(len(...)).
correctLabels = [sentence[labelKey] for sentence in goldMatrirx]
predictLabels = [sentence[labelKey] for sentence in guessedMatrirx]

# The encoding scheme (e.g. the part after '_' in the label key,
# such as BIO or IOBES) is passed through to the F1 computation.
encodingScheme = labelKey[labelKey.index('_') + 1:]

pre, rec, f1 = BIOF1Validation.compute_f1(predictLabels, correctLabels,
                                          idx2Labels, 'B', encodingScheme)
print("Test-Data: Prec: %.4f, Rec: %.4f, F1: %.4f" % (pre, rec, f1))

# From index to labels: flatten the per-sentence index sequences into
# flat lists of label strings.
label_pred = [idx2Labels[element] for sentence in predictLabels for element in sentence]
label_correct = [idx2Labels[element] for sentence in correctLabels for element in sentence]

# use functions from conll script