def execute_demo(language):
    """Run the statistics-driven baseline for one language and report its test score."""
    data = Dataset(language)
    print("{}: {} training - {} test".format(language, len(data.trainset), len(data.testset)))

    baseline = Baseline(language)

    # Pre-compute every corpus statistic the baseline consumes.
    word_frequence = baseline.word_frequences(data.trainset)
    char_frequence = baseline.char_frequence(data.trainset)
    lengh_trainset = baseline.lengh_trainset(data.trainset)
    bigram_counts_word = baseline.bigram_counts_word(data.trainset)
    pos_dictionary = baseline.pos_dictionary(data.trainset)
    lengh_char = baseline.lengh_char(data.trainset)
    bigram_counts_char = baseline.bigram_counts_char(data.trainset)

    # Train and test take the same statistics in the same positional order.
    stats = (word_frequence, pos_dictionary, bigram_counts_word, lengh_trainset,
             char_frequence, lengh_char, bigram_counts_char)
    baseline.train(data.trainset, *stats)
    predictions = baseline.test(data.testset, *stats)

    gold_labels = [sent['gold_label'] for sent in data.testset]
    report_score(gold_labels, predictions)
def execute_demo(language):
    """Evaluate both the binary-classification and regression baselines on dev."""
    data = Dataset(language)
    print("{}: {} training - {} Test\n".format(language.upper(), len(data.trainset), len(data.devset)))

    # --- Binary classification task ---
    baseline = Baseline(language, type='classify')
    baseline.train(data.trainset)
    predictions = baseline.test(data.devset)
    gold_labels = [sent['gold_label'] for sent in data.devset]
    report_score(gold_labels, predictions, detailed=True)

    # --- Probabilistic (regression) task ---
    baseline2 = Baseline(language, type='regression')
    baseline2.train(data.trainset)
    predictions = baseline2.test(data.devset)
    gold_labels2 = [float(sent['gold_prob']) for sent in data.devset]
    print("Probabilistic classification task:\nMSE:",
          mean_squared_error(gold_labels2, predictions), "\n\n")
def execute_demo(language):
    """Train the embedding-based Model for `language` and score it on the dev split."""
    # Embeddings are language-specific; load the matching pre-trained file.
    if language == 'english':
        word_emb = load_word_embeddings('english')
    elif language == 'spanish':
        word_emb = load_word_embeddings('spanish')

    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))

    # Gold label -> 0 if the word is not complex, 1 if the word is complex.
    baseline = Baseline(language)  # constructed but not exercised in this demo
    model = Model(language)
    model.train(data.trainset, word_emb)
    predictions = model.test(data.devset, word_emb)

    gold_labels = [sent['gold_label'] for sent in data.devset]
    report_score(gold_labels, predictions)
def execute_demo(language):
    """Score the bigram-dictionary baseline on both the dev and test splits."""
    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))

    baseline = Baseline(language)
    baseline.train(data.trainset, data.bigram_dic)

    # Development split.
    predictions = baseline.test(data.devset, data.bigram_dic)
    gold_labels = [sent['gold_label'] for sent in data.devset]
    report_score(gold_labels, predictions)

    # Held-out test split.
    print("{} test".format(language))
    predictions = baseline.test(data.testset, data.bigram_dic)
    gold_labels = [sent['gold_label'] for sent in data.testset]
    report_score(gold_labels, predictions)
def execute_demo(language):
    """Train either Baseline or MyLine and score it on the dev or test split.

    NOTE(review): this relies on module-level globals `test` (evaluate on the
    test split instead of dev) and `Base` (use Baseline instead of MyLine) —
    confirm both are defined before this function is called.
    """
    data = Dataset(language)

    # Pick the evaluation split once, then reuse it everywhere below.
    eval_set = data.testset if test == True else data.devset
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(eval_set)))

    model = Baseline(language) if Base == True else MyLine(language)
    model.train(data.trainset)

    predictions = model.test(eval_set)
    gold_labels = [sent['gold_label'] for sent in eval_set]
    report_score(gold_labels, predictions)
def execute_demo(language, flag):
    """Run the baseline and report on the dev (flag == 0) or test (flag == 1) split."""
    data = Dataset(language)
    if flag == 0:
        # data.trainset is the parsed dataset returned by Dataset();
        # data.devset is the split used here for evaluation.
        print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))
    if flag == 1:
        print("{}: {} training - {} test".format(language, len(data.trainset), len(data.testset)))

    baseline = Baseline(language)
    baseline.train(data.trainset)

    predictions_devset = baseline.test(data.devset)
    predictions_testset = baseline.test(data.testset)

    # Gold labels are binary 0/1 complexity annotations.
    gold_labels_devset = [sent['gold_label'] for sent in data.devset]
    gold_labels_testset = [sent['gold_label'] for sent in data.testset]

    if flag == 0:
        print("Test by using dev set:")
        report_score(gold_labels_devset, predictions_devset)
    if flag == 1:
        print("Test by using test set:")
        report_score(gold_labels_testset, predictions_testset)
def execute_demo(language):
    """Train the baseline once and report detailed scores on dev and test splits."""
    data = Dataset(language)
    print("{}: {} training - {} test".format(language, len(data.trainset), len(data.testset)))

    baseline = Baseline(language)
    baseline.train(data.trainset)

    predictions_dev = baseline.test(data.devset)
    predictions_test = baseline.test(data.testset)
    gold_labels_dev = [sent['gold_label'] for sent in data.devset]
    gold_labels_test = [sent['gold_label'] for sent in data.testset]

    print("DEV result:")
    report_score(gold_labels_dev, predictions_dev, detailed=True)
    print("TEST result:")
    report_score(gold_labels_test, predictions_test, detailed=True)
def word_identifier(language):
    """Train LR and SVM complex-word classifiers for `language` and report test scores."""
    data = Dataset(language)
    # NOTE(review): the label says "dev" but the count printed is len(data.testset) — confirm intent.
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.testset)))
    # for sent in data.trainset:
    #     print(sent['sentence'], sent['target_word'], sent['gold_label'])

    # Define gold labels for each split.
    dev_gold_labels = [sent['gold_label'] for sent in data.devset]
    test_gold_labels = [sent['gold_label'] for sent in data.testset]
    train_gold_labels = [sent['gold_label'] for sent in data.trainset]
    train_data_size = len(data.trainset)

    # Define the features used by the improved systems.
    features = [
        'chars_len',
        'tokens_len',
        'vowels_len',
        'first_upper',
        'word_frequency'
    ]

    # Logistic-regression system.
    LR_classifier = LR(language, features)
    LR_classifier.train(data.trainset)
    LR_predictions = LR_classifier.test(data.testset)
    report_score(test_gold_labels, LR_predictions, True)

    # SVM system.
    SVM_classifier = SVM(language, features)
    SVM_classifier.train(data.trainset)
    SVM_predictions = SVM_classifier.test(data.testset)
    report_score(test_gold_labels, SVM_predictions, True)

    # Disabled learning-curve experiment, kept for reference.
    '''
    scores = []
    data_scale = 0
    while True:
        data_scale += 1000
        if train_data_size <data_scale:
            data_scale = train_data_size
            data_set = data.trainset
        else:
            data_set = data.trainset[0:data_scale]
        TB_baseline = WordLength(language)
        if language == 'english':
            length = 8
        elif language == 'spanish':
            length =10
        TB_predictions = TB_baseline.test(data.devset,length)
        fscore = report_score(dev_gold_labels, TB_predictions)
        scores.append((data_scale,fscore))
        if data_scale == train_data_size:
            break
    TB_baseline_results[language] = np.asarray(scores)
    '''
    # NOTE(review): the trailing triple-quote below opens an unterminated
    # string in this chunk — its closing quote is presumably elsewhere.
    '''
def execute_demo(language):
    """Train the multi-feature baseline and report its test-set score."""
    data = Dataset(language)

    baseline = Baseline(language)
    # The dataset object carries all pre-extracted feature dictionaries.
    baseline.train(data.trainset, data.unigram, data.suffix, data.char_trigram,
                   data.pos, data.dep, data.shape, data.frequency)

    predictions = baseline.test(data.testset)
    gold_labels = [sent['gold_label'] for sent in data.testset]
    report_score(gold_labels, predictions)
def execute_demo(language, size=0):
    """Score the Improved system; a nonzero `size` truncates the training set."""
    data = Dataset(language)
    if size:
        data.trainset = data.trainset[0:size]
    print("{}: {} training - {} dev - {} test".format(
        language, len(data.trainset), len(data.devset), len(data.testset)))

    improved = Improved(language)
    improved.train(data.trainset)

    predictions_dev = improved.test(data.devset)
    predictions_test = improved.test(data.testset)
    gold_labels_dev = [sent['gold_label'] for sent in data.devset]
    gold_labels_test = [sent['gold_label'] for sent in data.testset]

    if size:
        # Learning-curve run: tag each report with the training-set size.
        print("dev score size = " + str(size))
        report_score(gold_labels_dev, predictions_dev)
        print("test score size = " + str(size))
        report_score(gold_labels_test, predictions_test)
        print('-' * 50)
    else:
        print("dev score")
        report_score(gold_labels_dev, predictions_dev)
        print("test score")
        report_score(gold_labels_test, predictions_test)
        print('-' * 50)
def execute_demo(language):
    """Run the ImprovedSys end-to-end for one language and report dev/test scores."""
    data = Dataset(language)

    # The original built this with range(int(5*len(trainset)/5)) — a no-op
    # scaling left over from training-set-size experiments; it always equals
    # range(len(trainset)). Take the full training set explicitly.
    trainset_small = [data.trainset[i] for i in range(len(data.trainset))]

    # ***** Improved system *****
    system = ImprovedSys(language)

    if language == 'english':
        # English uses a Wikipedia-derived bag-of-words plus lexicon.
        BoW, lexicon = system.create_engBoWLexicon_wiki()
        system.train_eng(trainset_small, BoW, lexicon)
        dev_predictions = system.test_eng(data.devset, BoW, lexicon)
        test_predictions = system.test_eng(data.testset, BoW, lexicon)

    if language == 'spanish':
        BoW, lexicon = system.create_espBoWLexicon()
        system.train_esp(trainset_small, BoW, lexicon)
        dev_predictions = system.test_esp(data.devset, BoW, lexicon)
        test_predictions = system.test_esp(data.testset, BoW, lexicon)

    dev_gold_labels = [sent['gold_label'] for sent in data.devset]
    test_gold_labels = [sent['gold_label'] for sent in data.testset]

    print("{}: {} training - {} dev".format(language, len(trainset_small), len(data.devset)))
    report_score(dev_gold_labels, dev_predictions)
    print("{}: {} training - {} test".format(language, len(trainset_small), len(data.testset)))
    report_score(test_gold_labels, test_predictions)
def execute_demo(language):
    """Train the RNN baseline and report its dev-set score.

    Alternative model variants (LSTM, New, plain, TF) are kept below,
    commented out, for quick switching during experiments.
    """
    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))

    # baselineLSTM = BaselineLSTM(language)  -> train/test the same way
    # baselineNew = BaselineNew(language)
    # baseline = Baseline(language)
    # baselineTF = BaselineTf(language)

    baselineRNN = BaselineRNN(language)
    baselineRNN.train(data.trainset)
    predictions = baselineRNN.test(data.devset)

    # gold_label is the binary complexity annotation.
    gold_labels = [sent['gold_label'] for sent in data.devset]
    report_score(gold_labels, predictions)
def execute(language):
    """Train CWI for `language`, report its test score, and plot a learning curve."""
    data = Dataset(language)
    instance = CWI(language)
    print("{}: {} training - {} dev - {} test".format(
        language, len(data.trainset), len(data.devset), len(data.testset)))

    instance.train(data.trainset)
    predictions = instance.test(data.testset)
    gold_labels = [sent['gold_label'] for sent in data.testset]

    # Running accuracy after each prediction: cumulative hits / items seen.
    hits = [prediction == sent for sent, prediction in zip(gold_labels, predictions)]
    accuracy = numpy.cumsum(hits) / range(1, len(data.testset) + 1)

    print("For", language, "language:")
    report_score(gold_labels, predictions)

    # Plot the learning curve (skip the noisy first 10 points).
    plt.figure("Learning graphs")
    graph = plt.subplot2grid((1, 1), (0, 0))
    title = "Learning Rate for Complex Words Identification of " + language
    graph.set_title(title)
    graph.plot(100. * accuracy[10:], 'g-', label="Accuracy")
    graph.set_yscale('linear')
    graph.set_ylabel('Accuracy')
    graph.set_xscale('linear')
    graph.set_xlabel('Iterations')

    legend = plt.legend(loc='upper right')
    for label in legend.get_texts():
        label.set_fontsize('small')
    for label in legend.get_lines():
        label.set_linewidth(1)

    plt.grid(True)
    plt.show()
def execute_demo(language, algor):
    """Train the baseline using algorithm `algor` and report its test-set score.

    NOTE(review): the frequency/POS dictionaries are built over train + test
    combined, and the printed label says "dev" while the count shown is the
    test set's — both are reproduced as-is here but worth confirming.
    """
    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.testset)))

    baseline = Baseline(language, algor)
    combined = data.trainset + data.testset
    freqdict1 = baseline.freqdict(combined)
    posindex1 = baseline.posdict(combined)

    baseline.train(data.trainset, freqdict1, posindex1)
    predictions = baseline.test(data.testset, freqdict1, posindex1)

    gold_labels = [sent['gold_label'] for sent in data.testset]
    report_score(gold_labels, predictions)
def execute_demo(language, is_baseline = True, use_test = False):
    """Train the baseline or final model and evaluate it on the dev or test split."""
    data = Dataset(language)
    model = Model(language, is_baseline)
    model.train(data.trainset)

    mod = "baseline" if is_baseline else "final"

    if use_test:
        print("Evaluating {} model on {} using Test set".format(mod, language))
        eval_set = data.testset
    else:
        print("Evaluating {} model on {} using Development set".format(mod, language))
        eval_set = data.devset

    predictions = model.test(eval_set)
    gold_labels = [sent['gold_label'] for sent in eval_set]
    print("{} instances of training data, {} instances of evaluation data".format(
        len(data.trainset), len(eval_set)))

    report_score(gold_labels, predictions, detailed = False)
def execute_demo(language):
    """Compare the Baseline and SVM systems on the test split."""
    data = Dataset(language)
    print("{}: {} training - {} test".format(language, len(data.trainset), len(data.testset)))

    gold_labels = [sent['gold_label'] for sent in data.testset]

    # Baseline system.
    baseline = Baseline(language)
    baseline.train(data.trainset)
    predictions = baseline.test(data.testset)
    report_score(gold_labels, predictions, True)

    # SVM system, scored against the same gold labels.
    svm = SVM(language)
    svm.train(data.trainset)
    predictions2 = svm.test(data.testset)
    report_score(gold_labels, predictions2, True)
def execute_demo(language):
    """Score the baseline on dev (tuning) and test (final) splits."""
    data = Dataset(language)
    print("{}: {} training - {} dev - {} test".format(
        language, len(data.trainset), len(data.devset), len(data.testset)))

    baseline = Baseline(language)
    baseline.train(data.trainset)

    # Development split, used while tuning.
    dev = baseline.test(data.devset)
    devLabels = [sent['gold_label'] for sent in data.devset]
    print("Fine-tuned Score - Dev Set")
    report_score(devLabels, dev, detailed=True)

    # Held-out test split.
    predictions = baseline.test(data.testset)
    gold_labels = [sent['gold_label'] for sent in data.testset]
    print("Final Score - Test Set")
    report_score(gold_labels, predictions, detailed=True)
def execute_improve(language):
    """Score the Improved system on dev (tuning) and test (final) splits."""
    data = Dataset(language)
    print("{}: {} training - {} dev - {} test".format(
        language, len(data.trainset), len(data.devset), len(data.testset)))

    improved = Improved(language)
    improved.train(data.trainset)

    # Development split, used while tuning.
    dev = improved.test(data.devset)
    devLabels = [sent['gold_label'] for sent in data.devset]
    print("Fine-tuned Score")
    report_score(devLabels, dev, detailed=True)

    # Held-out test split.
    prediction = improved.test(data.testset)
    gold_label = [sent['gold_label'] for sent in data.testset]
    print("Final Score")
    report_score(gold_label, prediction, detailed=True)
def execute_demo(language):
    """Train the Improved_system and report scores on the dev and test splits.

    NOTE(review): the printed label says "test" but the count shown is the
    dev set's — reproduced as-is; confirm which was intended.
    """
    data = Dataset(language)
    print("{}: {} training - {} test".format(language, len(data.trainset), len(data.devset)))

    # The plain Baseline variant is kept for quick switching:
    # baseline = Baseline(language)
    # baseline.train(data.trainset)
    baseline = Improved_system(language, data.trainset)
    baseline.train(data.trainset, data.devset)

    predictions = baseline.test(data.devset)
    gold_labels = [sent['gold_label'] for sent in data.devset]
    report_score(gold_labels, predictions)

    predictions = baseline.test(data.testset)
    gold_labels = [sent['gold_label'] for sent in data.testset]
    report_score(gold_labels, predictions)
def execute_demo(language, amountdata=100):
    """Compare baseline and improved models on dev and test splits.

    Returns:
        (results, results2): dev-set tuples where BOTH models mispredicted,
        and all test-set (baseline, improved, gold, target) tuples.
    """
    data = Dataset(language, amountdata)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))

    print('\nInitialising')
    baseline = Baseline(language)
    improved = Improved(language)

    print('Training')
    baseline.train(data.trainset)
    improved.train(data.trainset)

    print('Predicting')
    predictions = baseline.test(data.devset)
    predictionImp = improved.test(data.devset)
    gold_labels = [sent['gold_label'] for sent in data.devset]
    target = [sent['target_word'] for sent in data.devset]

    print("\nScore for baseline:")
    report_score(gold_labels, predictions)
    print("Score for improved model:")
    report_score(gold_labels, predictionImp)

    print('Predicting on testset')
    predictions2 = baseline.test(data.testset)
    predictionImp2 = improved.test(data.testset)
    gold_labels2 = [sent['gold_label'] for sent in data.testset]
    target2 = [sent['target_word'] for sent in data.testset]

    print("\nScore for baseline:")
    report_score(gold_labels2, predictions2)
    print("Score for improved model:")
    report_score(gold_labels2, predictionImp2)

    # Dev-set tuples, filtered down to cases both models got wrong.
    results = [(predictions[i], predictionImp[i], gold_labels[i], target[i])
               for i in range(len(target))]
    results = [tup for tup in results if tup[0] != tup[2] and tup[1] != tup[2]]

    results2 = [(predictions2[i], predictionImp2[i], gold_labels2[i], target2[i])
                for i in range(len(target2))]

    return results, results2
def execute_system(language, modelName, featureSet):
    """Train `modelName` with `featureSet` for `language` and report its test score."""
    data = Dataset(language)
    print("{}: {} training - {} test".format(language, len(data.trainset), len(data.testset)))
    print("Features: {}".format(featureSet))
    print("Model: {}".format(modelName))

    system = System(language, modelName, featureSet)

    print("Training...")
    system.train(data.trainset)

    print("Testing...")
    predictions = system.test(data.testset)

    gold_labels = [sent['gold_label'] for sent in data.testset]
    score = report_score(gold_labels, predictions, detailed=True)
def execute_demo(language):
    """Evaluate feature-based and word2vec-based model ensembles on the dev split.

    Each `test` call returns a list of (model_name, label_list) pairs; labels
    are string digits that get converted to ints for hard (majority) voting.
    """
    data = Dataset(language)
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.devset)))
    # trainset = data.trainset[:int(len(data.trainset)*1/100)]

    # --- Feature-based models ---
    print('Feature based models')
    baseline = Baseline(language)
    print('Training models')
    baseline.train(data.trainset)
    # baseline.train(trainset)
    print('Predicting labels')
    predictions = baseline.test(data.devset)
    # Convert each model's string labels to ints for averaging.
    predictions_int =[]
    for pred in predictions:
        pred_int = []
        for val in pred[1]:
            pred_int.append(int(val))
        predictions_int.append(pred_int)
    gold_labels = [sent['gold_label'] for sent in data.devset]
    # target_words = [sent['target_word'] for sent in data.devset]
    print('Calculating scores')
    for pred in predictions:
        print('Scores for' ,pred[0])
        report_score(gold_labels, pred[1])
    # Hard vote: mean over models, rounded back to a string label.
    print('Scores for hard voting with all models')
    avg_pred_int = np.mean(np.array(predictions_int), axis = 0).tolist()
    avg_pred = [str(round(val)) for val in avg_pred_int]
    report_score(gold_labels, avg_pred)

    # --- Word2vec based models ---
    print('Word2vec based models')
    print('Loading w2v')
    w2v = Word2vec(language)
    print('Training models')
    w2v.train(data.trainset)
    # w2v.train(trainset)
    print('Predicting labels')
    predictions_w2v = w2v.test(data.devset)
    predictions_w2v_int =[]
    for pred in predictions_w2v:
        pred_int = []
        for val in pred[1]:
            pred_int.append(int(val))
        predictions_w2v_int.append(pred_int)
    print('Calculating scores')
    for pred in predictions_w2v:
        print('Scores for' ,pred[0])
        report_score(gold_labels, pred[1])
    print('Scores for hard voting with all models')
    avg_pred_w2v_int = np.mean(np.array(predictions_w2v_int), axis = 0).tolist()
    avg_pred_w2v = [str(round(val)) for val in avg_pred_w2v_int]
    report_score(gold_labels, avg_pred_w2v)

    # Combined vote: the feature-based predictions are APPENDED into
    # predictions_w2v_int so the final mean spans both model families.
    for pred in predictions:
        pred_int = []
        for val in pred[1]:
            pred_int.append(int(val))
        predictions_w2v_int.append(pred_int)
    print('Scores for hard voting with both types of models')
    avg_pred_all_int = np.mean(np.array(predictions_w2v_int), axis = 0).tolist()
    avg_pred_all = [str(round(val)) for val in avg_pred_all_int]
    report_score(gold_labels, avg_pred_all)
# NOTE(review): fragment — this `elif` continues an if/elif chain whose head,
# and the enclosing loop over (language, model, dataSet, dataSetName), are
# outside this view. Indentation below is reconstructed; confirm nesting.
elif model == 0:
    # model 0 is a sentinel meaning "skip this language"
    continue
#######################################################################
scheme.train(data.trainset)
predictions = scheme.test(dataSet)
gold_labels = [sent['gold_label'] for sent in dataSet]
# collect data for the training rate graph
gmodel = language + str(model) + dataSetName
graphData[gmodel] = cumsum([ pred == sent['gold_label'] for sent, pred in zip(dataSet, predictions) ]) / range(1,len(dataSet)+1)
# collect and print results
print("Using model", model)
macroF1, accuracy = report_score(gold_labels, predictions)
results.append([model, macroF1, 100.*accuracy, language, dataSetName])
# log failures — note `model` is rebound here from an id to a string key
model = language + str(model) + dataSetName
failures[model] = [ (sent['gold_label'], sent['target_word']) for pred, sent in zip(predictions, dataSet) if pred != sent['gold_label'] ]
failures[model] = sorted(failures[model], key=lambda x: x[0]) # sort alphabetically
# print training rate graphs
from pylab import *
for language in languages:
    if language == 'english':
# Evaluate a trained stance classifier on the held-out test stances,
# logging every misclassification and printing an overall hit rate.
test_data_set = DataSet(path="data", bodies="train_bodies.csv", stances="test_stances.csv")
test_segments = segmentize_dataset(test_data_set)
# NOTE(review): entries is sliced below, so zip_segments must return a real
# sequence (a bare Python 3 zip object would not be subscriptable).
entries = zip_segments(test_segments)
test_classifications = []
stance_features = []  # NOTE(review): collected nowhere below — appears unused
predictions = []
for entry in tqdm(entries[:TESTING_SIZE]):
    headline, body, classification = entry
    prediction = classifier.predict(headline, body)
    predictions.append(prediction)
    if prediction != classification:
        # Log each misclassified pair for later inspection.
        logging.debug("Headline: {0}\n".format(headline))
        logging.debug("Body: {0}\n".format(body))
        logging.debug("correct: {0}, predicted: {1}\n\n\n".format(
            classification, prediction))
    test_classifications.append(classification)
hits = 0
# NOTE(review): len(results) is called after iterating `results`; under
# Python 3 zip() is a one-shot iterator with no len() — this code appears to
# target Python 2, where zip returns a list. Also the printed value is a
# 0..1 fraction despite the "%" label — confirm intent.
results = zip(predictions, test_classifications)
for result in results:
    p, tc = result
    if p == tc:
        hits += 1
print("Percentage correct: {0}%".format(float(hits) / float(len(results))))
score = Scorer.report_score(test_classifications, predictions)
print(score)
def execute_sys(language):
    """Extract character-level features from the training set, train the
    configured System, report its test score, and return per-word predictions.

    Returns:
        list of ((target_word, gold_label), predicted_label) tuples for the
        test set.
    """
    data = Dataset(language)
    # NOTE(review): label says "dev" but the count printed is the test set's.
    print("{}: {} training - {} dev".format(language, len(data.trainset), len(data.testset)))
    ### feature selection
    training = []
    suffix = {}         # word-final trigrams (used as a feature vocabulary)
    vowels_combo = {}   # maximal runs of consecutive vowels
    pos_tags = {}       # POS tags seen in training
    chars = {}          # individual characters
    bigrams = {}        # character bigrams
    trigrams = {}       # character trigrams
    # Includes Spanish accented vowels so both languages are covered.
    vowels = ['a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú']
    for sent in data.trainset:
        training.append((sent['target_word'], sent['gold_label']))
        tokenised = sent['target_word'].split(' ')
        for wd in tokenised:
            ### vowels features: record each maximal vowel run as a key
            target = wd.lower()
            temp_combo = ''
            for char in target:
                if char in vowels:
                    temp_combo += char
                elif len(temp_combo) > 0:
                    vowels_combo[temp_combo] = 0
                    temp_combo = ''
            ### suffix features (last three characters)
            suffix[target[-3:]] = 0
            try:
                tag = nltk.pos_tag(nltk.word_tokenize(wd))[0][1]
                pos_tags[tag] = 0
            except IndexError:
                # word_tokenize can return [] for degenerate tokens
                pass
            for i in range(len(target)):
                chars[target[i]] = 0
            ### char bigram
            for i in range(len(target) - 1):
                bigrams[target[i] + target[i + 1]] = 0
            ### char trigram
            for i in range(len(target) - 2):
                trigrams[target[i] + target[i + 1] + target[i + 2]] = 0
    vowels_combo_list = list(vowels_combo.keys())
    suffix_to_list = list(suffix.keys())
    # Short words yield suffixes of length < 3; keep only true trigram suffixes.
    suffix_list_len3 = [s for s in suffix_to_list if len(s) == 3]
    # Feature flags select which of the extracted vocabularies the System uses.
    sys_run = System(language, Baseline_run=False, vowels=False,
                     v_list=vowels_combo_list, syllables=False, upper=True,
                     suffix=False, s_list=suffix_list_len3, vc_ratio=False,
                     pos=False, pos_dict=pos_tags, all_chars=True,
                     all_chars_dict=chars, bigrams=True, bigrams_dict=bigrams,
                     trigrams=False, trigrams_dict=trigrams)
    gold_labels = [sent['gold_label'] for sent in data.testset]
    sys_run.train(data.trainset)
    predictions = sys_run.test(data.testset)
    score = report_score(gold_labels, predictions, detailed=False)
    print(score)
    # this output the (target word, gold label) and the predicted label
    words = []
    for sent in data.testset:
        words.append((sent['target_word'], sent['gold_label']))
    predict = []
    # predictions is a numpy array; unwrap each element to a Python scalar.
    for x in np.nditer(predictions):
        predict.append(np.asscalar(x))
    word_pred = []
    for i in range(len(predict)):
        word_pred.append((words[i], predict[i]))
    return word_pred
# Initialise all TF variables, then run mini-batch training and evaluate.
# NOTE(review): `sess`, the graph tensors (x, onehot_labels, keep_prob,
# opt_op, loss, prediction), the data splits and compute_accuracy are all
# defined outside this chunk.
sess.run(tf.global_variables_initializer())
total_loss = 0
# start training
for i in range(30000):
    # get batch to learn easily
    batch_x, batch_y = train.next_batch(batch_size_train)
    feed_dict = {
        x: batch_x,
        onehot_labels: batch_y,
        keep_prob: train_keep_prob
    }
    _, current_loss = sess.run([opt_op, loss], feed_dict=feed_dict)
    total_loss += current_loss
    if i % 50 == 0:
        # Periodically report validation accuracy.
        print(
            str(i) + " : " +
            str(compute_accuracy(validation.input, validation.labels)))
#sess = tf.Session()
print("Test accuracy : " + str(compute_accuracy(test.input, test.labels)))
# input v_x to nn and get the result with y_pre
y_pre = sess.run(prediction, feed_dict={x: test.input})
# find how many right
with tf.Session():
    predicted = tf.argmax(y_pre, 1).eval()  # transform from tensor to np array
    actual = tf.argmax(test.labels, 1).eval()
LABELS = ['agree', 'disagree', 'discuss']
report_score([LABELS[e] for e in actual], [LABELS[e] for e in predicted])
import numpy
from pylab import *

# Train and evaluate the Features model for both languages, plotting a
# running-accuracy learning curve for each.
for lang in ["english", "spanish"]:
    data = Dataset(lang)
    model = Features(lang)
    # baseline = Baseline(lang)
    # baseline.train(data.trainset)
    # paseline = baseline.test(data.testset)
    print("{}: {} training - {} dev - {} test".format(lang, len(data.trainset), len(data.devset), len(data.testset)))
    model.train(data.trainset)
    predictions = model.test(data.testset)
    gold_labels = [sent['gold_label'] for sent in data.testset]
    # Cumulative accuracy after each prediction (hits so far / items seen).
    pl = numpy.cumsum([predic == sent['gold_label'] for sent, predic in zip(data.testset, predictions) ]) / range(1,len(data.testset)+1)
    # pl = numpy.cumsum([predic == sent['gold_label'] for sent, predic in zip(data.testset, paseline) ]) / range(1,len(data.testset)+1)
    report_score(gold_labels, predictions)
    # report_score(gold_labels, paseline)
    plt.title('graph for learning rate')
    plt.plot(100*pl[20:])  # skip the noisy first 20 points
    plt.ylabel('accuracy score')
    plt.xlabel('iteration')
    plt.show()
def execute_demo(language):
    """Train Baseline and Advanced models, report dev scores, and (when the
    `debug` flag is flipped on) run a per-sentence error analysis comparing
    where each model succeeds or fails.
    """
    data = Dataset(language)
    # test_data = data.testset
    test_data = data.devset  # evaluation currently runs on the dev split
    print("{}: {} training - {} test".format(language, len(data.trainset), len(test_data)))
    baseline = Baseline(language)
    advanced = Advanced(language)
    models_to_run = [baseline, advanced]
    model_mistakes = {}  # sent index -> list of model names that got it wrong
    gold_labels = [sent['gold_label'] for sent in test_data]
    # Error analysis:
    sentences = [sent['sentence'] for sent in test_data]
    targets = [sent['target_word'] for sent in test_data]
    model_predictions = {}
    debug = False  # flip to True to enable the error-analysis passes below
    for model in models_to_run:
        # NOTE(review): train is called twice per model — the second call's
        # return value feeds the (currently disabled) importance dump.
        # Confirm the double training is intentional.
        model.train(data.trainset)
        trained = model.train(data.trainset)
        # Since only English uses RFC
        importances = False  # dead switch: the branch below never runs as-is
        if importances == True:
            if language == 'english' and model == advanced:
                importances = trained.feature_importances_
                ordered_feature_list = model.ordered_feature_list
                indices = np.argsort(importances)[::-1]
                # Emit the top-20 features as LaTeX table rows.
                for f in range(20):
                    print("{}. & {} & ({:0.3}) \\\\ \hline".format(f+1, ordered_feature_list[indices[f]], importances[indices[f]]))
        predictions = model.test(test_data)
        model_predictions[model.name] = predictions
        print(model.name)
        report_score(gold_labels, predictions)
        if debug == True:
            look_at = 500  # only analyse the first 500 items
            for sent_i in range(look_at):
                if predictions[sent_i] != gold_labels[sent_i]:
                    if sent_i in model_mistakes:
                        model_mistakes[sent_i].append(model.name)
                    else:
                        model_mistakes[sent_i] = [model.name]
                else:
                    if sent_i not in model_mistakes:
                        model_mistakes[sent_i] = []
    if debug == True:
        # Partition items by which model(s) predicted them correctly.
        both_right = []
        advanced_right = []
        baseline_right = []
        both_wrong = []
        for key, value in model_mistakes.items():
            if len(value) == 2:
                both_wrong.append(key)
            elif len(value) == 0:
                both_right.append(key)
            elif value[0] == 'Baseline':
                advanced_right.append(key)
            else:
                baseline_right.append(key)
        # Finds an example of an incorrect word.
        max_wrong = 10  # print at most 10 examples per partition
        for perm in [both_right, both_wrong, advanced_right, baseline_right]:
            curr_wrong = 0
            for item in perm:
                if curr_wrong == max_wrong:
                    break
                curr_wrong += 1
                sent = sentences[item]
                target = targets[item]
                gold = gold_labels[item]
                if perm == advanced_right:
                    predict = model_predictions['Advanced'][item]
                else:
                    predict = model_predictions['Baseline'][item]
                if perm == advanced_right:
                    perm_name = 'Advanced Correct, Baseline Incorrect'
                elif perm == baseline_right:
                    perm_name = 'Advanced Incorrect, Baseline Correct'
                elif perm == both_right:
                    perm_name = 'Both Correct'
                else:
                    perm_name = 'Both Incorrect'
                print("{}:\n Sent: {}\n Target: {}\n Predicted: {}\n Gold: {}\n".format(perm_name, sent, target, predict, gold))
# Train a perceptron on the English training data, then evaluate it on the
# News test set. `opts`, `read_data`, `build_dataset`, `progress`,
# `Perceptron` and `report_score` come from outside this chunk.
data = read_data('./datasets/english/English_Train.tsv')
all = build_dataset(data)  # NOTE(review): shadows the builtin `all`
progress('training perceptron')
per = Perceptron(eta=0.01, epochs=opts.epochs, avg=opts.avg, shuffle=opts.shuffle)
per.train(all)
progress('perceptron trained in', per.convergance_epochs(), '/', opts.epochs, '(max) epochs')
if opts.graph:
    per.plot_training_error()
progress('reading test data')
data = read_data('./datasets/english/News_Test.tsv')
all = build_dataset(data)
progress('testing')
# x[2]['gsbin'] holds the binary gold label; negative perceptron scores are
# clamped to 0 so predictions stay in {0, 1}.
gold = [x[2]['gsbin'] for x in all]
pred = [max(0, per.predict(x[0])) for x in all]
progress('evaluating')
report_score(gold, pred, detailed=True)
# NOTE(review): fragment — the first statement is the tail of a `predict`
# helper defined above this view; `predictions`, `topv`, `topi`,
# `all_categories`, `eng_dataset` and `all_losses` come from outside this
# chunk. Indentation is reconstructed.
predictions.append(str(category_index))
# Disabled top-N variant, kept for reference:
'''
for i in range(n_predictions):
    value = topv[0][i].item()
    category_index = topi[0][i].item()
    print('(%.2f) %s' % (value, all_categories[category_index]))
    predictions.append([value, all_categories[category_index]])
'''
# Evaluate on the dev split; predict() appends into `predictions` as a side
# effect, so the list accumulates one entry per dev word.
gold_label_dev = []
for sent in eng_dataset.devset:
    predict(sent['target_word'])
    gold_label_dev.append(sent['gold_label'])
print('The result for development dataset')
report_score(gold_label_dev, predictions)
# Reset and repeat for the test split.
predictions = []
gold_label_test = []
for sent in eng_dataset.testset:
    predict(sent['target_word'])
    gold_label_test.append(sent['gold_label'])
print('The result for test dataset')
report_score(gold_label_test, predictions)
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
# Plot the recorded training losses.
plt.figure()
plt.plot(all_losses)
plt.show()