else: weights_0p25 = np.load('training_output/collins_w_0p25.npy') weights_0p5 = np.load('training_output/collins_w_0p5.npy') weights_0p75 = np.load('training_output/collins_w_0p75.npy') weights = [weights_0p25, weights_0p5, weights_0p75] # load the test data test_labels = dp.labels_as_ints('dataset/testLabels.txt') test_sentences = dp.import_sentences('dataset/testSentences.txt') # calculate the scores score = [] for w in weights: if scoretype == 'word': score.append(sr.score_by_word(w,test_labels,test_sentences,dummy)) elif scoretype == 'sentence': score.append(sr.score_by_sentence(w,test_labels,test_sentences,dummy)) elif scoretype == 'mark': score.append(sr.score_by_mark(w,test_labels,test_sentences,dummy)) else: print 'Not a valid method!\n' exit(0) # save to file f = open('scores/'+method+scoretype+'.txt', 'w') f.write('Method: ' + method + ', Scoretype: ' + scoretype + '\n') if method != 'mark': f.write('25\%: ' + str(score[0]) + '\n') f.write('50\%: ' + str(score[1]) + '\n')
def collins(train_labels, train_sentences, validation_labels, validation_sentences, pct_train=0.5, Nex=None):
    """
    Train Collins perceptron weights with validation-based early stopping.

    Starting from an all-zero weight vector of length ffs.calcJ(), repeatedly
    runs collins_epoch over the training data and scores the weights it
    returns on the validation data.  Training stops at the first epoch whose
    validation score is lower than the previous score, or which leaves both
    the score and the weights unchanged.

    train_labels, train_sentences - Training data, handed to collins_epoch.
    validation_labels, validation_sentences - Data used to score each epoch.
    pct_train - Not used by this function; kept so existing callers work.
    Nex - Not used by this function; kept so existing callers work.

    Returns the tuple (w0, scores, epoch_time):
        w0 - Weights accepted before the epoch that ended training; that
             last epoch's output is never assigned to w0.
        scores - Validation scores.  scores[0] belongs to the zero weights
                 and scores[k] to the weights returned by epoch k.
        epoch_time - One [training_seconds, scoring_seconds] pair per epoch.
    """
    # NOTE: every print below passes one pre-formatted string, so the output
    # is the same under the Python 2 print statement and the Python 3 print
    # function.

    # get J, the total number of feature functions
    J = ffs.calcJ()
    print('J =  %s' % (J,))

    def validation_score(w):
        # One scorer for every entry of `scores`: consecutive entries are
        # compared directly, so they must be produced the same way.
        # NOTE(review): assumes general_score(..., 'word', 0) is the
        # word-level score the initial call used to compute - confirm.
        return sr.general_score(w, validation_labels, validation_sentences, 'word', 0)

    w0 = np.zeros(J)
    print('Calculating initial score...')
    scores = [validation_score(w0)]
    print('Done!\n')

    # run until converged, according to score on validation set
    nep = 1
    epoch_time = []
    print('Initiating Collins perceptron training.')
    while True:
        print('Epoch # %s ...' % (nep,))

        # get the new weights
        print('Training...')
        t0 = time.time()
        w1 = collins_epoch(train_labels, train_sentences, w0)
        train_seconds = time.time() - t0
        print('Done.\n')

        # score the new weights on the validation set
        print('Calculating new score...')
        t0 = time.time()
        scores.append(validation_score(w1))
        score_seconds = time.time() - t0
        print('Done.\n')
        epoch_time.append([train_seconds, score_seconds])

        # Stop when the validation score drops.  Also stop when an epoch
        # changed neither the score nor the weights: a further epoch from the
        # same weights would presumably repeat it, and a strict "<" test
        # alone would then never leave the loop.
        got_worse = scores[nep] < scores[nep - 1]
        stalled = scores[nep] == scores[nep - 1] and np.array_equal(w1, w0)
        if got_worse or stalled:
            break

        w0 = w1
        nep += 1

    print('Training complete!\n')

    # now return final weights, score time series, and epoch timing
    return w0, scores, epoch_time