import time

import numpy as np

import ffs  # project-local feature-function module; ffs.calcJ() gives the feature count
import sr   # project-local scoring module (general_score, score_by_word);
            # may be an alias for another module name in the original source


def SGD_train(train_labels, train_sentences, max_epoch, validate_labels,
              validate_sentences, method, lr=0.1):
    """
    The main subroutine for training by SGD. train_labels and train_sentences
    are lists of lists forming corresponding (y, x) pairs. validate_labels and
    validate_sentences are similar lists of lists, used for early-stopping
    regularization.
    """
    converged = 0  # bookkeeping flag; set when early stopping triggers
    avg_time_per_epoch = 0.0
    score_list = []

    num_training = len(train_sentences)
    order = np.arange(0, num_training, dtype="int")  # ordering of training examples

    J = ffs.calcJ()  # total number of feature functions

    # Two weight vectors for bookkeeping and a dw vector for updates; the
    # weight vector is initialized at zero.
    weights = np.zeros(J)
    old_weights = np.zeros(J)
    dw = np.zeros(J)

    score = 0.0  # placeholder score for the initial (all-zero) weights
    score_list.append(score)

    for epoch in range(max_epoch):
        time1 = time.time()

        # TODO: consider a learning-rate schedule here.
        np.random.shuffle(order)  # randomly shuffle the training data
        old_weights = np.copy(weights)

        for ind_ex in order:
            x = train_sentences[ind_ex]
            y = train_labels[ind_ex]
            dw = compute_gradient(x, y, weights, dw)
            weights += lr * dw

        avg_time_per_epoch += time.time() - time1

        # Convergence test: score the current weights on the validation set.
        new_score = sr.general_score(weights, validate_labels,
                                     validate_sentences, method, 0)
        score_list.append(new_score)
        if new_score > score:
            # The validation score has increased; keep training.
            score = new_score
        else:
            # The validation score failed to improve; early stopping dictates
            # we stop training and fall back to the previous epoch's weights.
            converged = 1
            weights = np.copy(old_weights)
            break

    avg_time_per_epoch /= float(epoch + 1)  # epoch is zero-indexed

    return weights, score_list, epoch, avg_time_per_epoch
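
# Not part of the original source: a minimal, self-contained sketch of the
# same SGD-with-early-stopping control flow on a toy least-squares problem,
# so the loop structure above can be exercised without the project's ffs/sr
# modules. toy_gradient and toy_score are hypothetical stand-ins for
# compute_gradient and sr.general_score.
#
#     import numpy as np
#
#     def toy_gradient(x, y, w):
#         # gradient of the per-example objective -(y - w.x)^2 / 2
#         return (y - w.dot(x)) * x
#
#     def toy_score(w, xs, ys):
#         # higher is better: negative mean squared error on held-out data
#         return -np.mean([(y - w.dot(x)) ** 2 for x, y in zip(xs, ys)])
#
#     rng = np.random.default_rng(0)
#     w_true = np.array([1.0, -2.0, 0.5])
#     X = rng.normal(size=(200, 3))
#     Y = X.dot(w_true) + 0.1 * rng.normal(size=200)
#     X_tr, Y_tr, X_va, Y_va = X[:150], Y[:150], X[150:], Y[150:]
#
#     w = np.zeros(3)
#     best = toy_score(w, X_va, Y_va)
#     for epoch in range(20):
#         w_old = w.copy()
#         for i in rng.permutation(len(X_tr)):
#             w += 0.05 * toy_gradient(X_tr[i], Y_tr[i], w)
#         new = toy_score(w, X_va, Y_va)
#         if new <= best:      # validation score stopped improving:
#             w = w_old        # roll back to the previous epoch's weights
#             break
#         best = new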
def collins(train_labels, train_sentences, validation_labels,
            validation_sentences, pct_train=0.5, Nex=None):
    """
    Runs Collins perceptron training on the input training data.

    train_labels, train_sentences - training labels and sentences.
    validation_labels, validation_sentences - validation labels and
        sentences, used to decide when to stop training.
    pct_train - percentage of the data set to use as training data
        (currently unused in this function; the split is done by the caller).
    Nex - number of examples to use (also currently unused).
    """
    # get J, the total number of feature functions
    J = ffs.calcJ()
    print('J = {}'.format(J))

    # score the all-zero initial weight vector on the validation set
    scores = []
    w0 = np.zeros(J)
    print('Calculating initial score...')
    scores.append(sr.score_by_word(w0, validation_labels, validation_sentences))
    print('Done!\n')

    # run until converged, according to the score on the validation set
    nep = 1
    epoch_time = []
    print('Initiating Collins perceptron training.')
    while True:
        print('Epoch #{}...'.format(nep))
        t0 = time.time()

        # get the new weights and score
        print('Training...')
        w1 = collins_epoch(train_labels, train_sentences, w0)
        print('Done.\n')
        epoch_time.append([time.time() - t0])

        t0 = time.time()
        print('Calculating new score...')
        scores.append(sr.general_score(w1, validation_labels,
                                       validation_sentences, 'word', 0))
        print('Done.\n')
        epoch_time[nep - 1].append(time.time() - t0)  # [train time, score time]

        # decide if converged: stop once the validation score drops
        if scores[nep] < scores[nep - 1]:
            break  # keep w0, the weights from the previous epoch
        else:
            w0 = w1
        nep += 1
    print('Training complete!\n')

    # Example of making a prediction on a dummy sentence (left here from
    # development; w would be a trained weight vector):
    # dummy = ['FIRSTWORD', 'Do', 'you', 'like', 'cheese', 'LASTWORD']
    # g_dummy = sr.g(w, dummy)
    # U_dummy = sr.U(g_dummy)
    # y_best = sr.bestlabel(U_dummy, g_dummy)

    # return the final weights, the score time series, and the per-epoch timing
    return w0, scores, epoch_time
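
# collins_epoch is defined elsewhere in the repo. As a point of reference,
# below is a hypothetical sketch of what one epoch of the standard Collins
# structured perceptron (Collins, 2002) does; feature_vector and best_label
# are stand-ins for the repo's ffs / sr routines, not its actual API.
#
#     def collins_epoch_sketch(train_labels, train_sentences, w,
#                              feature_vector, best_label):
#         """One pass of the structured-perceptron update. Labels are
#         assumed to be plain lists of tags."""
#         w = w.copy()
#         for y_true, x in zip(train_labels, train_sentences):
#             # Viterbi-decode the highest-scoring label sequence under w
#             y_pred = best_label(w, x)
#             if y_pred != y_true:
#                 # move the weights toward the true labeling's features and
#                 # away from the predicted labeling's features
#                 w += feature_vector(x, y_true) - feature_vector(x, y_pred)
#         return w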