# Assumes module-level `import random` and `import pycnn`, plus an `LSTM`
# wrapper class defined elsewhere in this file.
def __init__(
    self,
    word_count,
    tag_count,
    word_dims,
    tag_dims,
    lstm_units,
    hidden_units,
    struct_out,
    label_out,
    droprate=0,
    struct_spans=4,
    label_spans=3,
):
    self.word_count = word_count
    self.tag_count = tag_count
    self.word_dims = word_dims
    self.tag_dims = tag_dims
    self.lstm_units = lstm_units
    self.hidden_units = hidden_units
    self.struct_out = struct_out
    self.label_out = label_out
    self.droprate = droprate

    self.model = pycnn.Model()
    self.trainer = pycnn.AdadeltaTrainer(self.model, lam=0, eps=1e-7, rho=0.99)
    random.seed(1)

    self.activation = pycnn.rectify

    # embedding tables for words and POS tags
    self.model.add_lookup_parameters('word-embed', (word_count, word_dims))
    self.model.add_lookup_parameters('tag-embed', (tag_count, tag_dims))

    # two stacked bidirectional LSTM layers over the concatenated embeddings
    self.fwd_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)
    self.back_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)

    self.fwd_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)
    self.back_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)

    # feed-forward scorer for structural actions
    self.model.add_parameters(
        'struct-hidden-W',
        (hidden_units, 4 * struct_spans * lstm_units),
    )
    self.model.add_parameters('struct-hidden-b', hidden_units)
    self.model.add_parameters('struct-out-W', (struct_out, hidden_units))
    self.model.add_parameters('struct-out-b', struct_out)

    # feed-forward scorer for label actions
    self.model.add_parameters(
        'label-hidden-W',
        (hidden_units, 4 * label_spans * lstm_units),
    )
    self.model.add_parameters('label-hidden-b', hidden_units)
    self.model.add_parameters('label-out-W', (label_out, hidden_units))
    self.model.add_parameters('label-out-b', label_out)
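# A minimal construction sketch, kept as a comment so the excerpt still parses on
# its own. The class name `Network` is hypothetical (this __init__ appears without
# its enclosing class); pycnn is the legacy Python binding of the cnn/DyNet toolkit.
#
#     network = Network(
#         word_count=10000, tag_count=50,
#         word_dims=50, tag_dims=20,
#         lstm_units=200, hidden_units=200,
#         struct_out=3, label_out=30,
#         droprate=0.5,
#     )
#
# With the defaults struct_spans=4 and label_spans=3 and lstm_units=200 as above,
# the struct scorer's hidden layer reads a 4 * 4 * 200 = 3200-dimensional input
# and the label scorer a 4 * 3 * 200 = 2400-dimensional one: 4 * lstm_units
# features per span, consistent with concatenating the forward and backward
# outputs of both LSTM layers for each attended span.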
# Assumes the surrounding file provides `import random`, `import numpy as np`,
# `import pycnn as pc`, `import progressbar`, `import matplotlib.pyplot as plt`,
# the constants REGULARIZATION, LEARNING_RATE, EARLY_STOPPING and MAX_PATIENCE,
# and the helpers compute_loss, predict_sequences, evaluate_model and
# task1_attention_implementation.save_pycnn_model.
def train_model(model, encoder_frnn, encoder_rrnn, decoder_rnn, train_lemmas,
                train_feat_dicts, train_words, dev_lemmas, dev_feat_dicts, dev_words,
                alphabet_index, inverse_alphabet_index, epochs, optimization,
                results_file_path, feat_index, feature_types, plot):
    print 'training...'
    np.random.seed(17)
    random.seed(17)

    # pick the optimizer
    if optimization == 'ADAM':
        trainer = pc.AdamTrainer(model, lam=REGULARIZATION, alpha=LEARNING_RATE,
                                 beta_1=0.9, beta_2=0.999, eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = pc.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = pc.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = pc.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = pc.AdadeltaTrainer(model)
    else:
        trainer = pc.SimpleSGDTrainer(model)

    train_sanity_set_size = 100
    total_loss = 0
    best_avg_dev_loss = 999
    best_dev_accuracy = -1
    best_train_accuracy = -1
    best_dev_epoch = 0
    best_train_epoch = 0
    patience = 0
    train_len = len(train_words)
    epochs_x = []
    train_loss_y = []
    dev_loss_y = []
    train_accuracy_y = []
    dev_accuracy_y = []

    # progress bar init
    widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets, maxval=epochs).start()
    avg_loss = -1
    e = 0

    for e in xrange(epochs):
        # randomize the training set
        indices = range(train_len)
        random.shuffle(indices)
        train_set = zip(train_lemmas, train_feat_dicts, train_words)
        train_set = [train_set[i] for i in indices]

        # compute loss for each example and update
        for i, example in enumerate(train_set):
            lemma, feats, word = example
            loss = compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma,
                                feats, word, alphabet_index, feat_index, feature_types)
            loss_value = loss.value()
            total_loss += loss_value
            loss.backward()
            trainer.update()
            if i > 0:
                avg_loss = total_loss / float(i + e * train_len)
            else:
                avg_loss = total_loss

            if i % 100 == 0 and i > 0:
                print 'went through {} examples out of {}'.format(i, train_len)

        if EARLY_STOPPING:
            print 'starting epoch evaluation'

            # get train accuracy on a small sanity subset
            print 'train sanity prediction:'
            train_predictions = predict_sequences(model, decoder_rnn, encoder_frnn,
                                                  encoder_rrnn, alphabet_index,
                                                  inverse_alphabet_index,
                                                  train_lemmas[:train_sanity_set_size],
                                                  train_feat_dicts[:train_sanity_set_size],
                                                  feat_index, feature_types)
            print 'train sanity evaluation:'
            train_accuracy = evaluate_model(train_predictions,
                                            train_lemmas[:train_sanity_set_size],
                                            train_feat_dicts[:train_sanity_set_size],
                                            train_words[:train_sanity_set_size],
                                            feature_types, True)[1]

            if train_accuracy > best_train_accuracy:
                best_train_accuracy = train_accuracy
                best_train_epoch = e

            dev_accuracy = 0
            avg_dev_loss = 0

            if len(dev_lemmas) > 0:
                # get dev accuracy
                print 'dev prediction:'
                dev_predictions = predict_sequences(model, decoder_rnn, encoder_frnn,
                                                    encoder_rrnn, alphabet_index,
                                                    inverse_alphabet_index, dev_lemmas,
                                                    dev_feat_dicts, feat_index, feature_types)
                print 'dev evaluation:'
                dev_accuracy = evaluate_model(dev_predictions, dev_lemmas, dev_feat_dicts,
                                              dev_words, feature_types, print_results=True)[1]

                if dev_accuracy >= best_dev_accuracy:
                    best_dev_accuracy = dev_accuracy
                    best_dev_epoch = e

                    # save best model to disk
                    task1_attention_implementation.save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                # found a "perfect" model; every exit path returns the same
                # 3-tuple as the final return below
                if dev_accuracy == 1:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e, best_train_epoch

                # get dev loss
                total_dev_loss = 0
                for i in xrange(len(dev_lemmas)):
                    total_dev_loss += compute_loss(model, encoder_frnn, encoder_rrnn,
                                                   decoder_rnn, dev_lemmas[i],
                                                   dev_feat_dicts[i], dev_words[i],
                                                   alphabet_index, feat_index,
                                                   feature_types).value()

                avg_dev_loss = total_dev_loss / float(len(dev_lemmas))
                if avg_dev_loss < best_avg_dev_loss:
                    best_avg_dev_loss = avg_dev_loss

                print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} ' \
                      'dev accuracy: {3:.4f} train accuracy: {4:.4f} ' \
                      'best dev accuracy: {5:.4f} (epoch {8}) ' \
                      'best train accuracy: {6:.4f} (epoch {9}) patience = {7}'.format(
                          e, avg_loss, avg_dev_loss, dev_accuracy, train_accuracy,
                          best_dev_accuracy, best_train_accuracy, patience,
                          best_dev_epoch, best_train_epoch)

                if patience == MAX_PATIENCE:
                    print 'out of patience after {0} epochs'.format(str(e))
                    # TODO: would like to return the best model, but pycnn has a bug
                    # with save and load. Maybe copy via code?
                    # return best_model[0]
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e, best_train_epoch
            else:
                # no dev set is available for early stopping: run all epochs until
                # the train set is fit perfectly or patience runs out on it
                print 'no dev set for early stopping, running all epochs until ' \
                      'perfectly fitting or patience was reached on the train set'

                if train_accuracy > best_train_accuracy:
                    best_train_accuracy = train_accuracy

                    # save best model to disk
                    task1_attention_implementation.save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                print 'epoch: {0} train loss: {1:.4f} train accuracy: {2:.4f} ' \
                      'best train accuracy: {3:.4f} patience = {4}'.format(
                          e, avg_loss, train_accuracy, best_train_accuracy, patience)

                # found a "perfect" model on the train set, or patience ran out
                if train_accuracy == 1 or patience == MAX_PATIENCE:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e, best_train_epoch

            # update lists for plotting
            train_accuracy_y.append(train_accuracy)
            epochs_x.append(e)
            train_loss_y.append(avg_loss)
            dev_loss_y.append(avg_dev_loss)
            dev_accuracy_y.append(dev_accuracy)

        # finished epoch
        train_progress_bar.update(e)
        if plot:
            with plt.style.context('fivethirtyeight'):
                p1, = plt.plot(epochs_x, dev_loss_y, label='dev loss')
                p2, = plt.plot(epochs_x, train_loss_y, label='train loss')
                p3, = plt.plot(epochs_x, dev_accuracy_y, label='dev acc.')
                p4, = plt.plot(epochs_x, train_accuracy_y, label='train acc.')
                plt.legend(loc='upper left', handles=[p1, p2, p3, p4])
                plt.savefig(results_file_path + 'plot.png')

    train_progress_bar.finish()
    if plot:
        plt.cla()
    print 'finished training. average loss: {} best epoch on dev: {} ' \
          'best epoch on train: {}'.format(str(avg_loss), best_dev_epoch, best_train_epoch)
    return model, e, best_train_epoch
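# A hypothetical invocation sketch, kept as a comment: the model, the RNN builders
# and the data lists are constructed elsewhere in this file, and the literal values
# below (epochs, results path) are placeholders, not values from the source.
#
#     trained, last_epoch, best_train_epoch = train_model(
#         model, encoder_frnn, encoder_rrnn, decoder_rnn,
#         train_lemmas, train_feat_dicts, train_words,
#         dev_lemmas, dev_feat_dicts, dev_words,
#         alphabet_index, inverse_alphabet_index,
#         epochs=100, optimization='ADADELTA',
#         results_file_path='./results/model',
#         feat_index=feat_index, feature_types=feature_types, plot=False)
#
# 'ADADELTA' selects pc.AdadeltaTrainer in the dispatch at the top of train_model;
# any unrecognized string falls back to pc.SimpleSGDTrainer.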