Example #1
0
    def __init__(
        self,
        word_count,
        tag_count,
        word_dims,
        tag_dims,
        lstm_units,
        hidden_units,
        struct_out,
        label_out,
        droprate=0,
        struct_spans=4,
        label_spans=3,
    ):

        self.word_count = word_count
        self.tag_count = tag_count
        self.word_dims = word_dims
        self.tag_dims = tag_dims
        self.lstm_units = lstm_units
        self.hidden_units = hidden_units
        self.struct_out = struct_out
        self.label_out = label_out

        self.droprate = droprate

        self.model = pycnn.Model()

        self.trainer = pycnn.AdadeltaTrainer(self.model,
                                             lam=0,
                                             eps=1e-7,
                                             rho=0.99)
        random.seed(1)

        self.activation = pycnn.rectify

        self.model.add_lookup_parameters('word-embed', (word_count, word_dims))
        self.model.add_lookup_parameters('tag-embed', (tag_count, tag_dims))

        self.fwd_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)
        self.back_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)

        self.fwd_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)
        self.back_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)

        self.model.add_parameters(
            'struct-hidden-W',
            (hidden_units, 4 * struct_spans * lstm_units),
        )
        self.model.add_parameters('struct-hidden-b', hidden_units)

        self.model.add_parameters('struct-out-W', (struct_out, hidden_units))
        self.model.add_parameters('struct-out-b', struct_out)

        self.model.add_parameters(
            'label-hidden-W',
            (hidden_units, 4 * label_spans * lstm_units),
        )
        self.model.add_parameters('label-hidden-b', hidden_units)

        self.model.add_parameters('label-out-W', (label_out, hidden_units))
        self.model.add_parameters('label-out-b', label_out)
def train_model(model, encoder_frnn, encoder_rrnn, decoder_rnn, train_lemmas,
                train_feat_dicts, train_words, dev_lemmas, dev_feat_dicts,
                dev_words, alphabet_index, inverse_alphabet_index, epochs,
                optimization, results_file_path, feat_index, feature_types,
                plot):
    print 'training...'

    np.random.seed(17)
    random.seed(17)

    if optimization == 'ADAM':
        trainer = pc.AdamTrainer(model,
                                 lam=REGULARIZATION,
                                 alpha=LEARNING_RATE,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = pc.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = pc.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = pc.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = pc.AdadeltaTrainer(model)
    else:
        trainer = pc.SimpleSGDTrainer(model)

    train_sanity_set_size = 100
    total_loss = 0
    best_avg_dev_loss = 999
    best_dev_accuracy = -1
    best_train_accuracy = -1
    best_dev_epoch = 0
    best_train_epoch = 0
    patience = 0
    train_len = len(train_words)
    epochs_x = []
    train_loss_y = []
    dev_loss_y = []
    train_accuracy_y = []
    dev_accuracy_y = []

    # progress bar init
    widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets,
                                                 maxval=epochs).start()
    avg_loss = -1
    e = 0

    for e in xrange(epochs):

        # randomize the training set
        indices = range(train_len)
        random.shuffle(indices)
        train_set = zip(train_lemmas, train_feat_dicts, train_words)
        train_set = [train_set[i] for i in indices]

        # compute loss for each example and update
        for i, example in enumerate(train_set):
            lemma, feats, word = example
            loss = compute_loss(model, encoder_frnn, encoder_rrnn, decoder_rnn,
                                lemma, feats, word, alphabet_index, feat_index,
                                feature_types)
            loss_value = loss.value()
            total_loss += loss_value
            loss.backward()
            trainer.update()
            if i > 0:
                avg_loss = total_loss / float(i + e * train_len)
            else:
                avg_loss = total_loss

            if i % 100 == 0 and i > 0:
                print 'went through {} examples out of {}'.format(i, train_len)

        if EARLY_STOPPING:
            print 'starting epoch evaluation'

            # get train accuracy
            print 'train sanity prediction:'
            train_predictions = predict_sequences(
                model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
                inverse_alphabet_index, train_lemmas[:train_sanity_set_size],
                train_feat_dicts[:train_sanity_set_size], feat_index,
                feature_types)
            print 'train sanity evaluation:'
            train_accuracy = evaluate_model(
                train_predictions, train_lemmas[:train_sanity_set_size],
                train_feat_dicts[:train_sanity_set_size],
                train_words[:train_sanity_set_size], feature_types, True)[1]

            if train_accuracy > best_train_accuracy:
                best_train_accuracy = train_accuracy
                best_train_epoch = e

            dev_accuracy = 0
            avg_dev_loss = 0

            if len(dev_lemmas) > 0:
                print 'dev prediction:'
                # get dev accuracy
                dev_predictions = predict_sequences(model, decoder_rnn,
                                                    encoder_frnn, encoder_rrnn,
                                                    alphabet_index,
                                                    inverse_alphabet_index,
                                                    dev_lemmas, dev_feat_dicts,
                                                    feat_index, feature_types)
                print 'dev evaluation:'
                # get dev accuracy
                dev_accuracy = evaluate_model(dev_predictions,
                                              dev_lemmas,
                                              dev_feat_dicts,
                                              dev_words,
                                              feature_types,
                                              print_results=True)[1]

                if dev_accuracy >= best_dev_accuracy:
                    best_dev_accuracy = dev_accuracy
                    best_dev_epoch = e

                    # save best model to disk
                    task1_attention_implementation.save_pycnn_model(
                        model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                # found "perfect" model
                if dev_accuracy == 1:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

                # get dev loss
                total_dev_loss = 0
                for i in xrange(len(dev_lemmas)):
                    total_dev_loss += compute_loss(
                        model, encoder_frnn, encoder_rrnn, decoder_rnn,
                        dev_lemmas[i], dev_feat_dicts[i], dev_words[i],
                        alphabet_index, feat_index, feature_types).value()

                avg_dev_loss = total_dev_loss / float(len(dev_lemmas))
                if avg_dev_loss < best_avg_dev_loss:
                    best_avg_dev_loss = avg_dev_loss

                print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} dev accuracy: {3:.4f} train accuracy = {4:.4f} \
 best dev accuracy {5:.4f} (epoch {8}) best train accuracy: {6:.4f} (epoch {9}) patience = {7}'.format(
                    e, avg_loss, avg_dev_loss, dev_accuracy, train_accuracy,
                    best_dev_accuracy, best_train_accuracy, patience,
                    best_dev_epoch, best_train_epoch)

                if patience == MAX_PATIENCE:
                    print 'out of patience after {0} epochs'.format(str(e))
                    # TODO: would like to return best model but pycnn has a bug with save and load. Maybe copy via code?
                    # return best_model[0]
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e
            else:

                # if no dev set is present, optimize on train set
                print 'no dev set for early stopping, running all epochs until perfectly fitting or patience was \
                reached on the train set'

                if train_accuracy > best_train_accuracy:
                    best_train_accuracy = train_accuracy

                    # save best model to disk
                    task1_attention_implementation.save_pycnn_model(
                        model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                print 'epoch: {0} train loss: {1:.4f} train accuracy = {2:.4f} best train accuracy: {3:.4f} \
                patience = {4}'.format(e, avg_loss, train_accuracy,
                                       best_train_accuracy, patience)

                # found "perfect" model on train set or patience has reached
                if train_accuracy == 1 or patience == MAX_PATIENCE:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

            # update lists for plotting
            train_accuracy_y.append(train_accuracy)
            epochs_x.append(e)
            train_loss_y.append(avg_loss)
            dev_loss_y.append(avg_dev_loss)
            dev_accuracy_y.append(dev_accuracy)

        # finished epoch
        train_progress_bar.update(e)

        if plot:
            with plt.style.context('fivethirtyeight'):
                p1, = plt.plot(epochs_x, dev_loss_y, label='dev loss')
                p2, = plt.plot(epochs_x, train_loss_y, label='train loss')
                p3, = plt.plot(epochs_x, dev_accuracy_y, label='dev acc.')
                p4, = plt.plot(epochs_x, train_accuracy_y, label='train acc.')
                plt.legend(loc='upper left', handles=[p1, p2, p3, p4])
            plt.savefig(results_file_path + 'plot.png')

    train_progress_bar.finish()
    if plot:
        plt.cla()
    print 'finished training. average loss: {} best epoch on dev: {} best epoch on train: {}'.format(
        str(avg_loss), best_dev_epoch, best_train_epoch)
    return model, e, best_train_epoch