Example #1
    def _init_optimizer(self, model, **kwargs):
        mom = float(kwargs.get('mom', 0.0))
        optim = kwargs.get('optim', 'sgd')
        clip = kwargs.get('clip')

        self.current_lr = kwargs.get('eta', kwargs.get('lr', 0.01))
        if optim == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(model.pc)
        elif optim == 'adam':
            self.optimizer = dy.AdamTrainer(model.pc,
                                            alpha=self.current_lr,
                                            beta_1=kwargs.get('beta1', 0.9),
                                            beta_2=kwargs.get('beta2', 0.999),
                                            eps=kwargs.get('epsilon', 1e-8))
        elif optim == 'rmsprop':
            self.optimizer = dy.RMSPropTrainer(model.pc,
                                               learning_rate=self.current_lr)
        else:
            if mom == 0 or mom is None:
                self.optimizer = dy.SimpleSGDTrainer(
                    model.pc, learning_rate=self.current_lr)
            else:
                logging.info('Using mom %f', mom)
                self.optimizer = dy.MomentumSGDTrainer(
                    model.pc, learning_rate=self.current_lr, mom=mom)
        if clip is not None:
            self.optimizer.set_clip_threshold(clip)
        self.optimizer.set_sparse_updates(False)
Example #2
 def __init__(self,
              eps: numbers.Real = 1e-6,
              rho: numbers.Real = 0.95,
              skip_noisy: bool = False) -> None:
     super().__init__(optimizer=dy.AdadeltaTrainer(
         ParamManager.global_collection(), eps, rho),
                      skip_noisy=skip_noisy)
Example #3
    def train(self):
        trainer = dy.AdadeltaTrainer(self.model)

        best_acc, repeat = 0.0, 0
        for epoch in range(self.config.epochs):
            dy.renew_cg()
            losses = []
            closs = 0.0
            for i, traininst in enumerate(self.trainset):
                pre_context = [self.EOS] + traininst['pre_context']
                pos_context = traininst['pos_context'] + [self.EOS]
                refex = [w.lower() for w in traininst['refex']
                         ] if self.lowercase else traininst['refex']
                refex = [self.EOS] + refex + [self.EOS]
                entity = traininst['entity']
                entity_tokens = entity.replace('\"',
                                               '').replace('\'', '').replace(
                                                   ',', '').split('_')

                loss = self.get_loss(pre_context, pos_context, refex, entity,
                                     entity_tokens)
                losses.append(loss)

                if len(losses) == self.config.batch:
                    loss = dy.esum(losses)
                    closs += loss.value()
                    loss.backward()
                    trainer.update()
                    dy.renew_cg()

                    print("Epoch: {0} \t Loss: {1} \t Progress: {2}".format(
                        epoch, round(closs / self.config.batch, 2),
                        round(i / len(self.trainset), 2)),
                          end='       \r')
                    losses = []
                    closs = 0.0

            outputs, num, dem = self.validate()
            acc = round(float(num) / dem, 2)

            print("Dev acc: {0} \t Best acc: {1}".format(
                str(num / dem), best_acc))

            # Saving the model with best accuracy
            if best_acc == 0.0 or acc > best_acc:
                best_acc = acc

                self.logger.save_result(fname='dev_best',
                                        results=outputs,
                                        beam=self.config.beam)
                self.model.save(self.logger.model_path)

                repeat = 0
            else:
                repeat += 1

            # Stop early if the accuracy has not improved for `early_stop` consecutive epochs
            if repeat == self.config.early_stop:
                break
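The mini-batch bookkeeping in the loop above (accumulate per-example losses, sum them with dy.esum, run one backward pass and one trainer update, then renew the computation graph) is easier to see in isolation. Below is a stripped-down sketch with a dummy parameter and a dummy loss; it is not code from this project.

import dynet as dy

pc = dy.ParameterCollection()
trainer = dy.AdadeltaTrainer(pc)
w = pc.add_parameters((1,))
batch_size = 4

dy.renew_cg()
losses = []
for step in range(20):  # stand-in for iterating over a training set
    target = dy.inputVector([float(step % 3)])
    losses.append(dy.squared_norm(dy.parameter(w) - target))  # dummy per-example loss
    if len(losses) == batch_size:
        batch_loss = dy.esum(losses)  # sum the accumulated losses
        batch_loss.forward()
        batch_loss.backward()         # one backward pass per mini-batch
        trainer.update()              # single Adadelta step
        dy.renew_cg()                 # start a fresh computation graph
        losses = []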
Example #4
    def train(self):
        trainer = dy.AdadeltaTrainer(self.model)

        log = []
        best_acc, repeat = 0.0, 0
        for epoch in range(self.config.epochs):
            dy.renew_cg()
            losses = []
            closs = 0.0
            for i, traininst in enumerate(self.trainset):
                pre_context = [self.EOS] + traininst['pre_context']
                pos_context = traininst['pos_context'] + [self.EOS]
                refex = [self.EOS] + traininst['refex'] + [self.EOS]
                entity = traininst['entity']
                loss = self.get_loss(pre_context, pos_context, refex, entity)
                losses.append(loss)

                if len(losses) == self.config.batch:
                    loss = dy.esum(losses)
                    closs += loss.value()
                    loss.backward()
                    trainer.update()
                    dy.renew_cg()

                    print("Epoch: {0} \t Loss: {1} \t Progress: {2}".format(
                        epoch, (closs / self.config.batch),
                        round(i / len(self.trainset), 2)),
                          end='       \r')
                    losses = []
                    closs = 0.0

            outputs, num, dem = self.validate()
            acc = float(num) / dem
            log.append(acc)

            print("Dev acc: {0} \t Best acc: {1}".format(
                round(acc, 2), best_acc))

            # Saving the model with best accuracy
            if best_acc == 0.0 or acc > best_acc:
                best_acc = acc

                fname = 'dev_best.txt'
                self.write(os.path.join(self.path, fname), outputs)

                fname = 'best_model.dy'
                self.model.save(os.path.join(self.path, fname))

                repeat = 0
            else:
                repeat += 1

            # Stop early if the accuracy has not improved for `early_stop` consecutive epochs
            if repeat == self.config.early_stop:
                break

        json.dump(log, open(os.path.join(self.path, 'log.json'), 'w'))
Example #5
    def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
        """

        :param params:
        :param vocab:
        :param label2tag:
        :param pretrained_embeddings:
        """
        self.dim_w = params.dim_w
        self.win = params.win
        self.vocab = vocab
        self.n_words = len(self.vocab)
        self.dim_asp = params.dim_asp
        self.dim_y_asp = params.n_asp_tags
        self.n_steps = params.n_steps
        self.asp_label2tag = label2tag
        self.dropout_asp = params.dropout_asp
        self.dropout = params.dropout
        self.ds_name = params.ds_name
        self.model_name = params.model_name
        self.attention_type = params.attention_type

        self.pc = dy.ParameterCollection()
        self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                         pretrained_embeddings=pretrained_embeddings)
        
        self.DEP_RecNN = DTreeBuilder(pc=self.pc, n_in=self.win * self.dim_w, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        
        self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)

        self.BiAttention_F = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        self.BiAttention_B = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        self.BiAttention_T = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)

        self.MultiWeightLayer = MultiWeightLayer(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)

        self.ASP_FC = Linear(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_y_asp)

        self.layers = [self.ASP_FC, self.DEP_RecNN, self.BiAttention_F, self.BiAttention_B, self.BiAttention_T, self.MultiWeightLayer]

        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adagrad':
            self.optimizer = dy.AdagradTrainer(self.pc)
        elif params.optimizer == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        else:
            raise Exception("Invalid optimizer!!")
Example #6
    def __init__(
        self,
        bigrams_size,
        unigrams_size,
        bigrams_dims,
        unigrams_dims,
        lstm_units,
        hidden_units,
        label_size,
        span_nums,
        droprate=0,
    ):

        self.bigrams_size = bigrams_size
        self.bigrams_dims = bigrams_dims
        self.unigrams_dims = unigrams_dims
        self.unigrams_size = unigrams_size
        self.lstm_units = lstm_units
        self.hidden_units = hidden_units
        self.span_nums = span_nums
        self.droprate = droprate
        self.label_size = label_size

        self.model = dynet.Model()
        self.trainer = dynet.AdadeltaTrainer(self.model, eps=1e-7, rho=0.99)
        random.seed(1)

        self.activation = dynet.rectify

        self.bigram_embed = self.model.add_lookup_parameters(
            (self.bigrams_size, self.bigrams_dims), )
        self.unigram_embed = self.model.add_lookup_parameters(
            (self.unigrams_size, self.unigrams_dims), )
        self.fwd_lstm1 = LSTM(self.bigrams_dims + self.unigrams_dims,
                              self.lstm_units, self.model)
        self.back_lstm1 = LSTM(self.bigrams_dims + self.unigrams_dims,
                               self.lstm_units, self.model)

        self.fwd_lstm2 = LSTM(2 * self.lstm_units, self.lstm_units, self.model)
        self.back_lstm2 = LSTM(2 * self.lstm_units, self.lstm_units,
                               self.model)

        self.p_hidden_W = self.model.add_parameters(
            (self.hidden_units, 2 * self.span_nums * self.lstm_units),
            dynet.UniformInitializer(0.01))
        self.p_hidden_b = self.model.add_parameters((self.hidden_units, ),
                                                    dynet.ConstInitializer(0))
        self.p_output_W = self.model.add_parameters(
            (self.label_size, self.hidden_units), dynet.ConstInitializer(0))
        self.p_output_b = self.model.add_parameters((self.label_size, ),
                                                    dynet.ConstInitializer(0))
Example #7
def entailment(train_file, dev_file, test_file, embed_file, epochs, eps,
               reg_lambda, batch_size, per_log, LSTM_params, training_sample,
               sample_type, improvement):
    curr_time = strftime("%Y-%m-%d %H:%M:%S", gmtime())
    print(curr_time + ": starting process")

    # read train and dev data sets
    train, train_words, max_len_train = read_data(
        train_file
    )  # read train data to list. each list item is a sentence. each sentence is a tuple
    dev, dev_words, max_len_dev = read_data(
        dev_file
    )  # read dev data to list. each list item is a sentence. each sentence is a tuple
    test, test_words, max_len_test = read_data(
        test_file
    )  # read test data to list. each list item is a sentence. each sentence is a tuple
    P_rows = max([max_len_train, max_len_dev, max_len_test])

    # unify all unique words to one set and delete independent sets
    all_words = train_words.union(dev_words).union(test_words)
    del train_words
    del dev_words
    del test_words

    # get embeddings
    embed_vec, vocab = get_embeddings(embed_file, all_words, LSTM_params[2])

    # define vocabulary and help structures
    word2int = {w: i for i, w in enumerate(vocab)}
    label2int = {
        l: i
        for i, l in enumerate(["entailment", "neutral", "contradiction"])
    }
    vocab_size = len(vocab)
    num_labels = 3

    # create a classifier
    m = dy.ParameterCollection()
    trainer = dy.AdadeltaTrainer(m, eps)  # define trainer
    snli_classifier = ReRead_LSTM(vocab_size, num_labels, LSTM_params,
                                  embed_vec, P_rows, m,
                                  improvement)  # create classifier
    train_model(train, dev, test, epochs, batch_size, reg_lambda, trainer,
                snli_classifier, word2int, label2int, per_log, training_sample,
                sample_type, improvement)
Example #8
 def __init__(self, model, optim='sgd', clip=5, mom=0.9, **kwargs):
     super(ClassifyTrainerDynet, self).__init__()
     self.model = model
     eta = kwargs.get('eta', kwargs.get('lr', 0.01))
     print("Using eta [{:.4f}]".format(eta))
     print("Using optim [{}]".format(optim))
     self.labels = model.labels
     if optim == 'adadelta':
         self.optimizer = dy.AdadeltaTrainer(model.pc)
     elif optim == 'adam':
         self.optimizer = dy.AdamTrainer(model.pc)
     elif optim == 'rmsprop':
         self.optimizer = dy.RMSPropTrainer(model.pc, learning_rate=eta)
     else:
         print("using mom {:.3f}".format(mom))
         self.optimizer = dy.MomentumSGDTrainer(model.pc,
                                                learning_rate=eta,
                                                mom=mom)
     self.optimizer.set_clip_threshold(clip)
Example #9
def optimizer(model, optim='sgd', eta=0.01, clip=None, mom=0.9, **kwargs):
    if 'lr' in kwargs:
        eta = kwargs['lr']
    print('Using eta [{:.4f}]'.format(eta))
    print('Using optim [{}]'.format(optim))
    if optim == 'adadelta':
        opt = dy.AdadeltaTrainer(model.pc)
    elif optim == 'adam':
        opt = dy.AdamTrainer(model.pc)
    elif optim == 'rmsprop':
        opt = dy.RMSPropTrainer(model.pc, learning_rate=eta)
    else:
        if mom == 0 or mom is None:
            opt = dy.SimpleSGDTrainer(model.pc, learning_rate=eta)
        else:
            print('Using mom {:.3f}'.format(mom))
            opt = dy.MomentumSGDTrainer(model.pc, learning_rate=eta, mom=mom)
    if clip is not None:
        opt.set_clip_threshold(clip)
    opt.set_sparse_updates(False)
    return opt
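As a quick illustration, here is a hypothetical usage sketch of the factory above. ToyModel and its single parameter are stand-ins for whatever model object exposes a DyNet ParameterCollection as .pc; they are not part of the original example.

import dynet as dy

class ToyModel:
    """Hypothetical stand-in for a model object exposing `.pc`."""
    def __init__(self):
        self.pc = dy.ParameterCollection()
        self.W = self.pc.add_parameters((3, 4))

model = ToyModel()
opt = optimizer(model, optim='adadelta', clip=5.0)  # factory defined above

dy.renew_cg()
scores = dy.parameter(model.W) * dy.inputVector([1.0, 2.0, 3.0, 4.0])
loss = dy.pickneglogsoftmax(scores, 1)  # toy single-example loss
loss.value()
loss.backward()
opt.update()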
Example #10
    def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
        """

        :param params:
        :param vocab:
        :param label2tag:
        :param pretrained_embeddings:
        """
        self.dim_w = params.dim_w
        self.win = params.win
        self.vocab = vocab
        self.n_words = len(self.vocab)
        self.dim_asp = params.dim_asp
        self.dim_opi = params.dim_opi
        self.dim_y_asp = params.n_asp_tags
        self.dim_y_opi = params.n_opi_tags
        self.n_steps = params.n_steps
        self.asp_label2tag = label2tag
        self.opi_label2tag = {0: 'O', 1: 'T'}
        self.dropout_asp = params.dropout_asp
        self.dropout_opi = params.dropout_opi
        self.dropout = params.dropout
        self.rnn_type = params.rnn_type
        self.ds_name = params.ds_name
        self.model_name = params.model_name
        self.attention_type = params.attention_type

        self.pc = dy.ParameterCollection()
        self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                         pretrained_embeddings=pretrained_embeddings)
        #self.ASP_RNN = LSTM(pc=self.pc, n_in=self.win*self.dim_w, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        #self.OPI_RNN = LSTM(pc=self.pc, n_in=self.win*self.dim_w, n_out=self.dim_opi, dropout_rate=self.dropout_opi)
        # use dynet RNNBuilder rather than the self-defined RNN classes
        if self.rnn_type == 'LSTM':
            self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)
            self.OPI_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_opi, self.pc)
        elif self.rnn_type == 'GRU':
            # NOT TRIED!
            self.ASP_RNN = dy.GRUBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)
            self.OPI_RNN = dy.GRUBuilder(1, self.win * self.dim_w, self.dim_opi, self.pc)
        else:
            raise Exception("Invalid RNN type!!!")
        self.THA = THA(pc=self.pc, n_steps=self.n_steps, n_in=2*self.dim_asp)
        if self.attention_type == 'bilinear':
            self.STN = ST_bilinear(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        # here dot attention is not applicable since the aspect representation and opinion representation
        # have different dimensions
        # elif self.attention_type == 'dot':
        #    self.STN = ST_dot(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        elif self.attention_type == 'concat':
            self.STN = ST_concat(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        else:
            raise Exception("Invalid attention type!!!")

        self.ASP_FC = Linear(pc=self.pc, n_in=2*self.dim_asp+2*self.dim_opi, n_out=self.dim_y_asp)
        self.OPI_FC = Linear(pc=self.pc, n_in=2*self.dim_opi, n_out=self.dim_y_opi)

        self.layers = [self.ASP_FC, self.OPI_FC, self.THA, self.STN]

        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adagrad':
            self.optimizer = dy.AdagradTrainer(self.pc)
        elif params.optimizer == 'adadelta':
            # use default value of adadelta
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        else:
            raise Exception("Invalid optimizer!!")
Example #11
    def __init__(self, params, vocab, embeddings):
        """

        :param params: parameters
        :param vocab: vocabulary
        :param embeddings: pretrained word embeddings
        """
        self.params = params
        self.name = 'lstm_crf'
        self.dim_char = params.dim_char
        self.dim_w = params.dim_w
        self.dim_char_h = params.dim_char_h
        self.dim_ote_h = params.dim_ote_h
        self.dim_ts_h = params.dim_ts_h
        self.input_win = params.input_win
        self.ds_name = params.ds_name
        # tag vocabulary of opinion target extraction and targeted sentiment
        self.ote_tag_vocab = params.ote_tag_vocab
        self.ts_tag_vocab = params.ts_tag_vocab
        self.dim_ote_y = len(self.ote_tag_vocab)
        self.dim_ts_y = len(self.ts_tag_vocab)
        self.n_epoch = params.n_epoch
        self.dropout_rate = params.dropout
        self.tagging_schema = params.tagging_schema
        self.clip_grad = params.clip_grad
        self.use_char = params.use_char
        # name of word embeddings
        self.emb_name = params.emb_name
        self.embeddings = embeddings
        self.vocab = vocab
        # character vocabulary
        self.char_vocab = params.char_vocab
        self.pc = dy.ParameterCollection()

        # word embedding layer
        self.emb = WDEmb(pc=self.pc,
                         n_words=len(vocab),
                         dim_w=self.dim_w,
                         pretrained_embeddings=embeddings)

        # input dimension
        dim_input = self.input_win * self.dim_w

        self.lstm_ts = dy.LSTMBuilder(1, dim_input, self.dim_ts_h, self.pc)

        # hidden layer between LSTM and CRF decoding layer
        self.hidden = Linear(pc=self.pc,
                             n_in=2 * self.dim_ts_h,
                             n_out=self.dim_ts_h,
                             use_bias=True,
                             nonlinear='tanh')
        # map the word representation to the ts label space
        # in the label space, both BEG and END tag are considered
        self.fc_ts = Linear(pc=self.pc,
                            n_in=self.dim_ts_h,
                            n_out=self.dim_ts_y)

        # transition matrix, [i, j] is the transition score from tag i to tag j
        self.transitions = self.pc.add_lookup_parameters(
            (self.dim_ts_y + 2, self.dim_ts_y + 2))

        # determine the optimizer
        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        else:
            raise Exception("Unsupported optimizer type: %s" %
                            params.optimizer)
Example #12
    def __init__(self, params, vocab, embeddings, char_embeddings):
        """

        :param params:
        :param vocab:
        :param embeddings:
        :param char_embeddings:
        """
        self.params = params
        self.name = 'lstm_cascade'
        self.dim_char = params.dim_char
        self.dim_w = params.dim_w
        self.dim_char_h = params.dim_char_h
        self.dim_ote_h = params.dim_ote_h
        self.dim_ts_h = params.dim_ts_h
        self.input_win = params.input_win
        self.ds_name = params.ds_name
        # tag vocabulary of opinion target extraction and targeted sentiment
        self.ote_tag_vocab = params.ote_tag_vocab
        self.ts_tag_vocab = params.ts_tag_vocab
        self.dim_ote_y = len(self.ote_tag_vocab)
        self.dim_ts_y = len(self.ts_tag_vocab)
        self.n_epoch = params.n_epoch
        self.dropout_rate = params.dropout
        self.tagging_schema = params.tagging_schema
        self.clip_grad = params.clip_grad
        self.use_char = params.use_char
        # name of word embeddings
        self.emb_name = params.emb_name
        self.embeddings = embeddings
        self.vocab = vocab
        # character vocabulary
        self.char_vocab = params.char_vocab
        #self.td_proportions = params.td_proportions
        self.epsilon = params.epsilon
        #self.tc_proportions = params.tc_proportions
        self.pc = dy.ParameterCollection()

        if self.use_char:
            self.char_emb = CharEmb(pc=self.pc,
                                    n_chars=len(self.char_vocab),
                                    dim_char=self.dim_char,
                                    pretrained_embeddings=char_embeddings)
            self.lstm_char = dy.LSTMBuilder(1, self.dim_char, self.dim_char_h,
                                            self.pc)
            dim_input = self.input_win * self.dim_w + 2 * self.dim_char_h
        else:
            dim_input = self.input_win * self.dim_w
        # word embedding layer
        self.emb = WDEmb(pc=self.pc,
                         n_words=len(vocab),
                         dim_w=self.dim_w,
                         pretrained_embeddings=embeddings)

        # lstm layers
        self.lstm_ote = dy.LSTMBuilder(1, dim_input, self.dim_ote_h, self.pc)
        self.lstm_ts = dy.LSTMBuilder(1, 2 * self.dim_ote_h, self.dim_ts_h,
                                      self.pc)

        # fully connected layer
        self.fc_ote = Linear(pc=self.pc,
                             n_in=2 * self.dim_ote_h,
                             n_out=self.dim_ote_y)
        self.fc_ts = Linear(pc=self.pc,
                            n_in=2 * self.dim_ts_h,
                            n_out=self.dim_ts_y)

        assert self.tagging_schema == 'BIEOS'
        transition_path = {
            'B': ['B-POS', 'B-NEG', 'B-NEU'],
            'I': ['I-POS', 'I-NEG', 'I-NEU'],
            'E': ['E-POS', 'E-NEG', 'E-NEU'],
            'S': ['S-POS', 'S-NEG', 'S-NEU'],
            'O': ['O']
        }
        self.transition_scores = np.zeros((self.dim_ote_y, self.dim_ts_y))
        for t in transition_path:
            next_tags = transition_path[t]
            n_next_tag = len(next_tags)
            ote_id = self.ote_tag_vocab[t]
            for nt in next_tags:
                ts_id = self.ts_tag_vocab[nt]
                self.transition_scores[ote_id][ts_id] = 1.0 / n_next_tag
        print(self.transition_scores)
        self.transition_scores = np.array(self.transition_scores,
                                          dtype='float32').transpose()

        # opinion target-opinion words co-occurrence modeling
        self.stm_lm = Linear(pc=self.pc,
                             n_in=2 * self.dim_ote_h,
                             n_out=2 * self.dim_ote_h,
                             nonlinear='tanh')
        # fully connected layer for opinion-enhanced indicator prediction task
        self.fc_stm = Linear(pc=self.pc, n_in=2 * self.dim_ote_h, n_out=2)

        # gate for maintaining sentiment consistency
        self.W_gate = self.pc.add_parameters(
            (2 * self.dim_ote_h, 2 * self.dim_ote_h),
            init=dy.UniformInitializer(0.2))

        # determine the optimizer
        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        else:
            raise Exception("Unsupported optimizer type: %s" %
                            params.optimizer)
Example #13
 def __init__(self, eps=1e-6, rho=0.95):
     self.optimizer = dy.AdadeltaTrainer(ParamManager.global_collection(),
                                         eps, rho)
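For comparison, a minimal construction sketch against a plain ParameterCollection, with the eps and rho arguments spelled out as keywords (ParamManager.global_collection() above is specific to that codebase; the values below simply restate the defaults used in this example):

import dynet as dy

pc = dy.ParameterCollection()
# eps and rho restate the defaults shown above (1e-6 and 0.95)
trainer = dy.AdadeltaTrainer(pc, eps=1e-6, rho=0.95)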
Example #14
def main(args: argparse.Namespace):

    dargs = args.__dict__
    for key, value in dargs.items():
        logging.info("%s: %s", str(key).ljust(15), value)

    os.makedirs(args.output)

    if args.nfd:
        logging.info("Will perform training on NFD-normalized data.")
    else:
        logging.info("Will perform training on unnormalized data.")

    vocabulary_ = vocabulary.Vocabularies()

    training_data = []
    with utils.OpenNormalize(args.train, args.nfd) as f:
        for line in f:
            input_, target = line.rstrip().split("\t", 1)
            encoded_input = vocabulary_.encode_input(input_)
            vocabulary_.encode_actions(target)
            sample = utils.Sample(input_, target, encoded_input)
            training_data.append(sample)

    logging.info("%d actions: %s", len(vocabulary_.actions),
                 vocabulary_.actions)
    logging.info("%d chars: %s", len(vocabulary_.characters),
                 vocabulary_.characters)
    vocabulary_path = os.path.join(args.output, "vocabulary.pkl")
    vocabulary_.persist(vocabulary_path)
    logging.info("Wrote vocabulary to %s.", vocabulary_path)

    development_data = []
    with utils.OpenNormalize(args.dev, args.nfd) as f:
        for line in f:
            input_, target = line.rstrip().split("\t", 1)
            encoded_input = vocabulary_.encode_unseen_input(input_)
            sample = utils.Sample(input_, target, encoded_input)
            development_data.append(sample)

    if args.test is not None:
        test_data = []
        with utils.OpenNormalize(args.test, args.nfd) as f:
            for line in f:
                input_, *optional_target = line.rstrip().split("\t", 1)
                target = optional_target[0] if optional_target else None
                encoded_input = vocabulary_.encode_unseen_input(input_)
                sample = utils.Sample(input_, target, encoded_input)
                test_data.append(sample)

    sed_parameters_path = os.path.join(args.output, "sed.pkl")
    sed_aligner = sed.StochasticEditDistance.fit_from_data(
        training_data,
        em_iterations=args.sed_em_iterations,
        output_path=sed_parameters_path,
    )
    expert = optimal_expert_substitutions.OptimalSubstitutionExpert(
        sed_aligner)

    model = dy.Model()
    transducer_ = transducer.Transducer(model, vocabulary_, expert, **dargs)

    widgets = [progressbar.Bar(">"), " ", progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets,
                                                 maxval=args.epochs).start()

    train_log_path = os.path.join(args.output, "train.log")
    best_model_path = os.path.join(args.output, "best.model")

    with open(train_log_path, "w") as w:
        w.write("epoch\tavg_loss\ttrain_accuracy\tdev_accuracy\n")

    trainer = dy.AdadeltaTrainer(model)
    train_subset = training_data[:100]
    rollin_schedule = inverse_sigmoid_schedule(args.k)
    max_patience = args.patience
    batch_size = args.batch_size

    logging.info(
        "Training for a maximum of %d epochs with a maximum patience of %d.",
        args.epochs,
        max_patience,
    )
    logging.info(
        "Number of train batches: %d.",
        math.ceil(len(training_data) / batch_size),
    )

    best_train_accuracy = 0
    best_dev_accuracy = 0
    best_epoch = 0
    patience = 0

    for epoch in range(args.epochs):

        logging.info("Training...")
        with utils.Timer():
            train_loss = 0.0
            random.shuffle(training_data)
            batches = [
                training_data[i:i + batch_size]
                for i in range(0, len(training_data), batch_size)
            ]
            rollin = rollin_schedule(epoch)
            j = 0
            for j, batch in enumerate(batches):
                losses = []
                dy.renew_cg()
                for sample in batch:
                    output = transducer_.transduce(
                        input_=sample.input,
                        encoded_input=sample.encoded_input,
                        target=sample.target,
                        rollin=rollin,
                        external_cg=True,
                    )
                    losses.extend(output.losses)
                batch_loss = -dy.average(losses)
                train_loss += batch_loss.scalar_value()
                batch_loss.backward()
                trainer.update()
                if j > 0 and j % 100 == 0:
                    logging.info("\t\t...%d batches", j)
            logging.info("\t\t...%d batches", j + 1)

        avg_loss = train_loss / len(batches)
        logging.info("Average train loss: %.4f.", avg_loss)

        logging.info("Evaluating on training data subset...")
        with utils.Timer():
            train_accuracy = decode(transducer_, train_subset).accuracy

        if train_accuracy > best_train_accuracy:
            best_train_accuracy = train_accuracy

        patience += 1

        logging.info("Evaluating on development data...")
        with utils.Timer():
            decoding_output = decode(transducer_, development_data)
            dev_accuracy = decoding_output.accuracy
            avg_dev_loss = decoding_output.loss

        if dev_accuracy > best_dev_accuracy:
            best_dev_accuracy = dev_accuracy
            best_epoch = epoch
            patience = 0
            logging.info("Found best dev accuracy %.4f.", best_dev_accuracy)
            model.save(best_model_path)
            logging.info("Saved new best model to %s.", best_model_path)

        logging.info(
            f"Epoch {epoch} / {args.epochs - 1}: train loss: {avg_loss:.4f} "
            f"dev loss: {avg_dev_loss:.4f} train acc: {train_accuracy:.4f} "
            f"dev acc: {dev_accuracy:.4f} best train acc: {best_train_accuracy:.4f} "
            f"best dev acc: {best_dev_accuracy:.4f} best epoch: {best_epoch} "
            f"patience: {patience} / {max_patience - 1}")

        log_line = f"{epoch}\t{avg_loss:.4f}\t{train_accuracy:.4f}\t{dev_accuracy:.4f}\n"
        with open(train_log_path, "a") as a:
            a.write(log_line)

        if patience == max_patience:
            logging.info("Out of patience after %d epochs.", epoch + 1)
            train_progress_bar.finish()
            break

        train_progress_bar.update(epoch)

    logging.info("Finished training.")

    if not os.path.exists(best_model_path):
        sys.exit(0)

    model = dy.Model()
    transducer_ = transducer.Transducer(model, vocabulary_, expert, **dargs)
    model.populate(best_model_path)

    evaluations = [(development_data, "dev")]
    if args.test is not None:
        evaluations.append((test_data, "test"))
    for data, dataset_name in evaluations:

        logging.info(
            "Evaluating best model on %s data using beam search "
            "(beam width %d)...",
            dataset_name,
            args.beam_width,
        )
        with utils.Timer():
            greedy_decoding = decode(transducer_, data)
        utils.write_results(
            greedy_decoding.accuracy,
            greedy_decoding.predictions,
            args.output,
            args.nfd,
            dataset_name,
            dargs=dargs,
        )
        with utils.Timer():
            beam_decoding = decode(transducer_, data, args.beam_width)
        utils.write_results(
            beam_decoding.accuracy,
            beam_decoding.predictions,
            args.output,
            args.nfd,
            dataset_name,
            args.beam_width,
            dargs=dargs,
        )
Example #15
    def __init__(
        self,
        word_count,
        tag_count,
        word_dims,
        tag_dims,
        lstm_units,
        hidden_units,
        struct_out,
        label_out,
        droprate=0,
        struct_spans=4,
        label_spans=3,
    ):

        self.word_count = word_count
        self.tag_count = tag_count
        self.word_dims = word_dims
        self.tag_dims = tag_dims
        self.lstm_units = lstm_units
        self.hidden_units = hidden_units
        self.struct_out = struct_out
        self.label_out = label_out

        self.droprate = droprate

        self.model = dynet.Model()

        self.trainer = dynet.AdadeltaTrainer(self.model, eps=1e-7, rho=0.99)
        random.seed(1)

        self.activation = dynet.rectify

        self.word_embed = self.model.add_lookup_parameters(
            (word_count, word_dims), )
        self.tag_embed = self.model.add_lookup_parameters(
            (tag_count, tag_dims), )

        self.fwd_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)
        self.back_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)

        self.fwd_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)
        self.back_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)

        self.struct_hidden_W = self.model.add_parameters(
            (hidden_units, 4 * struct_spans * lstm_units),
            dynet.UniformInitializer(0.01),
        )
        self.struct_hidden_b = self.model.add_parameters(
            (hidden_units, ),
            dynet.ConstInitializer(0),
        )
        self.struct_output_W = self.model.add_parameters(
            (struct_out, hidden_units),
            dynet.ConstInitializer(0),
        )
        self.struct_output_b = self.model.add_parameters(
            (struct_out, ),
            dynet.ConstInitializer(0),
        )

        self.label_hidden_W = self.model.add_parameters(
            (hidden_units, 4 * label_spans * lstm_units),
            dynet.UniformInitializer(0.01),
        )
        self.label_hidden_b = self.model.add_parameters(
            (hidden_units, ),
            dynet.ConstInitializer(0),
        )
        self.label_output_W = self.model.add_parameters(
            (label_out, hidden_units),
            dynet.ConstInitializer(0),
        )
        self.label_output_b = self.model.add_parameters(
            (label_out, ),
            dynet.ConstInitializer(0),
        )
Example #16
 def __init__(self, yaml_context, eps=1e-6, rho=0.95):
     self.optimizer = dy.AdadeltaTrainer(
         yaml_context.dynet_param_collection.param_col, eps, rho)
Example #17
def char_train(network, train_set, val_set, test_set, test_set2,
               train_set_word, val_set_word, test_set_word, test_set2_word,
               epochs, batch_size, args, tag_to_ix):
    def get_val_set_loss(network, val_set, val_set_word, val_author_vecs,
                         pretrain, num_basis):
        loss = []
        vae_loss = [0]
        l2_loss = [0]
        for i, (input_sentence, output_sentence) in enumerate(val_set):
            if args.use_vae:
                l, a, v, l2 = network.get_full_loss(input_sentence,
                                                    val_set_word[i][0],
                                                    output_sentence,
                                                    val_author_vecs[i],
                                                    pretrain)
                loss.append(l.value())
                vae_loss.append(v.value())
                l2_loss.append(l2.value())
            else:
                loss.append(
                    network.get_loss(input_sentence, val_set_word[i][0],
                                     output_sentence, val_author_vecs[i],
                                     pretrain).value())
            dy.renew_cg()
        return sum(loss) / len(val_set), sum(vae_loss) / len(val_set), sum(
            l2_loss) / len(val_set)

    def get_val_set_acc(network, val_set, val_set_word, val_author_vecs,
                        val_author_ids, pretrain, num_basis):
        evals = []
        if args.use_vae:
            for i, (input_sentence, output_sentence) in enumerate(val_set):
                evals.append(
                    network.full_evaluate_acc(input_sentence,
                                              val_set_word[i][0],
                                              output_sentence,
                                              val_author_vecs[i],
                                              val_author_ids[i], pretrain))
                dy.renew_cg()
        else:
            for i, (input_sentence, output_sentence) in enumerate(val_set):
                evals.append(
                    network.evaluate_acc(input_sentence, val_set_word[i][0],
                                         output_sentence, val_author_vecs[i],
                                         val_author_ids[i], pretrain))
                dy.renew_cg()
        dy.renew_cg()

        correct = [c for c, t, d, w, cc, e in evals]
        total = [t for c, t, d, w, cc, e in evals]
        mean = 0
        confidence = 0
        oov = [d for c, t, d, w, cc, e in evals]
        wrong = [w for c, t, d, w, cc, e in evals]
        correct2 = [cc for c, t, d, w, cc, e in evals]

        auth_correct = [
            c for i, (c, t, d, w, cc, e) in enumerate(evals)
            if val_author_vecs[i] is not None
        ]
        auth_total = [
            t for i, (c, t, d, w, cc, e) in enumerate(evals)
            if val_author_vecs[i] is not None
        ]
        non_auth_correct = [
            c for i, (c, t, d, w, cc, e) in enumerate(evals)
            if val_author_vecs[i] is None
        ]
        non_auth_total = [
            t for i, (c, t, d, w, cc, e) in enumerate(evals)
            if val_author_vecs[i] is None
        ]
        eids = [e for c, t, d, w, cc, e in evals]
        #unique_eid = set(eids)
        len_eid = num_basis
        counts = []
        for i in range(len_eid):
            counts.append(sum([e == i for e in eids]))
        counts2 = []
        for i in range(len_eid):
            counts2.append(
                sum([
                    e == i for j, e in enumerate(eids)
                    if val_author_vecs[j] is not None
                ]))

        if sum(non_auth_total) == 0:
            non_auth_total = [1]

        return 100.0 * sum(correct) / sum(total), mean, confidence, sum(
            oov), sum(wrong), sum(correct2), 100.0 * sum(auth_correct) / sum(
                auth_total), 100.0 * sum(non_auth_correct) / sum(
                    non_auth_total), counts, counts2

    #original_set = train_set
    #train_set = train_set*epochs

    if args.optimizer == 'adadelta':
        trainer = dy.AdadeltaTrainer(network.model)
        trainer.set_clip_threshold(5)
    elif args.optimizer == 'adam':
        trainer = dy.AdamTrainer(network.model, alpha=args.lr)
        trainer.set_clip_threshold(5)
    elif args.optimizer == 'sgd-momentum':
        trainer = dy.MomentumSGDTrainer(network.model, learning_rate=args.lr)
    else:
        logging.critical('This Optimizer is not valid or not allowed')

    losses = []
    iterations = []

    kk = args.pretrain_epochs

    if args.use_all_networks:
        args.network = 'follow'
        train_author_vecs1, dev_author_vecs1, test_author_vecs1, test2_author_vecs1, train_author_ids, dev_author_ids, test_author_ids, test2_author_ids = extract_authorvecs(
            args)

        args.network = 'mention'
        train_author_vecs2, dev_author_vecs2, test_author_vecs2, test2_author_vecs2, _, _, _, _ = extract_authorvecs(
            args)

        args.network = 'retweet'
        train_author_vecs3, dev_author_vecs3, test_author_vecs3, test2_author_vecs3, _, _, _, _ = extract_authorvecs(
            args)

        train_author_vecs = []
        for i, j, k in zip(train_author_vecs1, train_author_vecs2,
                           train_author_vecs3):
            train_author_vecs.append((i, j, k))

        dev_author_vecs = []
        for i, j, k in zip(dev_author_vecs1, dev_author_vecs2,
                           dev_author_vecs3):
            dev_author_vecs.append((i, j, k))

        test_author_vecs = []
        for i, j, k in zip(test_author_vecs1, test_author_vecs2,
                           test_author_vecs3):
            test_author_vecs.append((i, j, k))

        test2_author_vecs = []
        for i, j, k in zip(test2_author_vecs1, test2_author_vecs2,
                           test2_author_vecs3):
            test2_author_vecs.append((i, j, k))

    else:
        train_author_vecs, dev_author_vecs, test_author_vecs, test2_author_vecs, train_author_ids, dev_author_ids, test_author_ids, test2_author_ids = extract_authorvecs(
            args)

    logging.info('obtained all author vectors ' + str(len(train_author_vecs)) +
                 ' ' + str(len(dev_author_vecs)) + ' ' +
                 str(len(test_author_vecs)) + ' ' +
                 str(len(test2_author_vecs)))

    batch_loss_vec = []
    dy.renew_cg()

    is_best = 0
    best_val = 0
    count = 0
    count_train = -1

    #early_stopping = 0

    for epoch in range(epochs):
        #if early_stopping>args.early_epochs:
        #	break

        all_inds = []
        num_train = int(len(train_set) / args.batch_size + 1) * args.batch_size

        #prev_time=time.time()

        for ii in range(num_train):

            count_train += 1
            if count_train == len(train_set):
                count_train = 0

            count += 1
            inputs, outputs = train_set[count_train]
            inputs_word, _ = train_set_word[count_train]
            '''
			data_point = {'inputs':inputs, 'inputs_word':inputs_word, 'outputs':outputs, 'train_author_vecs':train_author_vecs[i]}
			pickle.dump(data_point,open( "data_pickle/"+str(i)+".p", "wb" ))
			data_point = pickle.load( open( "data_pickle/"+str(i)+".p", "rb" ) )
			inputs = data_point['inputs']
			inputs_word = data_point['inputs_word']
			outputs = data_point['outputs']
			train_author_vec = data_point['train_author_vecs']
			'''

            #prev_time2 = time.time()
            #if train_author_vecs[count_train] !=None:

            vae_loss = 0
            if args.use_vae:
                loss, ind, vae_loss, l2_loss = network.get_full_loss(
                    inputs, inputs_word, outputs,
                    train_author_vecs[count_train], epoch < kk, True)
            else:
                loss, ind = network.get_loss(inputs, inputs_word, outputs,
                                             train_author_vecs[count_train],
                                             epoch < kk, True)

            #curr_time2 = time.time()

            #print ('time for one instance: ', curr_time2 - prev_time2)

            all_inds.append(ind)
            #print (loss)
            #a = input()
            batch_loss_vec.append(loss)

            if count % batch_size == 0:

                batch_loss = dy.esum(batch_loss_vec) / batch_size
                batch_loss.forward()
                batch_loss.backward()
                trainer.update()
                batch_loss_vec = []
                dy.renew_cg()
                count = 0
            #logging.info('finished minibatch: %d/%d',ii,num_train)

        #print ('until here-----')
        #curr_time = time.time()
        #print ('time for one epoch training: ', curr_time - prev_time)

        counts = []
        for i in range(args.num_basis):
            a = [v == i for v in all_inds]
            counts.append(sum(a))
        logging.info('distribution of the data points' + str(counts))

        #if ((i+1))%len(original_set) == 0:
        if args.plots:
            val_loss = get_val_set_loss(network, val_set, val_set_word,
                                        dev_author_vecs, epoch < kk,
                                        args.num_basis)
            losses.append(val_loss)
            iterations.append(epoch)
        #dy.renew_cg()

        #if ((i+1))%len(original_set)==0:
        train_loss = 0
        if args.slow:
            train_loss, train_vae_loss, train_l2_loss = get_val_set_loss(
                network, train_set, train_set_word, train_author_vecs,
                epoch < kk, args.num_basis)

        if args.write_errors:
            f = open(args.log_errors_file, 'a')
            f.write('\n--------- epoch no: --------- ')
            f.write(str(epoch) + '\n')
            f.close()
            f = open(args.log_errors_file, 'a')
            f.write('\n--------- oct27.train errors: --------- \n')
            f.close()
        #prev_time = time.time()
        trainacc, train_acc, train_confidence, oov_train, wrong_train, correct_train, auth_acc1, non_auth_acc1, eids1, counts21 = get_val_set_acc(
            network, train_set, train_set_word, train_author_vecs,
            train_author_ids, epoch < kk, args.num_basis)
        #curr_time = time.time()
        #print ('time for acc train: ', curr_time - prev_time)

        if args.write_errors:
            f = open(args.log_errors_file, 'a')
            f.write('\n--------- oct27.dev errors: ---------\n')
            f.close()

        val_loss, val_vae_loss, val_l2_loss = 0, 0, 0
        val_acc, oov_val, wrong_val, correct_val = 0, 0, 0, 0

        if args.slow:
            pass
            #val_loss,val_vae_loss = get_val_set_loss(network, val_set, val_set_word, dev_author_vecs,epoch<kk, args.num_basis)
        #prev_time = time.time()
        valacc, val_acc, val_confidence, oov_val, wrong_val, correct_val, auth_acc2, non_auth_acc2, eids2, counts22 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        #valacc, val_acc, val_confidence, oov_val, wrong_val, correct_val, auth_acc2, non_auth_acc2, eids2, counts22 = get_val_set_acc(network, val_set, val_set_word, dev_author_vecs, dev_author_ids, epoch<kk, args.num_basis)
        #curr_time = time.time()
        #print ('time for acc val: ', curr_time - prev_time)

        if args.write_errors:
            f = open(args.log_errors_file, 'a')
            f.write('\n---------  oct27.test errors: --------- \n')
            f.close()
        test_loss = 0
        if args.slow:
            test_loss, test_vae_loss, test_l2_loss = get_val_set_loss(
                network, test_set, test_set_word, test_author_vecs, epoch < kk,
                args.num_basis)
        #prev_time = time.time()
        testacc, test_acc, test_confidence, oov_test, wrong_test, correct_test, auth_acc3, non_auth_acc3, eids3, counts23 = get_val_set_acc(
            network, test_set, test_set_word, test_author_vecs,
            test_author_ids, epoch < kk, args.num_basis)
        #curr_time = time.time()
        #print ('time for acc test: ', curr_time - prev_time)

        if args.write_errors:
            f = open(args.log_errors_file, 'a')
            f.write('\n---------  daily547.test errors: --------- \n')
            f.close()
        test_loss2 = 0
        if args.slow:
            test_loss2, test_vae_loss2, test2_l2_loss = get_val_set_loss(
                network, test_set2, test_set2_word, test2_author_vecs,
                epoch < kk, args.num_basis)
        #prev_time = time.time()
        testacc2, test_acc2, test2_confidence, oov_test2, wrong_test2, correct_test2, auth_acc4, non_auth_acc4, eids4, counts24 = get_val_set_acc(
            network, test_set2, test_set2_word, test2_author_vecs,
            test2_author_ids, epoch < kk, args.num_basis)
        #curr_time = time.time()
        #print ('time for acc test2: ', curr_time - prev_time)

        #test_loss2 = get_val_set_loss(network, test_set2, test_set2_word, test_author_vecs, epoch<kk)
        #test_acc2, oov_test2, wrong_test2, correct_test2, auth_acc4, non_auth_acc4, eids4 = get_val_set_acc(network, test_set2, test_set2_word, test_author_vecs,epoch<kk)

        #prev_time = time.time()
        logging.info('epoch %d done', epoch)
        logging.info(
            'train loss: %f, train vae loss: %f, train l2 loss: %f, train acc: %f',
            train_loss, train_vae_loss, train_l2_loss, trainacc)
        logging.info(
            'val loss: %f, val vae loss: %f, val l2 loss: %f, val acc: %f',
            val_loss, val_vae_loss, val_l2_loss, valacc)
        logging.info(
            'test loss: %f, test vae loss: %f, test l2 loss: %f, test acc: %f',
            test_loss, test_vae_loss, test_l2_loss, testacc)
        logging.info(
            'test2 loss: %f, test2 vae loss: %f, test2 l2 loss: %f, test2 acc: %f',
            test_loss2, test_vae_loss2, test2_l2_loss, testacc2)

        logging.info(
            ' oov_train: %d/%d, %d, oov_val: %d/%d, %d, oov_test: %d/%d, %d, oov_test2: %d/%d, %d',
            oov_train, wrong_train, correct_train, oov_val, wrong_val,
            correct_val, oov_test, wrong_test, correct_test, oov_test2,
            wrong_test2, correct_test2)

        logging.info(
            'train: author_acc: %f, non_author_acc: %f, ' + str(eids1) + ' ' +
            str(counts21), auth_acc1, non_auth_acc1)
        logging.info(
            'dev: author_acc: %f, non_author_acc: %f, ' + str(eids2) + ' ' +
            str(counts22), auth_acc2, non_auth_acc2)
        logging.info(
            'test: author_acc: %f, non_author_acc: %f, ' + str(eids3) + ' ' +
            str(counts23), auth_acc3, non_auth_acc3)
        logging.info(
            'test2: author_acc: %f, non_author_acc: %f, ' + str(eids4) + ' ' +
            str(counts24), auth_acc4, non_auth_acc4)

        if args.plots:
            test_acc, test_confidence, confusion_matrix, auth_acc, non_auth_acc, eids = get_val_set_acc2(
                network, test_set, test_set_word, test_author_vecs, epoch < kk,
                args.num_basis)
            df_cm = pd.DataFrame(confusion_matrix,
                                 index=[i for i in tag_to_ix.keys()],
                                 columns=[i for i in tag_to_ix.keys()])
            fig = plt.figure(figsize=(10, 7))
            sn.heatmap(df_cm, annot=True)
            fig.savefig('figs/conf_matrix_' + str(epoch) + '.png')
            #a = input()

        if args.combine_train_dev:
            valacc = testacc
        elif args.combine_train_dev_test:
            valacc = testacc2
        else:
            valacc = valacc

        m = network.model
        if epoch == 0:
            best_acc = valacc
            best_epoch = 0
            #best_val = val_loss
            #if args.combine_train_dev:
            #	best_acc = testacc
            #else:
            #	best_acc = valacc
            if args.save_model:
                m.save(args.save_model)
                logging.info('saving best model')
        else:
            #if args.combine_train_dev:
            #	valacc = testacc
            #
            #if best_acc < valacc:
            #	early_stopping = 0
            #	if args.combine_train_dev:
            #		best_acc = testacc
            #	else:
            #		best_acc = valacc
            if best_acc <= valacc:
                best_acc = valacc
                best_epoch = epoch
                if args.save_model:
                    m.save(args.save_model)
                    logging.info('re-saving best model')
            #else:
            #	early_stopping+=1
        logging.info('best model is at epoch no: %d', best_epoch)

    logging.info('\nbest model details are at epoch no: %d', best_epoch)

    #curr_time = time.time()
    #print ('time for rest junk: ', curr_time - prev_time)
    '''
	if count%batch_size!=0:
		batch_loss = dy.esum(batch_loss_vec)/len(batch_loss_vec)
		batch_loss.forward()
		batch_loss.backward()
		trainer.update()
		batch_loss_vec=[]
		dy.renew_cg()
	'''

    if args.plots:
        fig = plt.figure()
        plt.plot(iterations, losses)
        axes = plt.gca()
        axes.set_xlim([0, epochs])
        axes.set_ylim([0, 10000])

        fig.savefig('figs/loss_plot.png')
Example #18
def train(network, train_data, dev_data, test_data, args):
    def get_val_set_acc(network, dev_data):
        evals = [
            network.evaluate(input_sentences, labels)
            for i, (input_sentences, labels) in enumerate(dev_data)
        ]

        dy.renew_cg()
        loss = [l for l, p, c, t in evals]
        correct = [c for l, p, c, t in evals]
        total = [t for l, p, c, t in evals]
        return 100.0 * sum(correct) / sum(total), sum(loss) / len(dev_data)

    if args.optimizer == 'adadelta':
        trainer = dy.AdadeltaTrainer(network.model)
        trainer.set_clip_threshold(5)
    elif args.optimizer == 'adam':
        trainer = dy.AdamTrainer(network.model, alpha=args.lr)
        trainer.set_clip_threshold(5)
    elif args.optimizer == 'sgd-momentum':
        trainer = dy.MomentumSGDTrainer(network.model, learning_rate=args.lr)
    else:
        logging.critical('This Optimizer is not valid or not allowed')

    losses = []
    iterations = []

    batch_loss_vec = []
    dy.renew_cg()

    is_best = 0
    best_val = 0
    count = 0
    count_train = -1
    for epoch in range(args.epochs):

        num_train = int(len(train_data) / args.batch_size +
                        1) * args.batch_size

        for ii in range(num_train):
            count_train += 1
            if count_train == len(train_data):
                count_train = 0

            count += 1
            inputs, outputs = train_data[count_train]

            loss, pred_labels, correct, total = network.get_loss(
                inputs, outputs)
            batch_loss_vec.append(loss)

            if count % args.batch_size == 0:
                batch_loss = dy.esum(batch_loss_vec) / args.batch_size
                batch_loss.forward()
                batch_loss.backward()
                trainer.update()
                batch_loss_vec = []
                dy.renew_cg()

        dev_acc, dev_loss = get_val_set_acc(network, dev_data)
        losses.append(dev_loss)
        iterations.append(epoch)

        test_acc, test_loss = get_val_set_acc(network, test_data)

        logging.info(
            'epoch %d done, dev loss: %f, dev acc: %f, test loss: %f, test acc: %f',
            epoch, dev_loss, dev_acc, test_loss, test_acc)

        m = network.model
        if epoch == 0:
            best_val = dev_loss
            if args.save_model:
                m.save(args.save_model)
                logging.info('saving best model')
        else:
            if dev_loss < best_val:
                best_val = dev_loss
                if args.save_model:
                    m.save(args.save_model)
                    logging.info('re-saving best model')

    if count % args.batch_size != 0:
        batch_loss = dy.esum(batch_loss_vec) / len(batch_loss_vec)
        batch_loss.forward()
        batch_loss.backward()
        trainer.update()
        batch_loss_vec = []
        dy.renew_cg()

    if args.plots:
        fig = plt.figure()
        plt.plot(iterations, losses)
        axes = plt.gca()
        axes.set_xlim([0, args.epochs])
        axes.set_ylim([0, 10000])

        fig.savefig('figs/loss_plot.png')
Example #19
else: args.rnn = dynet.SimpleRNNBuilder

BEGIN_TOKEN = '<s>'
END_TOKEN = '<e>'

# define model and obtain vocabulary
# (reload vocab files if saved model or create new vocab files if new model)

model = dynet.Model()

if not args.trainer or args.trainer=="simple_sgd":
    trainer = dynet.SimpleSGDTrainer(model)
elif args.trainer == "momentum_sgd":
    trainer = dynet.MomentumSGDTrainer(model)
elif args.trainer == "adadelta":
    trainer = dynet.AdadeltaTrainer(model)
elif args.trainer == "adagrad":
    trainer = dynet.AdagradTrainer(model)
elif args.trainer == "adam":
    trainer = dynet.AdamTrainer(model)
else:
    raise Exception("Trainer not recognized! Please use one of {simple_sgd, momentum_sgd, adadelta, adagrad, adam}")

trainer.set_clip_threshold(-1.0)  # a non-positive threshold disables gradient clipping
trainer.set_sparse_updates(True)  # update only the lookup-parameter rows touched by each batch

# load corpus

print "Loading corpus..."
train_data = list(util.get_reader(args.reader_mode)(args.train, mode=args.reader_mode, begin=BEGIN_TOKEN, end=END_TOKEN))
if args.valid:
Exemple #20
0
def train_model(model, embeddings_lookup, hidden_W, hidden_bias, MLP_W,
                MLP_bias, encoder_lstm, train_sents, train_labels, dev_sents,
                dev_labels, word2int):
    print 'training...'
    aggregated_loss = 0
    trainer = dy.AdadeltaTrainer(model)
    train_len = len(train_sents)
    patience = 0
    best_dev = 0
    avg_loss = 0
    for e in xrange(EPOCHS):

        start = time.time()

        print 'starting epoch {}'.format(e)

        # randomize the training set
        indices = range(train_len)
        random.shuffle(indices)
        train_set = zip(train_sents, train_labels)
        shuffled_train_set = [train_set[i] for i in indices]

        # compute loss for each example and update
        for i, example in enumerate(shuffled_train_set):
            sent, label = example
            loss = one_sent_loss(model, embeddings_lookup, hidden_W,
                                 hidden_bias, MLP_W, MLP_bias, encoder_lstm,
                                 sent, label, word2int)

            loss_value = loss.value()
            aggregated_loss += loss_value
            loss.backward()
            trainer.update()
            if i > 0:
                avg_loss = aggregated_loss / float(i + e * train_len)
            else:
                avg_loss = aggregated_loss

            if i % 10000 == 0:
                print 'epoch: {} avg. loss: {} went through {} examples'.format(
                    e, avg_loss, i)

        # evaluate on dev after each epoch:
        dev_score = evaluate_model(model, embeddings_lookup, hidden_W,
                                   hidden_bias, MLP_W, MLP_bias, encoder_lstm,
                                   dev_sents, dev_labels, word2int)

        if dev_score < best_dev:
            patience += 1
        else:
            patience = 0
            best_dev = dev_score
            model.save('best_model.txt')

        print 'epoch: {} avg. loss: {} dev acc.: {} best dev acc.:{}'.format(
            e, avg_loss, dev_score, best_dev)
        end = time.time()
        print 'epoch took {} seconds'.format(end - start)

        if patience > 10:
            return
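
The dev-driven early stopping above (save when the dev score improves, stop once the patience budget is spent) can be isolated into a small helper. The sketch below is illustrative only; the class name PatienceTracker and its API are not from the original code.

class PatienceTracker(object):
    """Track the best dev score and count epochs without improvement."""

    def __init__(self, max_patience=10):
        self.best = float('-inf')
        self.bad_epochs = 0
        self.max_patience = max_patience

    def update(self, score):
        # returns (improved, should_stop)
        if score > self.best:
            self.best = score
            self.bad_epochs = 0
            return True, False
        self.bad_epochs += 1
        return False, self.bad_epochs > self.max_patience

Each epoch, improved, stop = tracker.update(dev_score); save the model when improved is True and stop training when stop is True.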
Exemple #21
0
def train_model(model, encoder, decoder, params, train_inputs, train_outputs,
                dev_inputs, dev_outputs, y2int, int2y, epochs, optimization,
                results_file_path, plot, batch_size, eval_after):
    print 'training...'

    np.random.seed(17)
    random.seed(17)

    # sort training sentences by length in descending order
    train_data = zip(train_inputs, train_outputs)
    train_data.sort(key=lambda t: -len(t[0]))
    train_order = [
        x * batch_size for x in range(len(train_data) / batch_size + 1)
    ]

    # sort dev sentences by length in descending order
    dev_batch_size = 1
    dev_data = zip(dev_inputs, dev_outputs)
    dev_data.sort(key=lambda t: -len(t[0]))
    dev_order = [
        x * dev_batch_size for x in range(len(dev_data) / dev_batch_size + 1)
    ]

    if optimization == 'ADAM':
        trainer = dn.AdamTrainer(
            model
        )  # lam=REGULARIZATION, alpha=LEARNING_RATE, beta_1=0.9, beta_2=0.999, eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = dn.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = dn.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = dn.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = dn.AdadeltaTrainer(model)
    else:
        trainer = dn.SimpleSGDTrainer(model)

    trainer.set_clip_threshold(float(arguments['--grad-clip']))
    seen_examples_count = 0
    total_loss = 0
    best_dev_epoch = 0
    best_train_epoch = 0
    patience = 0
    train_len = len(train_outputs)
    dev_len = len(dev_inputs)
    avg_train_loss = -1
    train_loss_patience = 0
    train_loss_patience_threshold = 99999999
    max_patience = int(arguments['--max-patience'])
    log_path = results_file_path + '_log.txt'
    start_epoch, checkpoints_x, train_loss_y, dev_loss_y, dev_accuracy_y = read_from_log(
        log_path)

    if len(train_loss_y) > 0:
        total_batches = checkpoints_x[-1]
        best_avg_train_loss = min(train_loss_y)
        best_dev_accuracy = max(dev_accuracy_y)
        best_dev_loss = min(dev_loss_y)
    else:
        total_batches = 0
        best_avg_train_loss = 999999
        best_dev_loss = 999999
        best_dev_accuracy = 0

    # progress bar init
    # noinspection PyArgumentList
    widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets,
                                                 maxval=epochs).start()

    for e in xrange(start_epoch, epochs):

        # shuffle the batch start indices in each epoch
        random.shuffle(train_order)
        batches_per_epoch = len(train_order)
        start = time.time()

        # go through batches
        for i, batch_start_index in enumerate(train_order, start=1):
            total_batches += 1

            # get batch examples
            batch_inputs = [
                x[0] for x in train_data[batch_start_index:batch_start_index +
                                         batch_size]
            ]
            batch_outputs = [
                x[1] for x in train_data[batch_start_index:batch_start_index +
                                         batch_size]
            ]
            actual_batch_size = len(batch_inputs)

            # skip empty batches
            if actual_batch_size == 0 or len(batch_inputs[0]) == 0:
                continue

            # compute batch loss
            loss = compute_batch_loss(encoder, decoder, batch_inputs,
                                      batch_outputs, y2int)

            # forward pass
            total_loss += loss.scalar_value()
            loss.backward()

            # update parameters
            trainer.update()

            seen_examples_count += actual_batch_size

            # avg loss per sample
            avg_train_loss = total_loss / float(i * batch_size + e * train_len)

            # start patience counts only after 20 batches
            if avg_train_loss < best_avg_train_loss and total_batches > 20:
                best_avg_train_loss = avg_train_loss
                train_loss_patience = 0
            else:
                train_loss_patience += 1
                if train_loss_patience > train_loss_patience_threshold:
                    print 'train loss patience exceeded: {}'.format(
                        train_loss_patience)
                    return model, params, e, best_train_epoch

            if total_batches % 100 == 0 and total_batches > 0:
                print 'epoch {}: {} batches out of {} ({} examples out of {}) total: {} batches, {} examples. avg \
loss per example: {}'.format(e, i, batches_per_epoch, i * batch_size,
                             train_len, total_batches,
                             total_batches * batch_size, avg_train_loss)

                # print sentences per second
                end = time.time()
                elapsed_seconds = end - start
                print '{} sentences per second'.format(seen_examples_count /
                                                       elapsed_seconds)
                seen_examples_count = 0
                start = time.time()

            # checkpoint
            if total_batches % eval_after == 0:

                print 'starting checkpoint evaluation'
                dev_bleu, dev_loss = checkpoint_eval(
                    encoder,
                    decoder,
                    params,
                    dev_batch_size,
                    dev_data,
                    dev_inputs,
                    dev_len,
                    dev_order,
                    dev_outputs,
                    int2y,
                    y2int,
                    results_file_path=results_file_path)

                log_to_file(log_path, e, total_batches, avg_train_loss,
                            dev_loss, dev_bleu)
                save_model(model,
                           results_file_path,
                           total_batches,
                           models_to_save=int(arguments['--models-to-save']))
                if dev_bleu >= best_dev_accuracy:
                    best_dev_accuracy = dev_bleu
                    best_dev_epoch = e

                    # save best model to disk
                    save_best_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                if dev_loss < best_dev_loss:
                    best_dev_loss = dev_loss

                print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} dev bleu: {3:.4f} \
best dev bleu {4:.4f} (epoch {5}) patience = {6}'.format(
                    e, avg_train_loss, dev_loss, dev_bleu, best_dev_accuracy,
                    best_dev_epoch, patience)

                if patience == max_patience:
                    print 'out of patience after {0} checkpoints'.format(
                        str(e))
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    print 'checkpoint patience exceeded'
                    return model, params, e, best_train_epoch

                # plotting results from checkpoint evaluation
                if plot:
                    train_loss_y.append(avg_train_loss)
                    checkpoints_x.append(total_batches)
                    dev_accuracy_y.append(dev_bleu)
                    dev_loss_y.append(dev_loss)

                    y_vals = [('train_loss', train_loss_y),
                              ('dev loss', dev_loss_y),
                              ('dev_bleu', dev_accuracy_y)]
                    common.plot_to_file(y_vals,
                                        x_name='total batches',
                                        x_vals=checkpoints_x,
                                        file_path=results_file_path +
                                        '_learning_curve.png')

        # update progress bar after completing epoch
        train_progress_bar.update(e)

    # update progress bar after completing training
    train_progress_bar.finish()

    if plot:
        # clear plot when done
        plt.cla()

    print 'finished training. average loss: {} best epoch on dev: {} best epoch on train: {}'.format(
        str(avg_train_loss), best_dev_epoch, best_train_epoch)

    return model, params, e, best_train_epoch
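
The read_from_log block near the top of this function rebuilds the running bests when training resumes; since the later comparisons treat lower loss and higher BLEU as better, the restored values have to follow the same convention. A minimal sketch of that bookkeeping, assuming the logged history is the same four parallel lists used above:

def rebuild_bests(checkpoints_x, train_loss_y, dev_loss_y, dev_accuracy_y):
    # lower is better for the losses, higher is better for dev BLEU
    if not train_loss_y:
        return 0, 999999, 999999, 0
    total_batches = checkpoints_x[-1]
    best_avg_train_loss = min(train_loss_y)
    best_dev_loss = min(dev_loss_y)
    best_dev_accuracy = max(dev_accuracy_y)
    return total_batches, best_avg_train_loss, best_dev_loss, best_dev_accuracy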
Exemple #22
0
        meta.w2i = {}
        for w in wvm.vocab:
            meta.w2i[w] = wvm.vocab[w].index

    if args.save_model:
        pickle.dump(meta, open('%s.meta' % args.save_model, 'wb'))
    if args.load_model:
        ontoparser = SubsumptionLearning(model=args.load_model)
    else:
        ontoparser = SubsumptionLearning(meta=meta)
        trainers = {
            'momsgd': dy.MomentumSGDTrainer(ontoparser.model, edecay=0.25),
            'adam': dy.AdamTrainer(ontoparser.model, edecay=0.25),
            'simsgd': dy.SimpleSGDTrainer(ontoparser.model, edecay=0.25),
            'adagrad': dy.AdagradTrainer(ontoparser.model, edecay=0.25),
            'adadelta': dy.AdadeltaTrainer(ontoparser.model, edecay=0.25)
        }
        trainer = trainers[args.trainer]
        nntraining(train_sents)

    if args.dev:
        accuracy = Test(inputGenDev)
        sys.stdout.write("Accuracy: {}%\n".format(accuracy))

    if args.isDaemon and args.daemonPort:
        sys.stderr.write('Listening at port %d\n' % args.daemonPort)
        host = "0.0.0.0"  #Listen on all interfaces
        port = args.daemonPort  #Port number

        tcpsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        tcpsock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
Exemple #23
0
    plt.legend(handles=[blue_patch])

    with open(os.path.join('..', 'processed', 'train_ix.pkl'), 'rb') as f:
        train_ix = pickle.load(f)

    if USE_UNLABELED:
        with open(os.path.join('..', 'processed', 'unlab_ix.pkl'), 'rb') as f:
            train_ix.extend(pickle.load(f))


    with open(os.path.join('..', 'processed', 'valid_ix.pkl'), 'rb') as f:
        valid_ix = pickle.load(f)

    # initialize dynet parameters and learning algorithm
    params = dy.ParameterCollection()
    trainer = dy.AdadeltaTrainer(params)
    lm = SimpleNLM(params, vocab_size=VOCAB_SIZE, hidden_dim=HIDDEN_DIM)

    train_batches = make_batches(train_ix, batch_size=BATCH_SIZE)
    valid_batches = make_batches(valid_ix, batch_size=BATCH_SIZE)

    n_train_words = sum(len(sent) for _, sent in train_ix)
    n_valid_words = sum(len(sent) for _, sent in valid_ix)

    for it in range(MAX_EPOCHS):
        tic = clock()

        # iterate over all training batches, accumulate loss.
        total_loss = 0
        for batch in train_batches:
            dy.renew_cg()
Exemple #24
0
	def __init__(self, args, vocabLengthSource, vocabLengthActionRule, vocabLengthNodes, vocabLengthTarget):

		self.flag_copy = True

		self.vocabLengthSource = vocabLengthSource
		self.vocabLengthActionRule = vocabLengthActionRule
		self.vocabLengthNodes = vocabLengthNodes
		self.vocabLengthTarget = vocabLengthTarget

		# parameters for the model
		self.numLayer = args.numLayer
		self.embeddingSourceSize = args.embeddingSourceSize
		self.embeddingApplySize = args.embeddingApplySize
		self.embeddingGenSize = args.embeddingGenSize
		self.embeddingNodeSize = args.embeddingNodeSize
		self.hiddenSize = args.hiddenSize
		self.attSize = args.attSize
		self.pointerSize = args.pointerSize
		self.dropout = args.dropout
		self.embeddingRuletypeSize = 2
		self.learningRate= args.learningRate


		self.model = dy.ParameterCollection()
		#self.trainer = dy.AdamTrainer(self.model, alpha=self.learningRate)
		self.trainer = dy.AdadeltaTrainer(self.model)

		# source lookup
		self.sourceLookup = self.model.add_lookup_parameters((self.vocabLengthSource, self.embeddingSourceSize))

		# action embedding matrix
		self.actionRuleLookup = self.model.add_lookup_parameters((self.vocabLengthActionRule, self.embeddingApplySize))

		# for node type lookup
		self.nodeTypeLookup = self.model.add_lookup_parameters((self.vocabLengthNodes, self.embeddingNodeSize))

		# for gen token lookup
		self.gentokenLookup = self.model.add_lookup_parameters((self.vocabLengthTarget, self.embeddingGenSize))


		# adding parameters to the AST Neural Network
		self.attentionSource = self.model.add_parameters((self.attSize, self.hiddenSize * 2))
		self.attentionTarget = self.model.add_parameters((self.attSize, self.numLayer*self.hiddenSize))
		self.attentionParameter = self.model.add_parameters((1, self.attSize))

		self.w_selection_gen_softmax = self.model.add_parameters((2, self.hiddenSize))

		self.w_out_rule = self.model.add_parameters((self.embeddingApplySize, self.hiddenSize)) # should change when the number of hidden layers increases
		self.b_out_rule = self.model.add_parameters((self.embeddingApplySize))

		self.w_out_vocab = self.model.add_parameters((self.embeddingApplySize, self.hiddenSize + self.hiddenSize * 2)) # should change when the number of hidden layers increases
		self.b_out_vocab = self.model.add_parameters((self.embeddingApplySize))

		self.w_pointer_hidden = self.model.add_parameters((self.pointerSize, 2*self.hiddenSize + 2*self.hiddenSize + self.hiddenSize))
		self.b_pointer_hidden = self.model.add_parameters((self.pointerSize))
		self.w_pointer_out = self.model.add_parameters((1, self.pointerSize))
		self.b_pointer_out = self.model.add_parameters((1))
		# initializing the encoder and decoder
		self.forward_encoder = dy.VanillaLSTMBuilder(self.numLayer, self.embeddingSourceSize, self.hiddenSize, self.model)
		self.backward_encoder = dy.VanillaLSTMBuilder(self.numLayer, self.embeddingSourceSize, self.hiddenSize, self.model)

		# check this
		# embedding size + (previous action embedding + context vector + node type embedding + parent feeding)
		# parent feeding - hidden states of parent action + embedding of parent action
		self.inputDecoderSize = self.embeddingApplySize + self.hiddenSize * 2 + self.hiddenSize + self.embeddingApplySize + self.embeddingNodeSize
		self.decoder = dy.VanillaLSTMBuilder(self.numLayer, self.inputDecoderSize, self.hiddenSize, self.model)
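
The constructor above hard-codes dy.AdadeltaTrainer and keeps the Adam alternative commented out. If the choice should be driven by configuration, a small helper in the spirit of the other examples could look like this sketch (the name build_trainer and its option strings are illustrative, not taken from the original code):

import dynet as dy

def build_trainer(model, name='adadelta', learning_rate=0.001):
    # model is a dy.ParameterCollection; unknown names fall back to Adadelta
    if name == 'adam':
        return dy.AdamTrainer(model, alpha=learning_rate)
    if name == 'sgd':
        return dy.SimpleSGDTrainer(model, learning_rate=learning_rate)
    return dy.AdadeltaTrainer(model)

In the constructor, self.trainer = build_trainer(self.model, name=getattr(args, 'optimizer', 'adadelta'), learning_rate=self.learningRate) could then replace the hard-coded assignment.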
Exemple #25
0
def train_model(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, train_lemmas, train_feat_dicts, train_words, dev_lemmas,
                dev_feat_dicts, dev_words, alphabet_index, inverse_alphabet_index, epochs, optimization,
                results_file_path, train_aligned_pairs, dev_aligned_pairs, feat_index, feature_types,
                plot):
    print 'training...'

    np.random.seed(17)
    random.seed(17)

    if optimization == 'ADAM':
        trainer = pc.AdamTrainer(model, lam=REGULARIZATION, alpha=LEARNING_RATE, beta_1=0.9, beta_2=0.999, eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = pc.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = pc.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = pc.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = pc.AdadeltaTrainer(model)
    else:
        trainer = pc.SimpleSGDTrainer(model)

    total_loss = 0
    best_avg_dev_loss = 999
    best_dev_accuracy = -1
    best_train_accuracy = -1
    patience = 0
    train_len = len(train_words)
    sanity_set_size = 100
    epochs_x = []
    train_loss_y = []
    dev_loss_y = []
    train_accuracy_y = []
    dev_accuracy_y = []
    e = -1

    # progress bar init
    widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets, maxval=epochs).start()
    avg_loss = -1

    for e in xrange(epochs):

        # randomize the training set
        indices = range(train_len)
        random.shuffle(indices)
        train_set = zip(train_lemmas, train_feat_dicts, train_words, train_aligned_pairs)
        train_set = [train_set[i] for i in indices]

        # compute loss for each example and update
        for i, example in enumerate(train_set):
            lemma, feats, word, alignment = example
            loss = one_word_loss(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word,
                                 alphabet_index, alignment, feat_index, feature_types)
            loss_value = loss.value()
            total_loss += loss_value
            loss.backward()
            trainer.update()
            if i > 0:
                avg_loss = total_loss / float(i + e * train_len)
            else:
                avg_loss = total_loss

        if EARLY_STOPPING:

            # get train accuracy
            print 'evaluating on train...'
            train_predictions = predict_sequences(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, alphabet_index,
                                                  inverse_alphabet_index, train_lemmas[:sanity_set_size],
                                                  train_feat_dicts[:sanity_set_size],
                                                  feat_index,
                                                  feature_types)

            train_accuracy = evaluate_model(train_predictions, train_lemmas[:sanity_set_size],
                                            train_feat_dicts[:sanity_set_size],
                                            train_words[:sanity_set_size],
                                            feature_types, print_results=False)[1]

            if train_accuracy > best_train_accuracy:
                best_train_accuracy = train_accuracy

            dev_accuracy = 0
            avg_dev_loss = 0

            if len(dev_lemmas) > 0:

                # get dev accuracy
                dev_predictions = predict_sequences(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, alphabet_index,
                                                    inverse_alphabet_index, dev_lemmas, dev_feat_dicts, feat_index,
                                                    feature_types)
                print 'evaluating on dev...'
                # get dev accuracy
                dev_accuracy = evaluate_model(dev_predictions, dev_lemmas, dev_feat_dicts, dev_words, feature_types,
                                              print_results=True)[1]

                if dev_accuracy > best_dev_accuracy:
                    best_dev_accuracy = dev_accuracy

                    # save best model to disk
                    save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                # found "perfect" model
                if dev_accuracy == 1:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

                # get dev loss
                total_dev_loss = 0
                for i in xrange(len(dev_lemmas)):
                    total_dev_loss += one_word_loss(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, dev_lemmas[i],
                                                    dev_feat_dicts[i], dev_words[i], alphabet_index,
                                                    dev_aligned_pairs[i], feat_index, feature_types).value()

                avg_dev_loss = total_dev_loss / float(len(dev_lemmas))
                if avg_dev_loss < best_avg_dev_loss:
                    best_avg_dev_loss = avg_dev_loss

                print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} dev accuracy: {3:.4f} train accuracy = {4:.4f} \
 best dev accuracy {5:.4f} best train accuracy: {6:.4f} patience = {7}'.format(e, avg_loss, avg_dev_loss, dev_accuracy,
                                                                               train_accuracy, best_dev_accuracy,
                                                                               best_train_accuracy, patience)

                log_to_file(results_file_path + '_log.txt', e, avg_loss, train_accuracy, dev_accuracy)

                if patience == MAX_PATIENCE:
                    print 'out of patience after {0} epochs'.format(str(e))
                    # TODO: would like to return best model but pycnn has a bug with save and load. Maybe copy via code?
                    # return best_model[0]
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e
            else:

                # if no dev set is present, optimize on train set
                print 'no dev set for early stopping, running all epochs until perfectly fitting or patience was \
                reached on the train set'

                if train_accuracy > best_train_accuracy:
                    best_train_accuracy = train_accuracy

                    # save best model to disk
                    save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                print 'epoch: {0} train loss: {1:.4f} train accuracy = {2:.4f} best train accuracy: {3:.4f} \
                patience = {4}'.format(e, avg_loss, train_accuracy, best_train_accuracy, patience)

                # found "perfect" model on train set or patience has reached
                if train_accuracy == 1 or patience == MAX_PATIENCE:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

            # update lists for plotting
            train_accuracy_y.append(train_accuracy)
            epochs_x.append(e)
            train_loss_y.append(avg_loss)
            dev_loss_y.append(avg_dev_loss)
            dev_accuracy_y.append(dev_accuracy)

        # finished epoch
        train_progress_bar.update(e)
        if plot:
            with plt.style.context('fivethirtyeight'):
                p1, = plt.plot(epochs_x, dev_loss_y, label='dev loss')
                p2, = plt.plot(epochs_x, train_loss_y, label='train loss')
                p3, = plt.plot(epochs_x, dev_accuracy_y, label='dev acc.')
                p4, = plt.plot(epochs_x, train_accuracy_y, label='train acc.')
                plt.legend(loc='upper left', handles=[p1, p2, p3, p4])
            plt.savefig(results_file_path + '.png')
    train_progress_bar.finish()
    if plot:
        plt.cla()
    print 'finished training. average loss: ' + str(avg_loss)
    return model, e
Exemple #26
0
 def __init__(self, exp_global=Ref(Path("exp_global")), eps=1e-6, rho=0.95):
     self.optimizer = dy.AdadeltaTrainer(
         exp_global.dynet_param_collection.param_col, eps, rho)
Exemple #27
0
    def train(self, fdir):
        trainer = dy.AdadeltaTrainer(self.model)

        best_acc, repeat = 0.0, 0
        batch = 40
        for epoch in range(60):
            dy.renew_cg()
            losses = []
            closs = 0.0
            for i, traininst in enumerate(self.trainset['refex']):
                pre_context = self.trainset['pre_context'][i]
                pos_context = self.trainset['pos_context'][i]
                refex = self.trainset['refex'][i]
                entity = self.trainset['entity'][i]
                loss = self.get_loss(pre_context, pos_context, refex, entity)
                losses.append(loss)

                if len(losses) == batch:
                    loss = dy.esum(losses)
                    closs += loss.value()
                    loss.backward()
                    trainer.update()
                    dy.renew_cg()

                    print("Epoch: {0} \t Loss: {1}".format(epoch, (closs / batch)), end='       \r')
                    losses = []
                    closs = 0.0

            outputs, num, dem = self.validate()
            acc = round(float(num) / dem, 2)

            print("Dev acc: {0} \t Best acc: {1}".format(str(num/dem), best_acc))

            # Saving the model with best accuracy
            if best_acc == 0.0 or acc > best_acc:
                best_acc = acc

                fresults = os.path.join(fdir, 'results')
                if not os.path.exists(fresults):
                    os.mkdir(fresults)
                fname = 'dev_best_' + \
                        str(self.LSTM_NUM_OF_LAYERS) + '_' + \
                        str(self.EMBEDDINGS_SIZE) + '_' + \
                        str(self.STATE_SIZE) + '_' + \
                        str(self.ATTENTION_SIZE) + '_' + \
                        str(self.DROPOUT).split('.')[1] + '_' + \
                        str(self.character) + '_' + \
                        str(self.BEAM)
                self.write(os.path.join(fresults, fname), outputs)

                fmodels = os.path.join(fdir, 'models')
                if not os.path.exists(fmodels):
                    os.mkdir(fmodels)
                fname = 'best_' + \
                        str(self.LSTM_NUM_OF_LAYERS) + '_' + \
                        str(self.EMBEDDINGS_SIZE) + '_' + \
                        str(self.STATE_SIZE) + '_' + \
                        str(self.ATTENTION_SIZE) + '_' + \
                        str(self.DROPOUT).split('.')[1] + '_' + \
                        str(self.character) + '_' + \
                        str(self.BEAM)
                self.model.save(os.path.join(fmodels, fname))

                repeat = 0
            else:
                repeat += 1

            # In case the accuracy does not increase in 20 epochs, break the process
            if repeat == 20:
                break

        fmodels = os.path.join(fdir, 'models')
        fname = str(self.LSTM_NUM_OF_LAYERS) + '_' + \
                str(self.EMBEDDINGS_SIZE) + '_' + \
                str(self.STATE_SIZE) + '_' + \
                str(self.ATTENTION_SIZE) + '_' + \
                str(self.DROPOUT).split('.')[1] + '_' + \
                str(self.character) + '_' + \
                str(self.BEAM)
        self.model.save(os.path.join(fmodels, fname))
Exemple #28
0
    def train(self):
        trainer = dy.AdadeltaTrainer(self.model)

        epoch_timing = []
        early = 0.0
        best_acc = 0.0
        f = open('logging.txt', 'w')
        for epoch in range(self.EPOCH):
            print('\n')
            dy.renew_cg()
            losses = []
            closs = 0
            batch_timing = []
            for i, trainrow in enumerate(self.trainset):
                start = time.time()
                question = trainrow['question']
                answer = trainrow['answer']
                image = self.id2img[trainrow['face_id']]

                loss = self.get_loss(image, question, answer)
                losses.append(loss)
                end = time.time()
                t = (end - start)
                batch_timing.append(t)
                epoch_timing.append(t)

                if len(losses) == self.BATCH:
                    loss = dy.esum(losses)
                    _loss = loss.value()
                    closs += _loss
                    loss.backward()
                    trainer.update()
                    dy.renew_cg()

                    # percentage of trainset processed
                    percentage = str(
                        round((float(i + 1) / len(self.trainset)) * 100,
                              2)) + '%'
                    # time of epoch processing
                    time_epoch = sum(epoch_timing)
                    if time_epoch > 3600:
                        time_epoch = str(round(time_epoch / 3600, 2)) + ' h'
                    elif time_epoch > 60:
                        time_epoch = str(round(time_epoch / 60, 2)) + ' min'
                    else:
                        time_epoch = str(round(time_epoch, 2)) + ' sec'

                    print(
                        "Epoch: {0} \t\t Loss: {1} \t\t Epoch time: {2} \t\t Trainset: {3}"
                        .format(epoch + 1, round(_loss, 2), time_epoch,
                                percentage),
                        end='       \r')
                    losses = []
                    batch_timing = []

            print("\nEpoch: {0} \t\t Total Loss / Batch: {1}".format(
                epoch + 1, round(closs / self.BATCH, 2)))
            acc = self.validate()
            print("\nEpoch: {0} \t\t Dev acc: {1} \t\t Best acc: {2}".format(
                epoch + 1, round(acc, 2), round(best_acc, 2)))
            f.write("Epoch: {0} \t\t Dev acc: {1} \t\t Best acc: {2}\n".format(
                epoch + 1, round(acc, 2), round(best_acc, 2)))
            if acc > best_acc:
                best_acc = acc
                early = 0
            else:
                early += 1

            if early == 50:
                break
            epoch_timing = []
        f.close()
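
The elapsed-time reporting above rounds the running epoch time into seconds, minutes, or hours depending on magnitude; a small helper version of the same formatting, purely illustrative:

def format_elapsed(seconds):
    # render a duration in the largest sensible unit, as in the loop above
    if seconds > 3600:
        return '{} h'.format(round(seconds / 3600, 2))
    if seconds > 60:
        return '{} min'.format(round(seconds / 60, 2))
    return '{} sec'.format(round(seconds, 2))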