Example 1
import sys

import dynet as dy
def get_trainer(opt, s2s):
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(s2s.pc,
                                      e0=opt.learning_rate,
                                      edecay=opt.learning_rate_decay)
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.pc,
                                        e0_min=opt.learning_rate / 10.0,
                                        e0_max=opt.learning_rate,
                                        edecay=opt.learning_rate_decay)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(s2s.pc,
                                        e0=opt.learning_rate,
                                        edecay=opt.learning_rate_decay)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.pc,
                                    e0=opt.learning_rate,
                                    edecay=opt.learning_rate_decay)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.pc,
                                 opt.learning_rate,
                                 edecay=opt.learning_rate_decay)
    else:
        print('Trainer name invalid or not provided, using SGD',
              file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(s2s.pc,
                                      e0=opt.learning_rate,
                                      edecay=opt.learning_rate_decay)

    trainer.set_clip_threshold(opt.gradient_clip)

    return trainer
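
A minimal usage sketch for the function above; the opt values and the S2S stand-in are hypothetical, and the e0/edecay keywords assume an older DyNet release (current versions spell this learning_rate):

# Hypothetical caller for get_trainer (illustrative values only).
from argparse import Namespace

import dynet as dy

class S2S(object):
    def __init__(self):
        self.pc = dy.Model()  # ParameterCollection in newer DyNet

opt = Namespace(trainer='clr', learning_rate=0.1,
                learning_rate_decay=0.0, gradient_clip=5.0)
trainer = get_trainer(opt, S2S())  # -> dy.CyclicalSGDTrainer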
Example 2
    def train(self,
              dataset,
              epoch=1,
              valid_dataset=None,
              test_dataset=None,
              resume=True):
        # haha, there is both a large API and a small API here
        trainer = dy.CyclicalSGDTrainer(self.pc)

        if resume:
            resume_from = max(
                [int(x.split('_')[1]) for x in os.listdir('save/')])
            self.pc.populate("save/model_{}".format(resume_from))
            print("[Train] Resume from epoch {}".format(resume_from))
        else:
            resume_from = 0
        best_uas = 0
        best_las = 0
        records = []  # (epoch, uas, las)

        for e in range(resume_from, resume_from + epoch):
            # shuffle dataset
            random.shuffle(dataset)
            for sent_id, sent in enumerate(dataset, 1):
                dy.renew_cg()
                length = len(sent.form)
                if length == 1:
                    continue
                loss, _ = self.__call__(sent.form,
                                        sent.upos,
                                        sent.xpos,
                                        train=True,
                                        target_transitions=sent.transitions)
                (loss[0] + loss[1]).forward()
                # trick: train the two losses separately on alternating steps
                if sent_id % 5 == 0:
                    loss[1].backward()
                else:
                    loss[0].backward()
                # (loss[0] + loss[1]).backward()  # alternative: joint update
                trainer.update()
                if sent_id % 100 == 0:
                    print(
                        "[Train]\tepoch: {}\tsent_id: {}\tstructure_loss: {:.6f}\tdeprel_loss: {:.6f}"
                        .format(e, sent_id, loss[0].scalar_value() / length,
                                loss[1].scalar_value() / length))
            if valid_dataset:
                uas, las, n_sents, n_tokens = self.test(valid_dataset)
                print("[Valid]\tepoch: {}\tUAS: {:.6f}\tLAS: {:.6f}".format(
                    e, uas, las))
                if uas > best_uas or las > best_las:
                    self.pc.save("save/model_{}".format(e))
                    records.append((e, uas, las))
                    best_uas = max(best_uas, uas)
                    best_las = max(best_las, las)
        if test_dataset:
            best_uas_model = max(records, key=itemgetter(1))[0]
            self.pc.populate("save/model_{}".format(best_uas_model))
            uas, las, n_sents, n_tokens = self.test(test_dataset)
            print("[Test]\tUAS: {:.6f}\tLAS: {:.6f}".format(uas, las))
Example 3
    def add_parameters(self,
                       dropout,
                       lstm_size,
                       optimizer,
                       model_type,
                       gru=True):  # note: unused; model_type selects the cell

        if model_type == "gru":
            self.encoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                             lstm_size, self.model)
            self.encoder_rnn.set_dropout(dropout)
            self.encoder_rnn2 = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                              lstm_size, self.model)
            self.encoder_rnn2.set_dropout(dropout)
            self.decoder_rnn = dy.GRUBuilder(NUM_LAYERS,
                                             EMBEDDING_SIZE + lstm_size,
                                             lstm_size, self.model)
            self.decoder_rnn.set_dropout(dropout)
        else:

            self.encoder_rnn = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                              lstm_size, self.model)
            self.encoder_rnn.set_dropout(dropout)
            self.encoder_rnn2 = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                               lstm_size, self.model)
            self.encoder_rnn2.set_dropout(dropout)
            self.decoder_rnn = dy.LSTMBuilder(NUM_LAYERS,
                                              EMBEDDING_SIZE + lstm_size,
                                              lstm_size, self.model)
            self.decoder_rnn.set_dropout(dropout)

        global DROPOUT
        DROPOUT = dropout

        self.W1 = self.model.add_parameters((200, lstm_size))
        self.b1 = self.model.add_parameters((200, 1))
        self.W2 = self.model.add_parameters((100, 200))
        self.b2 = self.model.add_parameters((100, 1))
        self.W3 = self.model.add_parameters((len(self.C2I), 100))
        self.b3 = self.model.add_parameters((len(self.C2I), 1))
        self.W_query = self.model.add_parameters((lstm_size, lstm_size))
        self.W_key = self.model.add_parameters((lstm_size, lstm_size))
        self.W_val = self.model.add_parameters((lstm_size, lstm_size))
        self.W_att = self.model.add_parameters((1, EMBEDDING_SIZE))
        self.W_c_s = self.model.add_parameters((lstm_size, EMBEDDING_SIZE))
        self.W_direct = self.model.add_parameters((len(self.C2I), lstm_size))
        self.b_att = self.model.add_parameters((lstm_size, 1))
        self.b_direct = self.model.add_parameters((len(self.C2I), 1))
        self.E_lang = self.model.add_lookup_parameters((7, EMBEDDING_SIZE))

        if optimizer == "sgd":
            self.trainer = dy.SimpleSGDTrainer(self.model)
        elif optimizer == "rms":
            self.trainer = dy.RMSPropTrainer(self.model)
        if optimizer == "cyclic":
            self.trainer = dy.CyclicalSGDTrainer(self.model)
        elif optimizer == "adam":
            self.trainer = dy.AdamTrainer(self.model)
        else:
            self.trainer = dy.AdagradTrainer(self.model)
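
Chained selection like this is easy to get wrong (a stray if restarts the chain); a dict-based dispatch is a compact alternative, sketched here with the same trainer classes (the make_trainer helper is hypothetical):

# Alternative: dispatch the optimizer choice through a dict;
# falling back to Adagrad mirrors the else branch above.
import dynet as dy

TRAINERS = {
    "sgd": dy.SimpleSGDTrainer,
    "rms": dy.RMSPropTrainer,
    "cyclic": dy.CyclicalSGDTrainer,
    "adam": dy.AdamTrainer,
}

def make_trainer(model, optimizer):
    return TRAINERS.get(optimizer, dy.AdagradTrainer)(model)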
Example 4
 def set_trainer(self, optimization):
     if optimization == 'MomentumSGD':
         self.trainer = dy.MomentumSGDTrainer(
             self.model, learning_rate=self.hp.learning_rate)
     elif optimization == 'CyclicalSGD':
         self.trainer = dy.CyclicalSGDTrainer(
             self.model,
             learning_rate_max=self.hp.learning_rate_max,
             learning_rate_min=self.hp.learning_rate_min)
     elif optimization == 'Adam':
         self.trainer = dy.AdamTrainer(self.model)
     elif optimization == 'RMSProp':
         self.trainer = dy.RMSPropTrainer(self.model)
     else:  # 'SimpleSGD'
         self.trainer = dy.SimpleSGDTrainer(
             self.model, learning_rate=self.hp.learning_rate)
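
Note that this example uses the newer keyword names (learning_rate_min/learning_rate_max) where Examples 1 and 5 use the older e0_min/e0_max; a minimal standalone construction with the newer API (values are illustrative, not recommendations):

# CyclicalSGDTrainer with the newer keyword API; the learning rate
# oscillates between the two bounds as updates are applied.
import dynet as dy

pc = dy.ParameterCollection()
trainer = dy.CyclicalSGDTrainer(pc,
                                learning_rate_min=0.01,
                                learning_rate_max=0.1)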
Example 5
import sys
import time

import numpy as np
import dynet as dy

# project-local modules assumed by this snippet: data, seq2seq, evaluation, options
def train(opt):
    # Load data =========================================================
    if opt.verbose:
        print('Reading corpora')
    # Read vocabs
    if opt.dic_src:
        widss, ids2ws = data.load_dic(opt.dic_src)
    else:
        widss, ids2ws = data.read_dic(opt.train_src, max_size=opt.src_vocab_size)
        data.save_dic(opt.exp_name + '_src_dic.txt', widss)

    if opt.dic_dst:
        widst, ids2wt = data.load_dic(opt.dic_dst)
    else:
        widst, ids2wt = data.read_dic(opt.train_dst, max_size=opt.trg_vocab_size)
        data.save_dic(opt.exp_name + '_trg_dic.txt', widst)

    # Read training
    trainings_data = data.read_corpus(opt.train_src, widss)
    trainingt_data = data.read_corpus(opt.train_dst, widst)
    # Read validation
    valids_data = data.read_corpus(opt.valid_src, widss)
    validt_data = data.read_corpus(opt.valid_dst, widst)

    # Create model ======================================================
    if opt.verbose:
        print('Creating model')
        sys.stdout.flush()
    s2s = seq2seq.Seq2SeqModel(opt.emb_dim,
                               opt.hidden_dim,
                               opt.att_dim,
                               widss,
                               widst,
                               model_file=opt.model,
                               bidir=opt.bidir,
                               word_emb=opt.word_emb,
                               dropout=opt.dropout_rate,
                               max_len=opt.max_len)

    if s2s.model_file is not None:
        s2s.load()
    s2s.model_file = opt.exp_name+'_model.txt'
    # Trainer ==========================================================
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.model, e0_min=opt.learning_rate / 10,
                                        e0_max=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.model, e0=opt.learning_rate,
                                    edecay=opt.learning_rate_decay)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.model, opt.learning_rate, edecay=opt.learning_rate_decay)
    else:
        print('Trainer name invalid or not provided, using SGD', file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    if opt.verbose:
        print('Using '+opt.trainer+' optimizer')
    trainer.set_clip_threshold(opt.gradient_clip)
    # Print configuration ===============================================
    if opt.verbose:
        options.print_config(opt, src_dict_size=len(widss), trg_dict_size=len(widst))
        sys.stdout.flush()
    # Create batch loaders ==============================================
    if opt.verbose:
        print('Creating batch loaders')
        sys.stdout.flush()
    trainbatchloader = data.BatchLoader(trainings_data, trainingt_data, opt.batch_size)
    devbatchloader = data.BatchLoader(valids_data, validt_data, opt.dev_batch_size)
    # Start training ====================================================
    if opt.verbose:
        print('Starting training')
        sys.stdout.flush()
    start = time.time()
    train_loss = 0
    processed = 0
    best_bleu = 0
    i = 0
    for epoch in range(opt.num_epochs):
        for x, y in trainbatchloader:
            processed += sum(map(len, y))
            bsize = len(y)
            # Compute loss
            loss = s2s.calculate_loss(x, y)
            # Backward pass and parameter update
            loss.backward()
            trainer.update()
            train_loss += loss.scalar_value() * bsize
            if (i+1) % opt.check_train_error_every == 0:
                # Check average training error from time to time
                logloss = train_loss / processed
                ppl = np.exp(logloss)
                elapsed = time.time()-start
                trainer.status()
                print(" Training_loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (logloss, ppl, elapsed, processed))
                start = time.time()
                train_loss = 0
                processed = 0
                sys.stdout.flush()
            if (i+1) % opt.check_valid_error_every == 0:
                # Check generalization error on the validation set from time to time
                dev_loss = 0
                dev_processed = 0
                dev_start = time.time()
                for x, y in devbatchloader:
                    dev_processed += sum(map(len, y))
                    bsize = len(y)
                    loss = s2s.calculate_loss(x, y, test=True)
                    dev_loss += loss.scalar_value() * bsize
                dev_logloss = dev_loss/dev_processed
                dev_ppl = np.exp(dev_logloss)
                dev_elapsed = time.time()-dev_start
                print("[epoch %d] Dev loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (epoch, dev_logloss, dev_ppl, dev_elapsed, dev_processed))
                sys.stdout.flush()
                start = time.time()

            if (i+1) % opt.valid_bleu_every == 0:
                # Check BLEU score on the validation set from time to time
                print('Start translating validation set, buckle up!')
                sys.stdout.flush()
                bleu_start = time.time()
                with open(opt.valid_out, 'w+') as f:
                    for x in valids_data:
                        y_hat = s2s.translate(x, beam_size=opt.beam_size)
                        translation = [ids2wt[w] for w in y_hat[1:-1]]
                        print(' '.join(translation), file=f)
                bleu, details = evaluation.bleu_score(opt.valid_dst, opt.valid_out)
                bleu_elapsed = time.time()-bleu_start
                print('Finished translating validation set,', bleu_elapsed, 's elapsed.')
                print(details)
                # Early stopping : save the latest best model
                if bleu > best_bleu:
                    best_bleu = bleu
                    print('Best BLEU score up to date, saving model to', s2s.model_file)
                    s2s.save()
                sys.stdout.flush()
                start = time.time()
            i = i+1
        trainer.update_epoch()
Example 6
import os
import time

import dynet as dy
def get_challenge_uar(epoch):
    cmd = 'perl format_pred.pl /home3/srallaba/data/ComParE2018_SelfAssessedAffect/arff/ComParE2018_SelfAssessedAffect.ComParE.devel.arff  submission_' + str(
        epoch) + '.txt submission.arff 6375'
    print(cmd)
    os.system(cmd)

    cmd = 'perl score.pl /home3/srallaba/data/ComParE2018_SelfAssessedAffect/arff/ComParE2018_SelfAssessedAffect.ComParE.devel.arff submission.arff 6375'
    print(cmd)
    os.system(cmd)


# Instantiate DNN and define the loss
m = dy.Model()
dnn = FeedForwardNeuralNet(
    m, [input_dim, [hidden, hidden], num_classes, [dy.selu, dy.selu, dy.selu]])
trainer = dy.CyclicalSGDTrainer(m)
update_params = 2
num_epochs = 10
num_embed = 3
lookup = m.add_lookup_parameters((num_classes, num_embed))

train_data = list(zip(x_train, y_train))  # materialize so len() works in Python 3
dev_data = list(zip(x_dev, y_dev))
num_train = len(train_data)
startTime = time.time()

# Loop over the training instances and call the mlp
for epoch in range(num_epochs):
    start_time = time.time()
    print " Epoch ", epoch
    train_loss = 0