def get_trainer(opt, s2s):
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(s2s.pc, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.pc, e0_min=opt.learning_rate / 10.0,
                                        e0_max=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(s2s.pc, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.pc, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.pc, opt.learning_rate, edecay=opt.learning_rate_decay)
    else:
        print('Trainer name invalid or not provided, using SGD', file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(s2s.pc, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    trainer.set_clip_threshold(opt.gradient_clip)
    return trainer
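# Minimal invocation sketch for get_trainer() above. The argparse wiring and the
# DummyModel class are illustrative assumptions, not part of the original snippet;
# only the option names (trainer, learning_rate, learning_rate_decay, gradient_clip)
# come from the code above. It also assumes the same older DyNet version as the
# snippet, whose trainers accept the e0/edecay keyword arguments.
import argparse
import dynet as dy

parser = argparse.ArgumentParser()
parser.add_argument('--trainer', default='clr')
parser.add_argument('--learning_rate', type=float, default=0.1)
parser.add_argument('--learning_rate_decay', type=float, default=0.0)
parser.add_argument('--gradient_clip', type=float, default=5.0)
opt = parser.parse_args([])

class DummyModel(object):
    """Hypothetical stand-in exposing the .pc ParameterCollection that get_trainer expects."""
    def __init__(self):
        self.pc = dy.ParameterCollection()

trainer = get_trainer(opt, DummyModel())  # with --trainer=clr this returns a CyclicalSGDTrainer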
def train(self, dataset, epoch=1, valid_dataset=None, test_dataset=None, resume=True):
    # ha, there is both a big API and a small API here
    trainer = dy.CyclicalSGDTrainer(self.pc)
    if resume:
        resume_from = max([int(x.split('_')[1]) for x in os.listdir('save/')])
        self.pc.populate("save/model_{}".format(resume_from))
        print("[Train] Resume from epoch {}".format(resume_from))
    else:
        resume_from = 0
    best_uas = 0
    best_las = 0
    records = []  # (epoch, uas, las)
    for e in range(resume_from, resume_from + epoch):
        # shuffle dataset
        random.shuffle(dataset)
        for sent_id, sent in enumerate(dataset, 1):
            dy.renew_cg()
            length = len(sent.form)
            if length == 1:
                continue
            loss, _ = self.__call__(sent.form, sent.upos, sent.xpos, train=True,
                                    target_transitions=sent.transitions)
            (loss[0] + loss[1]).forward()
            # trick: train the two losses separately, alternating between them
            if sent_id % 5 == 0:
                loss[1].backward()
            else:
                loss[0].backward()
            # (loss[0] + loss[1]).backward()
            trainer.update()
            if sent_id % 100 == 0:
                print("[Train]\tepoch: {}\tsent_id: {}\tstructure_loss: {:.6f}\tdeprel_loss: {:.6f}"
                      .format(e, sent_id, loss[0].scalar_value() / length,
                              loss[1].scalar_value() / length))
        if valid_dataset:
            uas, las, n_sents, n_tokens = self.test(valid_dataset)
            print("[Valid]\tepoch: {}\tUAS: {:.6f}\tLAS: {:.6f}".format(e, uas, las))
            if uas > best_uas or las > best_las:
                self.pc.save("save/model_{}".format(e))
                records.append((e, uas, las))
            best_uas = max(best_uas, uas)
            best_las = max(best_las, las)
    if test_dataset:
        best_uas_model = max(records, key=itemgetter(1))[0]
        self.pc.populate("save/model_{}".format(best_uas_model))
        uas, las, n_sents, n_tokens = self.test(test_dataset)
        print("[Test]\tUAS: {:.6f}\tLAS: {:.6f}".format(uas, las))
def add_parameters(self, dropout, lstm_size, optimizer, model_type, gru=True):
    if model_type == "gru":
        self.encoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE, lstm_size, self.model)
        self.encoder_rnn.set_dropout(dropout)
        self.encoder_rnn2 = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE, lstm_size, self.model)
        self.encoder_rnn2.set_dropout(dropout)
        self.decoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE + lstm_size, lstm_size, self.model)
        self.decoder_rnn.set_dropout(dropout)
    else:
        self.encoder_rnn = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE, lstm_size, self.model)
        self.encoder_rnn.set_dropout(dropout)
        self.encoder_rnn2 = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE, lstm_size, self.model)
        self.encoder_rnn2.set_dropout(dropout)
        self.decoder_rnn = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE + lstm_size, lstm_size, self.model)
        self.decoder_rnn.set_dropout(dropout)

    global DROPOUT
    DROPOUT = dropout

    self.W1 = self.model.add_parameters((200, lstm_size))
    self.b1 = self.model.add_parameters((200, 1))
    self.W2 = self.model.add_parameters((100, 200))
    self.b2 = self.model.add_parameters((100, 1))
    self.W3 = self.model.add_parameters((len(self.C2I), 100))
    self.b3 = self.model.add_parameters((len(self.C2I), 1))
    self.W_query = self.model.add_parameters((lstm_size, lstm_size))
    self.W_key = self.model.add_parameters((lstm_size, lstm_size))
    self.W_val = self.model.add_parameters((lstm_size, lstm_size))
    self.W_att = self.model.add_parameters((1, EMBEDDING_SIZE))
    self.W_c_s = self.model.add_parameters((lstm_size, EMBEDDING_SIZE))
    self.W_direct = self.model.add_parameters((len(self.C2I), lstm_size))
    self.b_att = self.model.add_parameters((lstm_size, 1))
    self.b_direct = self.model.add_parameters((len(self.C2I), 1))
    self.E_lang = self.model.add_lookup_parameters((7, EMBEDDING_SIZE))

    if optimizer == "sgd":
        self.trainer = dy.SimpleSGDTrainer(self.model)
    elif optimizer == "rms":
        self.trainer = dy.RMSPropTrainer(self.model)
    elif optimizer == "cyclic":
        self.trainer = dy.CyclicalSGDTrainer(self.model)
    elif optimizer == "adam":
        self.trainer = dy.AdamTrainer(self.model)
    else:
        self.trainer = dy.AdagradTrainer(self.model)
def set_trainer(self, optimization):
    if optimization == 'MomentumSGD':
        self.trainer = dy.MomentumSGDTrainer(self.model, learning_rate=self.hp.learning_rate)
    elif optimization == 'CyclicalSGD':
        self.trainer = dy.CyclicalSGDTrainer(self.model,
                                             learning_rate_max=self.hp.learning_rate_max,
                                             learning_rate_min=self.hp.learning_rate_min)
    elif optimization == 'Adam':
        self.trainer = dy.AdamTrainer(self.model)
    elif optimization == 'RMSProp':
        self.trainer = dy.RMSPropTrainer(self.model)
    else:  # 'SimpleSGD'
        self.trainer = dy.SimpleSGDTrainer(self.model, learning_rate=self.hp.learning_rate)
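# Self-contained sketch of a cyclical-SGD update loop (not part of the original code).
# Assumes DyNet 2.x, where parameters can be used directly as expressions and the
# trainer takes learning_rate_min / learning_rate_max as in set_trainer() above;
# the tiny linear model and toy data are purely illustrative.
import dynet as dy

pc = dy.ParameterCollection()
W = pc.add_parameters((1, 2))
b = pc.add_parameters((1,))
trainer = dy.CyclicalSGDTrainer(pc, learning_rate_min=0.01, learning_rate_max=0.1)

toy_data = [([1.0, 0.0], 1.0), ([0.0, 1.0], 0.0)] * 50
for x, y in toy_data:
    dy.renew_cg()                                 # fresh computation graph per example
    y_hat = W * dy.inputVector(x) + b             # tiny linear model
    loss = dy.squared_distance(y_hat, dy.inputVector([y]))
    loss.forward()
    loss.backward()                               # backprop through the graph
    trainer.update()                              # learning rate follows the cyclical schedule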
def train(opt):
    # Load data =========================================================
    if opt.verbose:
        print('Reading corpora')
    # Read vocabs
    if opt.dic_src:
        widss, ids2ws = data.load_dic(opt.dic_src)
    else:
        widss, ids2ws = data.read_dic(opt.train_src, max_size=opt.src_vocab_size)
        data.save_dic(opt.exp_name + '_src_dic.txt', widss)

    if opt.dic_dst:
        widst, ids2wt = data.load_dic(opt.dic_dst)
    else:
        widst, ids2wt = data.read_dic(opt.train_dst, max_size=opt.trg_vocab_size)
        data.save_dic(opt.exp_name + '_trg_dic.txt', widst)

    # Read training data
    trainings_data = data.read_corpus(opt.train_src, widss)
    trainingt_data = data.read_corpus(opt.train_dst, widst)
    # Read validation data
    valids_data = data.read_corpus(opt.valid_src, widss)
    validt_data = data.read_corpus(opt.valid_dst, widst)

    # Create model ======================================================
    if opt.verbose:
        print('Creating model')
        sys.stdout.flush()
    s2s = seq2seq.Seq2SeqModel(opt.emb_dim,
                               opt.hidden_dim,
                               opt.att_dim,
                               widss,
                               widst,
                               model_file=opt.model,
                               bidir=opt.bidir,
                               word_emb=opt.word_emb,
                               dropout=opt.dropout_rate,
                               max_len=opt.max_len)

    if s2s.model_file is not None:
        s2s.load()
    s2s.model_file = opt.exp_name + '_model.txt'

    # Trainer ==========================================================
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.model, e0_min=opt.learning_rate / 10,
                                        e0_max=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.model, opt.learning_rate, edecay=opt.learning_rate_decay)
    else:
        print('Trainer name invalid or not provided, using SGD', file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    if opt.verbose:
        print('Using ' + opt.trainer + ' optimizer')
    trainer.set_clip_threshold(opt.gradient_clip)

    # Print configuration ===============================================
    if opt.verbose:
        options.print_config(opt, src_dict_size=len(widss), trg_dict_size=len(widst))
        sys.stdout.flush()

    # Create batch loaders ==============================================
    if opt.verbose:
        print('Creating batch loaders')
        sys.stdout.flush()
    trainbatchloader = data.BatchLoader(trainings_data, trainingt_data, opt.batch_size)
    devbatchloader = data.BatchLoader(valids_data, validt_data, opt.dev_batch_size)

    # Start training ====================================================
    if opt.verbose:
        print('starting training')
        sys.stdout.flush()
    start = time.time()
    train_loss = 0
    processed = 0
    best_bleu = 0
    i = 0
    for epoch in range(opt.num_epochs):
        for x, y in trainbatchloader:
            processed += sum(map(len, y))
            bsize = len(y)
            # Compute loss
            loss = s2s.calculate_loss(x, y)
            # Backward pass and parameter update
            loss.backward()
            trainer.update()
            train_loss += loss.scalar_value() * bsize
            if (i + 1) % opt.check_train_error_every == 0:
                # Check average training error from time to time
                logloss = train_loss / processed
                ppl = np.exp(logloss)
                elapsed = time.time() - start
                trainer.status()
                print(" Training_loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (logloss, ppl, elapsed, processed))
                start = time.time()
                train_loss = 0
                processed = 0
                sys.stdout.flush()
            if (i + 1) % opt.check_valid_error_every == 0:
                # Check generalization error on the validation set from time to time
                dev_loss = 0
                dev_processed = 0
                dev_start = time.time()
                for x, y in devbatchloader:
                    dev_processed += sum(map(len, y))
                    bsize = len(y)
                    loss = s2s.calculate_loss(x, y, test=True)
                    dev_loss += loss.scalar_value() * bsize
                dev_logloss = dev_loss / dev_processed
                dev_ppl = np.exp(dev_logloss)
                dev_elapsed = time.time() - dev_start
                print("[epoch %d] Dev loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (epoch, dev_logloss, dev_ppl, dev_elapsed, dev_processed))
                sys.stdout.flush()
                start = time.time()
            if (i + 1) % opt.valid_bleu_every == 0:
                # Check BLEU score on the validation set from time to time
                print('Start translating validation set, buckle up!')
                sys.stdout.flush()
                bleu_start = time.time()
                with open(opt.valid_out, 'w+') as f:
                    for x in valids_data:
                        y_hat = s2s.translate(x, beam_size=opt.beam_size)
                        translation = [ids2wt[w] for w in y_hat[1:-1]]
                        print(' '.join(translation), file=f)
                bleu, details = evaluation.bleu_score(opt.valid_dst, opt.valid_out)
                bleu_elapsed = time.time() - bleu_start
                print('Finished translating validation set', bleu_elapsed, 'elapsed.')
                print(details)
                # Early stopping: save the latest best model
                if bleu > best_bleu:
                    best_bleu = bleu
                    print('Best BLEU score up to date, saving model to', s2s.model_file)
                    s2s.save()
                sys.stdout.flush()
                start = time.time()
            i = i + 1
        trainer.update_epoch()
def get_challenge_uar(epoch):
    cmd = 'perl format_pred.pl /home3/srallaba/data/ComParE2018_SelfAssessedAffect/arff/ComParE2018_SelfAssessedAffect.ComParE.devel.arff submission_' + str(epoch) + '.txt submission.arff 6375'
    print(cmd)
    os.system(cmd)
    cmd = 'perl score.pl /home3/srallaba/data/ComParE2018_SelfAssessedAffect/arff/ComParE2018_SelfAssessedAffect.ComParE.devel.arff submission.arff 6375'
    print(cmd)
    os.system(cmd)


# Instantiate DNN and define the loss
m = dy.Model()
dnn = FeedForwardNeuralNet(
    m, [input_dim, [hidden, hidden], num_classes, [dy.selu, dy.selu, dy.selu]])
trainer = dy.CyclicalSGDTrainer(m)
update_params = 2
num_epochs = 10
num_embed = 3
lookup = m.add_lookup_parameters((num_classes, num_embed))

train_data = list(zip(x_train, y_train))
dev_data = list(zip(x_dev, y_dev))
num_train = len(train_data)
startTime = time.time()

# Loop over the training instances and call the mlp
for epoch in range(num_epochs):
    start_time = time.time()
    print(" Epoch ", epoch)
    train_loss = 0