def dynet_trainer_for_args(self, args):
    if args.trainer.lower() == "sgd":
        trainer = dy.SimpleSGDTrainer(model_globals.dynet_param_collection.param_col,
                                      learning_rate=args.learning_rate)
    elif args.trainer.lower() == "adam":
        trainer = dy.AdamTrainer(model_globals.dynet_param_collection.param_col,
                                 alpha=args.learning_rate)
    else:
        raise RuntimeError("Unknown trainer {}".format(args.trainer))
    return trainer
def build_optimizer(self):
    assert self.model is not None, 'build model first'
    print(f'Building {self.optimizer_type} optimizer...')
    if self.optimizer_type == 'sgd':
        self.optimizer = dy.SimpleSGDTrainer(self.model, learning_rate=self.lr)
    elif self.optimizer_type == 'adam':
        self.optimizer = dy.AdamTrainer(self.model, alpha=self.lr)
    else:
        # without this guard an unknown type would crash below with an AttributeError
        raise ValueError(f'Unknown optimizer type: {self.optimizer_type}')
    self.optimizer.set_clip_threshold(self.max_grad_norm)
    self.model.set_weight_decay(self.weight_decay)
def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
    """
    :param params: hyper-parameter namespace (dimensions, dropout rates, optimizer settings, ...)
    :param vocab: word vocabulary
    :param label2tag: mapping from aspect label indices to tag strings
    :param pretrained_embeddings: optional pretrained word embedding matrix
    """
    self.dim_w = params.dim_w
    self.win = params.win
    self.vocab = vocab
    self.n_words = len(self.vocab)
    self.dim_asp = params.dim_asp
    self.dim_y_asp = params.n_asp_tags
    self.n_steps = params.n_steps
    self.asp_label2tag = label2tag
    self.dropout_asp = params.dropout_asp
    self.dropout = params.dropout
    self.ds_name = params.ds_name
    self.model_name = params.model_name
    self.attention_type = params.attention_type
    self.pc = dy.ParameterCollection()
    self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                     pretrained_embeddings=pretrained_embeddings)
    self.DEP_RecNN = DTreeBuilder(pc=self.pc, n_in=self.win * self.dim_w,
                                  n_out=self.dim_asp, dropout_rate=self.dropout_asp)
    self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)
    self.BiAttention_F = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp,
                                     dropout_rate=self.dropout_asp)
    self.BiAttention_B = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp,
                                     dropout_rate=self.dropout_asp)
    self.BiAttention_T = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp,
                                     dropout_rate=self.dropout_asp)
    self.MultiWeightLayer = MultiWeightLayer(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp,
                                             dropout_rate=self.dropout_asp)
    self.ASP_FC = Linear(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_y_asp)
    self.layers = [self.ASP_FC, self.DEP_RecNN, self.BiAttention_F,
                   self.BiAttention_B, self.BiAttention_T, self.MultiWeightLayer]
    if params.optimizer == 'sgd':
        self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
    elif params.optimizer == 'momentum':
        self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
    elif params.optimizer == 'adam':
        self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
    elif params.optimizer == 'adagrad':
        self.optimizer = dy.AdagradTrainer(self.pc)
    elif params.optimizer == 'adadelta':
        self.optimizer = dy.AdadeltaTrainer(self.pc)
    else:
        raise Exception("Invalid optimizer!!")
def setUp(self):
    # Create model
    self.m = dy.ParameterCollection()
    # Parameters
    self.p1 = self.m.add_parameters((10, 10), init=dy.ConstInitializer(1))
    self.p2 = self.m.add_parameters((10, 10), init=dy.ConstInitializer(1))
    self.lp1 = self.m.add_lookup_parameters((10, 10), init=dy.ConstInitializer(1))
    self.lp2 = self.m.add_lookup_parameters((10, 10), init=dy.ConstInitializer(1))
    # Trainer (a negative clip threshold disables gradient clipping)
    self.trainer = dy.SimpleSGDTrainer(self.m, learning_rate=0.1)
    self.trainer.set_clip_threshold(-1)
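# A minimal companion test sketch (hypothetical, not from the original suite):
# it runs one forward/backward/update step on the fixture above, the situation
# the disabled clip threshold is meant to exercise.
def test_simple_sgd_update(self):
    dy.renew_cg()
    p1 = dy.parameter(self.p1)  # wrap the Parameter as an Expression
    loss = dy.sum_elems(p1 * dy.inputVector([1.0] * 10))
    loss.forward()
    loss.backward()
    self.trainer.update()  # one SGD step with learning_rate=0.1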
def __init__(self, embedding, output_size, hidden_size, seed=1, crf=False,
             dropout_rate=0.5, optimizer="sgd", learning_rate=0.1):
    self.tmp_patience_filename = str(uuid4()) + ".model"
    self.set_seed(seed)
    self.model = dy.ParameterCollection()
    if optimizer == "sgd":
        self.trainer = dy.SimpleSGDTrainer(self.model, learning_rate=learning_rate)
    elif optimizer == "adam":
        self.trainer = dy.AdamTrainer(self.model, alpha=learning_rate)
    else:
        raise ValueError("Unknown optimizer")
    # CRF
    if crf:
        self.num_tags = output_size + 2  # Add 2 to account for start and end tags in CRF
        self.trans_mat = self.model.add_parameters((self.num_tags, self.num_tags))
        self._loss = self._calculate_crf_loss
        self._predict = self._crf_predict_sentence
    else:
        self.num_tags = output_size
        self._loss = self._calculate_loss
        self._predict = self._predict_sentence
    # Embedding
    self.lookup = self.model.lookup_parameters_from_numpy(embedding.vectors)
    (embed_size, _), _ = self.lookup.dim()
    # Bi-LSTM
    self.bilstm = dy.BiRNNBuilder(num_layers=2,
                                  input_dim=embed_size,
                                  hidden_dim=hidden_size * 2,
                                  model=self.model,
                                  rnn_builder_factory=dy.LSTMBuilder)
    # Dense layer
    self.w = self.model.add_parameters((self.num_tags, hidden_size * 2))
    self.b = self.model.add_parameters(self.num_tags)
    self.dropout_rate = dropout_rate
def __init__(self, words_size, chars_size, w_embed_size, c_embed_size,
             lstm_hidden_size, clstm_hidden_size, lstm_num_layers,
             out_hidden_size, out_size):
    self.words_size = words_size
    self.chars_size = chars_size
    self.out_size = out_size
    self.w_embed_size = w_embed_size
    self.c_embed_size = c_embed_size
    self.clstm_hidden_size = clstm_hidden_size
    self.lstm_hidden_size = lstm_hidden_size * 2  # must be even
    self.lstm_num_layers = lstm_num_layers
    self.out_hidden_size = out_hidden_size
    self.model = dy.ParameterCollection()
    self.trainer = dy.SimpleSGDTrainer(self.model, learning_rate=0.1)
    # words and tags, entities embeddings
    self.wlookup = self.model.add_lookup_parameters((words_size, self.w_embed_size))
    self.clookup = self.model.add_lookup_parameters((chars_size, self.c_embed_size))
    # feature extractor
    self.lstm = dy.LSTMBuilder(
        self.lstm_num_layers,
        self.w_embed_size + self.clstm_hidden_size,
        self.lstm_hidden_size,
        self.model,
    )
    # char encoder
    self.clstm = dy.LSTMBuilder(
        self.lstm_num_layers,
        self.c_embed_size,
        self.clstm_hidden_size,
        self.model,
    )
    self.char_to_lstm = self.model.add_parameters((self.clstm_hidden_size, self.c_embed_size))
    self.char_to_lstm_bias = self.model.add_parameters((self.clstm_hidden_size))
    # transform word+pos vector into a vector similar to the lstm output
    # used to generate padding vectors
    self.word_to_lstm = self.model.add_parameters((self.lstm_hidden_size,
                                                   self.w_embed_size + self.clstm_hidden_size))
    self.word_to_lstm_bias = self.model.add_parameters((self.lstm_hidden_size))
    self.output_hidden = self.model.add_parameters((self.out_hidden_size, self.lstm_hidden_size))
    self.output_hidden_bias = self.model.add_parameters((self.out_hidden_size))
    self.output = self.model.add_parameters((self.out_size, self.out_hidden_size))
    self.output_bias = self.model.add_parameters((self.out_size))
def fit(self, train_X, train_Y, num_iterations, train_algo, seed=None):
    """
    train the tagger
    """
    print("read training data", file=sys.stderr)
    if seed:
        print(">>> using seed: ", seed, file=sys.stderr)
        random.seed(seed)  # setting random seed
    # init lookup parameters and define graph
    print("build graph", file=sys.stderr)
    num_words = len(self.w2i)
    num_chars = len(self.c2i)
    self.predictors, self.char_rnn, self.wembeds, self.cembeds = \
        self.build_computation_graph(num_words, num_chars)
    if train_algo == "sgd":
        trainer = dynet.SimpleSGDTrainer(self.model)
    elif train_algo == "adam":
        trainer = dynet.AdamTrainer(self.model)
    else:
        # guard against a silent NameError on `trainer` further down
        raise ValueError("Unknown training algorithm: {}".format(train_algo))
    assert len(train_X) == len(train_Y)
    train_data = list(zip(train_X, train_Y))
    for cur_iter in range(num_iterations):
        total_loss = 0.0
        total_tagged = 0.0
        random.shuffle(train_data)
        for (word_indices, char_indices), y in train_data:
            # use same predict function for training and testing
            output = self.predict(word_indices, char_indices, train=True)
            loss1 = dynet.esum([self.pick_neg_log(pred, gold)
                                for pred, gold in zip(output, y)])
            lv = loss1.value()
            total_loss += lv
            total_tagged += len(word_indices)
            loss1.backward()
            trainer.update()
        print("iter {2} {0:>12}: {1:.2f}".format("total loss",
                                                 total_loss / total_tagged,
                                                 cur_iter),
              file=sys.stderr)
def train(self, examples, clusters):
    # num_examples = len(examples)
    num_examples = 10  # NOTE: debugging cap; use len(examples) for a full run
    trainer = dy.SimpleSGDTrainer(self.params)
    # Conditional Language Model
    for epoch in range(self.num_epochs):
        batch_loss = []
        loss_sum = 0
        for idx in range(num_examples):
            z_list = clusters[idx]
            onehot_zlist = []
            for z in z_list:
                onehot_z = np.zeros(self.num_clusters)
                onehot_z[z] = 1
                onehot_z = dy.inputVector(onehot_z)
                onehot_zlist.append(onehot_z)
            loss = self.lm_train_example(examples[idx], onehot_zlist)
            batch_loss.append(loss)
            # Minibatching:
            if (idx % self.minibatch == 0) or (idx + 1 == num_examples):
                batch_loss = dy.esum(batch_loss)
                loss_sum += batch_loss.value()
                batch_loss.backward()
                batch_loss = []
                trainer.update()
                dy.renew_cg()
        print("(Language Model) Epoch: {} | Loss: {}".format(epoch + 1, loss_sum))
    # Latent Variable Prediction
    for epoch in range(self.num_epochs):
        batch_loss = []
        loss_sum = 0
        for idx in range(num_examples):
            z_list = clusters[idx]
            loss = self.latent_variable_prediction(examples[idx])
            batch_loss.append(loss)
            # Minibatching:
            if (idx % self.minibatch == 0) or (idx + 1 == num_examples):
                batch_loss = dy.esum(batch_loss)
                loss_sum += batch_loss.value()
                batch_loss.backward()
                batch_loss = []
                trainer.update()
                dy.renew_cg()
        print("(Latent Variable Prediction) Epoch: {} | Loss: {}".format(epoch + 1, loss_sum))
def get_trainer(opt, s2s):
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(s2s.pc, learning_rate=opt.learning_rate)
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.pc,
                                        learning_rate_min=opt.learning_rate / 10.0,
                                        learning_rate_max=opt.learning_rate)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(s2s.pc, learning_rate=opt.learning_rate)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.pc, learning_rate=opt.learning_rate)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.pc, opt.learning_rate)
    else:
        print('Trainer name invalid or not provided, using SGD', file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(s2s.pc, learning_rate=opt.learning_rate)
    trainer.set_clip_threshold(opt.gradient_clip)
    return trainer
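# Hypothetical usage sketch for get_trainer above; the `opt` fields and the
# stand-in `s2s` object are assumptions mirroring the names the function expects.
if __name__ == '__main__':
    from types import SimpleNamespace
    opt = SimpleNamespace(trainer='adam', learning_rate=0.001, gradient_clip=5.0)
    s2s = SimpleNamespace(pc=dy.ParameterCollection())
    trainer = get_trainer(opt, s2s)  # -> dy.AdamTrainer with clip threshold 5.0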
def main():
    training_log = open('training-' + str(datetime.now()) + '.log', 'w')
    model = dy.Model()
    trainer = dy.SimpleSGDTrainer(model)
    training_src = read_file(sys.argv[1])
    word_freq_src = build_dict(training_src)
    training_tgt = read_file(sys.argv[2])
    word_freq_tgt = build_dict(training_tgt)
    training_src = sentence_clean(training_src, word_freq_src)
    training_tgt = sentence_clean(training_tgt, word_freq_tgt)
    dev_src = sentence_clean(read_file(sys.argv[3]), word_freq_src)
    dev_tgt = sentence_clean(read_file(sys.argv[4]), word_freq_tgt)
    test_src = sentence_clean(read_file(sys.argv[5]), word_freq_src)
    attention = Attention(model, list(training_src), list(training_tgt))
    train_data = list(zip(training_src, training_tgt))  # list() so it can be sorted
    train_data.sort(key=lambda x: -len(x[0]))
    train_src = [sent[0] for sent in train_data]
    train_tgt = [sent[1] for sent in train_data]
    start = time.time()
    for epoch in range(150):
        epoch_loss = 0
        train_zip = list(zip(train_src, train_tgt))
        i = 0
        while i < len(train_zip):
            esum, num_words = attention.step_batch(train_zip[i:i + attention.BATCH_SIZE])
            i += attention.BATCH_SIZE
            epoch_loss += esum.scalar_value()
            esum.backward()
            trainer.update()
        # if epoch_loss < 10:
        #     end = time.time()
        #     print('TIME ELAPSED:', end - start, 'SECONDS')
        #     break
        print('Epoch:', epoch)
        training_log.write("Epoch %d: loss=%f \n" % (epoch, epoch_loss))
        training_log.flush()
        trainer.update_epoch(1.0)
        # training_log.write(attention.translate_sentence(training_src[0]) + '\n')
        if epoch % 5 == 0:
            # attention.save(epoch)
            ori_sentence = attention.translate_sentence_ori(training_src[0])
            training_log.write('ori:' + ori_sentence + '\n')
            # print('----ori finished----')
            training_log.write('new:' + attention.translate_sentence_beam(training_src[0]) + '\n')
def __init__(self):
    print("hello from Talha Yılmaz :)")
    self.word_number = 15
    self.epoch_num = 4
    self.embedding_layer_size = 32
    self.hidden_layer_size = 32
    self.min_count = 2
    self.model = dy.Model()
    self.trainer = dy.SimpleSGDTrainer(self.model)
    self.pW_hidden = self.model.add_parameters(
        (self.hidden_layer_size, self.embedding_layer_size))
    self.pB_hidden = self.model.add_parameters(self.hidden_layer_size)
def train(network, train_set, val_set, epochs):
    global TEXTE
    TEXTE += "<ul>"
    MAX_STRING_LEN = 50

    def get_val_set_loss(network, val_set):
        loss = [network.get_loss(input_string, output_string).value()
                for input_string, output_string in val_set]
        return sum(loss)

    trainer = dy.SimpleSGDTrainer(network.model)
    losses = list()
    iterations = list()
    occurences = 0
    for i in range(epochs):
        print("Epoch ", i)
        for training_example in train_set:
            occurences += 1
            input_string, output_string = training_example
            loss = network.get_loss(input_string, output_string)
            # performing a forward pass through the network
            loss_value = loss.value()
            # an optimization step
            loss.backward()
            trainer.update()
            if occurences % ((len(train_set) * epochs) // 100) == 0:
                val_loss = get_val_set_loss(network, val_set)
                losses.append(val_loss)
                iterations.append(occurences // ((len(train_set) * epochs) // 100))
        plot_name = 'plots/' + str(network).split()[0].split('.')[1] + '.png'
        plt.ioff()
        fig = plt.figure()
        plt.plot(iterations, losses)
        plt.axis([0, 100, 0, len(val_set) * MAX_STRING_LEN])
        if not os.path.exists("plots"):
            os.makedirs("plots")
        plt.savefig(plot_name)
        plt.close(fig)
        TEXTE += "<li>Epoch %d - loss on validation set is %.9f </li>" % (i, val_loss)
    TEXTE += '</ul><img src="%s">' % plot_name
def set_trainer(self, optimization):
    # elif chain: with independent `if`s, the final `else` (paired only with the
    # RMSProp test) would silently overwrite any previously selected trainer.
    if optimization == 'MomentumSGD':
        self.trainer = dy.MomentumSGDTrainer(self.model,
                                             learning_rate=self.hp.learning_rate)
    elif optimization == 'CyclicalSGD':
        self.trainer = dy.CyclicalSGDTrainer(self.model,
                                             learning_rate_max=self.hp.learning_rate_max,
                                             learning_rate_min=self.hp.learning_rate_min)
    elif optimization == 'Adam':
        self.trainer = dy.AdamTrainer(self.model)
    elif optimization == 'RMSProp':
        self.trainer = dy.RMSPropTrainer(self.model)
    else:  # 'SimpleSGD'
        self.trainer = dy.SimpleSGDTrainer(self.model,
                                           learning_rate=self.hp.learning_rate)
def trainExample(model):
    # SGD Trainer (`pc` is presumably the dynet module alias used in this file)
    trainer = pc.SimpleSGDTrainer(model)
    symbols = []
    # words, symbols = read(TRAIN_FILEPATH_SRC)
    for i in range(EPOCHS):
        s = 0
        for src, target in read(TRAIN_FILEPATH_SRC):
            loss_value = train(model, trainer, src, target)
            if s % 1000 == 0:
                print("Epoch: ", i, " Sentence: ", s, " Loss: ", loss_value)
            s = s + 1
        print("Epoch: ", i, "Loss: ", loss_value)
    model.save(MODEL_PATH)
def main():
    model = dy.Model()
    trainer = dy.SimpleSGDTrainer(model)
    # training_src = read_file(sys.argv[1])
    # training_tgt = read_file(sys.argv[2])
    # trainFileName_src = "train.en-de.low.filt.de"
    # trainFileName_tgt = "train.en-de.low.filt.en"
    trainFileName_src = sys.argv[1]
    trainFileName_tgt = sys.argv[2]
    training_src = []
    training_tgt = []
    for line in open(trainFileName_src, 'r'):
        fields = line.strip().split(' ')
        training_src.append(fields)
    for line in open(trainFileName_tgt, 'r'):
        fields = line.strip().split(' ')
        training_tgt.append(fields)
    attention = Attention(model, training_src, training_tgt)
    # model.load returning the saved objects is the old (pre-2.0) DyNet API
    (attention.src_lookup, attention.tgt_lookup, attention.l2r_builder,
     attention.r2l_builder, attention.dec_builder, attention.W_y, attention.b_y,
     attention.W1_att_f, attention.W1_att_e, attention.w2_att) = model.load('myModel')
    attention.l2r_builder.disable_dropout()
    attention.r2l_builder.disable_dropout()
    attention.dec_builder.disable_dropout()
    testFileName = sys.argv[3]
    testSent = []
    for line in open(testFileName, 'r'):
        fields = line.strip().split(' ')
        testSent.append(fields)
    rst = []
    # translate
    ccc = 0
    for testS in testSent:
        ccc += 1
        rst.append(attention.translate_sentence(testS))
    f = open('rst', 'w')
    for i in rst:
        f.write(i + '\n')
    f.close()
def __init__(self, char_dim, feat_dim, hidden_dim, char_size, feat_sizes):
    self._char_dim = char_dim
    self._feat_dim = feat_dim
    self._pc = dy.ParameterCollection()
    if config.adam:
        self._trainer = dy.AdamTrainer(self._pc, config.learning_rate,
                                       config.beta_1, config.beta_2, config.epsilon)
    else:
        # self._trainer = dy.AdadeltaTrainer(self._pc)
        # assign to self._trainer; a bare local `trainer` would leave it unset
        self._trainer = dy.SimpleSGDTrainer(self._pc, config.learning_rate)
        self._trainer.set_clip_threshold(config.clip_threshold)
    # self._trainer.set_clip_threshold(1.0)
    self.params = dict()
    self.lp_c = self._pc.add_lookup_parameters((char_size, char_dim))
    self.lp_feats = []
    for idx in range(len(feat_sizes)):
        self.lp_feats.append(self._pc.add_lookup_parameters((feat_sizes[idx], feat_dim),
                                                            init=dy.ConstInitializer(0.)))
    # self._pdrop_embs = pdrop_embs
    # self._pdrop_lstm = pdrop_lstm
    # self._pdrop_mlp = pdrop_mlp
    self.LSTM_builders = []
    f = dy.VanillaLSTMBuilder(1, char_dim, hidden_dim, self._pc)
    b = dy.VanillaLSTMBuilder(1, char_dim, hidden_dim, self._pc)
    self.LSTM_builders.append((f, b))
    for i in range(config.layers - 1):
        f = dy.VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
        b = dy.VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
        self.LSTM_builders.append((f, b))
    self.dec_LSTM = dy.VanillaLSTMBuilder(1, hidden_dim, hidden_dim, self._pc)
    self.MLP = self._pc.add_parameters((char_dim + feat_dim * 6 + 6, hidden_dim))
    self.MLP_bias = self._pc.add_parameters((hidden_dim))
    self.classifier = self._pc.add_parameters((hidden_dim, char_size))
    self.classifier_bias = self._pc.add_parameters((char_size))
    # add_parameters lives on the ParameterCollection, not on the model object itself
    self.MLP_attn = self._pc.add_parameters((char_dim + feat_dim * 6 + 6, hidden_dim))
    self.MLP_attn_bias = self._pc.add_parameters((hidden_dim))
    self.attn_weight = self._pc.add_parameters((char_dim))
def train(seq2seq, sentence_pairs, n_round=200):
    trainer = dn.SimpleSGDTrainer(seq2seq.model)
    for i in range(n_round):
        if (i + 1) % ((n_round + 19) // 20) == 0:
            from random import randint
            idx = randint(0, len(sentence_pairs) - 1)
        else:
            idx = -1
        # separate index so the outer round counter isn't shadowed
        for j, (in_s, out_s) in enumerate(sentence_pairs):
            loss = seq2seq.compute_loss(in_s, out_s[::-1])
            loss_value = loss.value()
            loss.backward()
            trainer.update()
            if j == idx:
                print(loss_value, idx, end=' ')
                print(in_s, " >>> ", end=' ')
                print(seq2seq.generate(in_s)[::-1])
def train(lstm, params, train_data, dev_data, epochs):
    # `pc` is assumed to be the module-level dy.ParameterCollection holding `params`
    trainer = dy.SimpleSGDTrainer(pc)
    for ep in range(epochs):
        i = 0
        print("EPOCH {}".format(ep))
        np.random.shuffle(train_data)
        # print("train_data {}".format(train_data[0]))
        for train_y, sentence in train_data:
            # print("sentence\n{}\ntrain_y{}".format(sentence, train_y))
            loss, _ = do_one_sentence(lstm, params, sentence, train_y)
            # print("after do one sent")
            loss.backward()
            trainer.update()
            if i % 200 == 0:
                dev_loss, dev_acc = check_loss(lstm, params, dev_data, ep > 1)
                print("loss: {:.4f}\tacc: {:.2f}".format(dev_loss, dev_acc))
            i += 1
def train_network(train_data, dev_data, encoder, network):
    global prev_acc, prev_acc_ex0, model_file, report
    model = network.model
    trainer = dy.SimpleSGDTrainer(model)
    prev_acc = prev_acc or 0.5
    prev_acc_ex0 = prev_acc_ex0 or 0.5
    report = []
    tagged = loss = 0
    i = 1
    t0 = time.clock()
    for ep in range(EPOCHS):
        random.shuffle(train_data)
        for s in train_data:
            i += 1
            if i % 20000 == 0:
                print("average loss last 500 cycles: {}".format(loss / tagged))
                acc, acc_ex0 = test_a_classifier_on_dev(network, dev_data)
                print("dev accuracy after {} cycles: {}, {}".format(i, acc, acc_ex0))
                ti = time.clock()
                report.append(OrderedDict([
                    ("cycles", i),
                    ("dev_accuracy", acc),
                    ("dev_accuracy_except_common", acc_ex0),
                    ("loss", loss / tagged),
                    ("clock_time", round(ti - t0, 2)),
                    ("saved", 0),
                ]))
                loss = 0
                tagged = 0
                ti = t0
                if acc > prev_acc:
                    print("saving")
                    network.save(model_file)
                    report[-1]["saved"] = 1
                    prev_acc = acc
                if acc_ex0 > prev_acc_ex0:
                    prev_acc_ex0 = acc_ex0
            sum_errs = single_training_pass(s, encoder, network)
            loss += sum_errs.scalar_value()
            tagged += len(s)
            sum_errs.backward()
            trainer.update()
def train(network, train_set, val_set, epochs):
    global TEXTE
    TEXTE += "<ul>"
    MAX_STRING_LEN = 50  # for the scale of the plot of gradient descent

    def get_val_set_loss(network, val_set):
        loss = [network.get_loss(input_string, output_string).value()
                for input_string, output_string in val_set]
        return sum(loss)

    trainer = dy.SimpleSGDTrainer(network.model)
    losses = list()
    iterations = list()
    occurences = 0
    for i in range(epochs):
        print("Epoch ", i)
        for training_example in train_set:
            occurences += 1
            input_string, output_string = training_example
            loss = network.get_loss(input_string, output_string)
            loss_value = loss.value()
            loss.backward()
            trainer.update()
            if occurences % ((len(train_set) * epochs) // 100) == 0:
                val_loss = get_val_set_loss(network, val_set)
                losses.append(val_loss)
                iterations.append(occurences // ((len(train_set) * epochs) // 100))
        plt.ioff()
        fig = plt.figure()
        plt.plot(iterations, losses)
        plt.axis([0, 100, 0, len(val_set) * MAX_STRING_LEN])
        if not os.path.exists("plots"):
            os.makedirs("plots")
        plt.savefig('plots/plot.png')
        plt.close(fig)
        TEXTE += "<li>Epoch %d - loss on validation set is %.9f </li>" % (i, val_loss)
    TEXTE += '</ul><img src="plots/plot.png">'
def main():
    model = dy.Model()
    trainer = dy.SimpleSGDTrainer(model)
    training_src = read_file(sys.argv[1])
    training_tgt = read_file(sys.argv[2])
    dev_src = read_file(sys.argv[3])
    dev_tgt = read_file(sys.argv[4])
    test_src = read_file(sys.argv[5])
    model_name = sys.argv[6]
    src_vector_file = None
    if len(sys.argv) > 7:  # was `> 6`, which would index argv[7] before it exists
        src_vector_file = sys.argv[7]
    test_sense_src = None
    if len(sys.argv) > 8 and not sys.argv[8].startswith("--"):  # was `> 7`
        test_sense_src = read_file_sense(sys.argv[8])
    dev = [(x, y) for (x, y) in zip(dev_src, dev_tgt)]
    if OLAF:
        print("Burrrr! The vectors are frozen!")
    else:
        print("The vectors are not frozen and olaf is melting!")
    if DEV:
        print("In DEV mode, limiting each corpus to {0} sentences".format(DEV_LIMIT))
    attention = Attention(model, training_src, training_tgt, model_name,
                          src_vectors_file=src_vector_file, frozen_vectors=OLAF)
    out_language = sys.argv[1].split('.')[-1]
    if LOAD_MODEL:
        attention.load_model()
    if TRAIN:
        attention.train_batch(dev, trainer, test_src, True,
                              'test.' + out_language, test_sense_src)
    attention.translate(test_src, 'test.' + out_language)
def __init__(self, input_path, n):
    self.n = n
    self.vocabs = {}
    self.sentences = []
    self.initialize_vocab(input_path)
    self.vocab_size = len(self.vocabs)
    self.model = dy.Model()  # must not be garbage-collected/out of scope
    self.trainer = dy.SimpleSGDTrainer(self.model)
    self.embedding = self.model.add_lookup_parameters(
        (self.vocab_size, EMBEDDING_DIMENSION))
    self.w1 = self.model.add_parameters(
        (HIDDEN_SIZE, (self.n - 1) * EMBEDDING_DIMENSION))
    self.b1 = self.model.add_parameters(HIDDEN_SIZE)
    self.w2 = self.model.add_parameters((self.vocab_size, HIDDEN_SIZE))
    self.b2 = self.model.add_parameters(self.vocab_size)
    self.unknown_words = set()
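# Hypothetical forward sketch for the n-gram LM above (the method name _log_probs
# and its use are assumptions, not part of the original class): concatenate the
# (n-1) context embeddings, apply one tanh hidden layer, and score the vocabulary.
def _log_probs(self, context_ids):
    dy.renew_cg()
    embs = [self.embedding[i] for i in context_ids]  # (n-1) lookups
    h = dy.tanh(dy.parameter(self.w1) * dy.concatenate(embs) + dy.parameter(self.b1))
    return dy.log_softmax(dy.parameter(self.w2) * h + dy.parameter(self.b2))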
def __init__(self, init_learning_rate, vw):
    self.model = dt.Model()
    self.vw = vw
    n_words = vw.size()
    # `e0` is the old DyNet name for the initial learning rate
    self.learner = dt.SimpleSGDTrainer(self.model, e0=init_learning_rate)
    self.E = self.model.add_lookup_parameters((n_words, SqaModel.WORD_EMBEDDING_DIM))
    # similarity(v, o): v^T o
    self.SelColW = self.model.add_parameters((4))
    self.SelColWhereW = self.model.add_parameters((4))
    self.NulW = self.model.add_parameters((SqaModel.WORD_EMBEDDING_DIM))
    self.ColW = self.model.add_parameters((SqaModel.WORD_EMBEDDING_DIM))
    # LSTM question representation
    self.builders = [
        dt.LSTMBuilder(1, SqaModel.WORD_EMBEDDING_DIM, SqaModel.LSTM_HIDDEN_DIM, self.model),
        dt.LSTMBuilder(1, SqaModel.WORD_EMBEDDING_DIM, SqaModel.LSTM_HIDDEN_DIM, self.model),
    ]
    self.pH = self.model.add_parameters(
        (SqaModel.WORD_EMBEDDING_DIM, SqaModel.LSTM_HIDDEN_DIM * 2))
    if config.d["USE_PRETRAIN_WORD_EMBEDDING"]:
        n_hit_pretrain = 0.0
        trie = config.d["embeddingtrie"]
        print("beginning to load embeddings....")
        for i in range(n_words):
            word = self.vw.i2w[i].lower()
            results = trie.items(word + config.d["recordtriesep"])
            if len(results) == 1:
                pretrain_v = np.array(list(results[0][1]))
                pretrain_v = pretrain_v / np.linalg.norm(pretrain_v)
                self.E.init_row(i, pretrain_v)
                n_hit_pretrain += 1
            else:
                pretrain_v = self.E[i].npvalue()
                pretrain_v = pretrain_v / np.linalg.norm(pretrain_v)
                self.E.init_row(i, pretrain_v)
        print("the number of words that are in pretrain",
              n_hit_pretrain, n_words, n_hit_pretrain / n_words)
        print("loading complete!")
def __init__(self, args):
    dy.renew_cg()
    self.args = args  # save for later
    self.model = dy.Model()
    if args.trainer.lower() == "sgd":
        self.trainer = dy.SimpleSGDTrainer(self.model, e0=args.learning_rate)
    elif args.trainer.lower() == "adam":
        self.trainer = dy.AdamTrainer(self.model, alpha=args.learning_rate)
    else:
        raise RuntimeError("Unknown trainer {}".format(args.trainer))
    if args.lr_decay > 1.0 or args.lr_decay <= 0.0:
        raise RuntimeError("illegal lr_decay, must satisfy: 0.0 < lr_decay <= 1.0")
    self.learning_scale = 1.0
    self.early_stopping_reached = False
    # Create the model serializer
    self.create_model()
    # single mode
    if args.batch_size is None or args.batch_size == 1 or args.batch_strategy.lower() == 'none':
        print('Start training in non-minibatch mode...')
        self.logger = NonBatchLossTracker(args.eval_every, self.total_train_sent)
    # minibatch mode
    else:
        print('Start training in minibatch mode...')
        self.batcher = Batcher.select_batcher(args.batch_strategy)(args.batch_size)
        if args.input_format == "contvec":
            assert self.train_src[0].nparr.shape[1] == self.input_embedder.emb_dim, \
                "input embed dim is different size than expected"
            self.batcher.pad_token = np.zeros(self.input_embedder.emb_dim)
        self.train_src, self.train_trg = self.batcher.pack(self.train_src, self.train_trg)
        self.dev_src, self.dev_trg = self.batcher.pack(self.dev_src, self.dev_trg)
        self.logger = BatchLossTracker(args.eval_every, self.total_train_sent)
def train(model, data):
    trainer = dy.SimpleSGDTrainer(model)
    for n in range(EPOCHS):
        totalloss = 0
        random.shuffle(data)
        for i, ex in enumerate(data):
            stdout.write('EPOCH %u: ex %u of %u\r' % (n + 1, i + 1, len(data)))
            tab, label, wf = ex
            loss = get_loss(tab, label, wf)
            totalloss += loss.value()
            loss.backward()
            trainer.update()
        print()
        print(totalloss / len(data))
    for ex in data[:10]:
        tab, label, wf = ex
        print('input:', ''.join([tab, label]),
              'sys:', generate(tab, label),
              'gold:', ''.join(wf))
def main():
    training_log = open('training-' + str(datetime.now()) + '.log', 'w')
    model = dy.Model()
    trainer = dy.SimpleSGDTrainer(model)
    training_src = read_file(sys.argv[1])
    training_tgt = read_file(sys.argv[2])
    dev_src = read_file(sys.argv[3])
    dev_tgt = read_file(sys.argv[4])
    test_src = read_file(sys.argv[5])
    attention = Attention(model, list(training_src), list(training_tgt))
    train_data = list(zip(training_src, training_tgt))  # list() so it can be sorted
    train_data.sort(key=lambda x: -len(x[0]))
    train_src = [sent[0] for sent in train_data]
    train_tgt = [sent[1] for sent in train_data]
    start = time.time()
    for epoch in range(5000):
        epoch_loss = 0
        train_zip = list(zip(train_src, train_tgt))
        i = 0
        while i < len(train_zip):
            esum, num_words = attention.step_batch(train_zip[i:i + attention.BATCH_SIZE])
            i += attention.BATCH_SIZE
            epoch_loss += esum.scalar_value()  # / num_words
            esum.backward()
            trainer.update()
        # if epoch_loss < 10:
        #     end = time.time()
        #     print('TIME ELAPSED:', end - start, 'SECONDS')
        #     break
        # print('Epoch:', epoch)
        print("Epoch %d: loss=%f \n" % (epoch, epoch_loss))
        training_log.write("Epoch %d: loss=%f \n" % (epoch, epoch_loss))
        training_log.flush()
        trainer.update_epoch(1.0)
        training_log.write(attention.translate_sentence(training_src[0]) + '\n')
        if epoch % 100 == 0:
            attention.save(epoch)
def train(model, data):
    trainer = dy.SimpleSGDTrainer(model)
    for n in range(5):
        totalloss = 0
        random.shuffle(data)
        for i, io in enumerate(data):
            if i > 5000:
                break
            stdout.write('EPOCH %u: ex %u of %u\r' % (n + 1, i + 1, len(data)))
            input, output = io
            loss = get_loss(input, output, enc_fwd_lstm, enc_bwd_lstm, dec_lstm)
            totalloss += loss.value()
            loss.backward()
            trainer.update()
        print()
        print(totalloss / len(data))
    for input, output in data[:10]:
        print('input:', ''.join(input),
              'sys:', generate(input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm),
              'gold:', ''.join(output))
def __init__(self, vocab_size):
    self.model = dy.Model()
    self.trainer = dy.SimpleSGDTrainer(self.model)
    self.layers = 2
    self.embed_size = 1
    self.hidden_size = 1
    self.src_vocab_size = vocab_size
    self.tgt_vocab_size = vocab_size
    self.enc_builder = dy.LSTMBuilder(self.layers, self.embed_size,
                                      self.hidden_size, self.model)
    self.dec_builder = dy.LSTMBuilder(self.layers, self.embed_size,
                                      self.hidden_size, self.model)
    self.src_lookup = self.model.add_lookup_parameters(
        (self.src_vocab_size, self.embed_size))
    self.tgt_lookup = self.model.add_lookup_parameters(
        (self.tgt_vocab_size, self.embed_size))
    self.W_y = self.model.add_parameters((self.tgt_vocab_size, self.hidden_size))
    self.b_y = self.model.add_parameters((self.tgt_vocab_size))
def train(self, examples):
    num_examples = len(examples)
    trainer = dy.SimpleSGDTrainer(self.params)
    for epoch in range(self.num_epochs):
        batch_loss = []
        loss_sum = 0
        for idx in range(num_examples):
            loss = self.train_example(examples[idx])
            batch_loss.append(loss)
            # Minibatching: accumulate losses, then do one combined update
            if (idx % self.minibatch == 0) or (idx + 1 == num_examples):
                batch_loss = dy.esum(batch_loss)
                loss_sum += batch_loss.value()
                batch_loss.backward()
                batch_loss = []
                trainer.update()
                dy.renew_cg()
        print("Epoch: {} | Loss: {}".format(epoch + 1, loss_sum))
def test_softmax_model():
    """Train softmax model for a number of steps."""
    config = Config()
    # Generate random data to train the model on
    np.random.seed(1234)
    inputs = np.random.rand(config.n_samples, config.n_features)
    labels = np.zeros((config.n_samples, config.n_classes), dtype=np.int32)
    labels[:, 1] = 1
    # for i in range(config.n_samples):
    #     labels[i, i % config.n_classes] = 1
    mini_batches = [[inputs[k:k + config.batch_size], labels[k:k + config.batch_size]]
                    for k in range(0, config.n_samples, config.batch_size)]
    m = dy.ParameterCollection()
    trainer = dy.SimpleSGDTrainer(m)
    trainer.learning_rate = config.lr
    net = SoftmaxModel(config, m)
    for epoch in range(config.n_epochs):
        start_time = time.time()
        for mini_batch in mini_batches:
            dy.renew_cg()
            losses = []
            for ix in range(config.batch_size):
                l = net.create_network_return_loss(
                    np.array(mini_batch[0][ix]).reshape(1, config.n_features),
                    np.array(mini_batch[1][ix]).reshape(1, config.n_classes))
                losses.append(l)
            loss = dy.esum(losses) / config.batch_size
            loss.forward()
            loss.backward()
            trainer.update()
        duration = time.time() - start_time
        print('Epoch {:}: loss = {:.2f} ({:.3f} sec)'.format(epoch, loss.value(), duration))
    print(loss.value())
    assert loss.value() < .5
    print("Basic (non-exhaustive) classifier tests pass")