def __init__(self, params, vocab, embeddings, char_embeddings):
    """
    :param params: hyperparameter / configuration object
    :param vocab: word vocabulary
    :param embeddings: pretrained word embeddings
    :param char_embeddings: pretrained character embeddings
    """
    self.params = params
    self.name = 'lstm_cascade'
    self.dim_char = params.dim_char
    self.dim_w = params.dim_w
    self.dim_char_h = params.dim_char_h
    self.dim_ote_h = params.dim_ote_h
    self.dim_ts_h = params.dim_ts_h
    self.input_win = params.input_win
    self.ds_name = params.ds_name
    # tag vocabularies of opinion target extraction and targeted sentiment
    self.ote_tag_vocab = params.ote_tag_vocab
    self.ts_tag_vocab = params.ts_tag_vocab
    self.dim_ote_y = len(self.ote_tag_vocab)
    self.dim_ts_y = len(self.ts_tag_vocab)
    self.n_epoch = params.n_epoch
    self.dropout_rate = params.dropout
    self.tagging_schema = params.tagging_schema
    self.clip_grad = params.clip_grad
    self.use_char = params.use_char
    # name of word embeddings
    self.emb_name = params.emb_name
    self.embeddings = embeddings
    self.vocab = vocab
    # character vocabulary
    self.char_vocab = params.char_vocab
    #self.td_proportions = params.td_proportions
    self.epsilon = params.epsilon
    #self.tc_proportions = params.tc_proportions
    self.pc = dy.ParameterCollection()

    if self.use_char:
        self.char_emb = CharEmb(pc=self.pc,
                                n_chars=len(self.char_vocab),
                                dim_char=self.dim_char,
                                pretrained_embeddings=char_embeddings)
        self.lstm_char = dy.LSTMBuilder(1, self.dim_char, self.dim_char_h, self.pc)
        dim_input = self.input_win * self.dim_w + 2 * self.dim_char_h
    else:
        dim_input = self.input_win * self.dim_w
    # word embedding layer
    self.emb = WDEmb(pc=self.pc, n_words=len(vocab), dim_w=self.dim_w,
                     pretrained_embeddings=embeddings)
    # lstm layers
    self.lstm_ote = dy.LSTMBuilder(1, dim_input, self.dim_ote_h, self.pc)
    self.lstm_ts = dy.LSTMBuilder(1, 2 * self.dim_ote_h, self.dim_ts_h, self.pc)
    # fully connected layers
    self.fc_ote = Linear(pc=self.pc, n_in=2 * self.dim_ote_h, n_out=self.dim_ote_y)
    self.fc_ts = Linear(pc=self.pc, n_in=2 * self.dim_ts_h, n_out=self.dim_ts_y)

    assert self.tagging_schema == 'BIEOS'
    transition_path = {'B': ['B-POS', 'B-NEG', 'B-NEU'],
                       'I': ['I-POS', 'I-NEG', 'I-NEU'],
                       'E': ['E-POS', 'E-NEG', 'E-NEU'],
                       'S': ['S-POS', 'S-NEG', 'S-NEU'],
                       'O': ['O']}
    self.transition_scores = np.zeros((self.dim_ote_y, self.dim_ts_y))
    for t in transition_path:
        next_tags = transition_path[t]
        n_next_tag = len(next_tags)
        ote_id = self.ote_tag_vocab[t]
        for nt in next_tags:
            ts_id = self.ts_tag_vocab[nt]
            self.transition_scores[ote_id][ts_id] = 1.0 / n_next_tag
    print(self.transition_scores)
    self.transition_scores = np.array(self.transition_scores, dtype='float32').transpose()
    # opinion target-opinion words co-occurrence modeling
    self.stm_lm = Linear(pc=self.pc, n_in=2 * self.dim_ote_h, n_out=2 * self.dim_ote_h,
                         nonlinear='tanh')
    # fully connected layer for opinion-enhanced indicator prediction task
    self.fc_stm = Linear(pc=self.pc, n_in=2 * self.dim_ote_h, n_out=2)
    # gate for maintaining sentiment consistency
    self.W_gate = self.pc.add_parameters((2 * self.dim_ote_h, 2 * self.dim_ote_h),
                                         init=dy.UniformInitializer(0.2))
    # determine the optimizer
    if params.optimizer == 'sgd':
        self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
    elif params.optimizer == 'adam':
        self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
    elif params.optimizer == 'adadelta':
        self.optimizer = dy.AdadeltaTrainer(self.pc)
    elif params.optimizer == 'momentum':
        self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
    else:
        raise Exception("Unsupported optimizer type: %s" % params.optimizer)
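# A standalone sketch of how the OTE->TS transition matrix built above is
# consumed: it maps a distribution over boundary tags to a soft prior over
# sentiment-tagged labels. The small vocabularies here are illustrative
# assumptions; the real ones come from params.
import numpy as np

ote_vocab = {'O': 0, 'B': 1, 'I': 2, 'E': 3, 'S': 4}
ts_vocab = {'O': 0,
            'B-POS': 1, 'B-NEG': 2, 'B-NEU': 3,
            'I-POS': 4, 'I-NEG': 5, 'I-NEU': 6,
            'E-POS': 7, 'E-NEG': 8, 'E-NEU': 9,
            'S-POS': 10, 'S-NEG': 11, 'S-NEU': 12}
path = {'B': ['B-POS', 'B-NEG', 'B-NEU'], 'I': ['I-POS', 'I-NEG', 'I-NEU'],
        'E': ['E-POS', 'E-NEG', 'E-NEU'], 'S': ['S-POS', 'S-NEG', 'S-NEU'],
        'O': ['O']}
M = np.zeros((len(ote_vocab), len(ts_vocab)), dtype='float32')
for t, nts in path.items():
    for nt in nts:
        M[ote_vocab[t], ts_vocab[nt]] = 1.0 / len(nts)
M = M.transpose()                   # (dim_ts_y, dim_ote_y), as in the constructor
p_ote = np.array([.1, .6, .1, .1, .1], dtype='float32')  # P(OTE tag) for one token
print(M.dot(p_ote))                 # soft prior over the 13 TS tags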
def __init__(self, params, vocab, embeddings): """ :param params: parameters :param vocab: vocabulary :param embeddings: pretrained word embeddings """ self.params = params self.name = 'lstm_crf' self.dim_char = params.dim_char self.dim_w = params.dim_w self.dim_char_h = params.dim_char_h self.dim_ote_h = params.dim_ote_h self.dim_ts_h = params.dim_ts_h self.input_win = params.input_win self.ds_name = params.ds_name # tag vocabulary of opinion target extraction and targeted sentiment self.ote_tag_vocab = params.ote_tag_vocab self.ts_tag_vocab = params.ts_tag_vocab self.dim_ote_y = len(self.ote_tag_vocab) self.dim_ts_y = len(self.ts_tag_vocab) self.n_epoch = params.n_epoch self.dropout_rate = params.dropout self.tagging_schema = params.tagging_schema self.clip_grad = params.clip_grad self.use_char = params.use_char # name of word embeddings self.emb_name = params.emb_name self.embeddings = embeddings self.vocab = vocab # character vocabulary self.char_vocab = params.char_vocab self.pc = dy.ParameterCollection() # word embedding layer self.emb = WDEmb(pc=self.pc, n_words=len(vocab), dim_w=self.dim_w, pretrained_embeddings=embeddings) # input dimension dim_input = self.input_win * self.dim_w self.lstm_ts = dy.LSTMBuilder(1, dim_input, self.dim_ts_h, self.pc) # hidden layer between LSTM and CRF decoding layer self.hidden = Linear(pc=self.pc, n_in=2*self.dim_ts_h, n_out=self.dim_ts_h, use_bias=True, nonlinear='tanh') # map the word representation to the ts label space # in the label space, both BEG and END tag are considered self.fc_ts = Linear(pc=self.pc, n_in=self.dim_ts_h, n_out=self.dim_ts_y) # transition matrix, [i, j] is the transition score from tag i to tag j self.transitions = self.pc.add_lookup_parameters((self.dim_ts_y + 2, self.dim_ts_y + 2)) # determine the optimizer if params.optimizer == 'sgd': self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr) elif params.optimizer == 'adam': self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9) elif params.optimizer == 'adadelta': self.optimizer = dy.AdadeltaTrainer(self.pc) elif params.optimizer == 'momentum': self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9) else: raise Exception("Unsupported optimizer type: %s" % params.optimizer)
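# The (dim_ts_y + 2) x (dim_ts_y + 2) transition table above reserves two extra
# indices; a common convention (assumed here, not confirmed by the source) is
# BEG = dim_ts_y and END = dim_ts_y + 1. A minimal numpy Viterbi sketch over
# such a table:
import numpy as np

def viterbi_decode(emissions, transitions):
    # emissions: (T, Y) unary scores; transitions[i, j]: score of tag i -> tag j
    T, Y = emissions.shape
    BEG, END = Y, Y + 1
    score = transitions[BEG, :Y] + emissions[0]
    back = []
    for t in range(1, T):
        total = score[:, None] + transitions[:Y, :Y] + emissions[t][None, :]
        back.append(total.argmax(axis=0))
        score = total.max(axis=0)
    score = score + transitions[:Y, END]
    best = [int(score.argmax())]
    for ptr in reversed(back):
        best.append(int(ptr[best[-1]]))
    return list(reversed(best))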
def __init__(self, params, model=None, runtime=False): self.UPSAMPLE_PROJ = 200 self.RNN_SIZE = 448 self.RNN_LAYERS = 1 self.OUTPUT_EMB_SIZE = 1 self.params = params if model is None: self.model = dy.Model() else: self.model = model self.trainer = dy.AdamTrainer(self.model, alpha=1e-4) self.trainer.set_sparse_updates(True) self.trainer.set_clip_threshold(5.0) # self.trainer = dy.AdamTrainer(self.model) # MGCs are extracted at 12.5 ms from utils import orthonormal_VanillaLSTMBuilder lstm_builder = orthonormal_VanillaLSTMBuilder if runtime: lstm_builder = dy.VanillaLSTMBuilder upsample_count = int(12.5 * self.params.target_sample_rate / 1000) # self.upsample_w_s = [] self.upsample_w_t = [] # self.upsample_b_s = [] self.upsample_b_t = [] for _ in xrange(upsample_count): # self.upsample_w_s.append(self.model.add_parameters((self.UPSAMPLE_PROJ, self.params.mgc_order))) self.upsample_w_t.append( self.model.add_parameters( (self.UPSAMPLE_PROJ, self.params.mgc_order * 2))) # self.upsample_b_s.append(self.model.add_parameters((self.UPSAMPLE_PROJ))) self.upsample_b_t.append( self.model.add_parameters((self.UPSAMPLE_PROJ))) self.output_coarse_lookup = self.model.add_lookup_parameters( (256, self.OUTPUT_EMB_SIZE)) self.output_fine_lookup = self.model.add_lookup_parameters( (256, self.OUTPUT_EMB_SIZE)) # self.rnn = orthonormal_VanillaLSTMBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE + self.UPSAMPLE_PROJ, self.RNN_SIZE, self.model) self.rnnCoarse = lstm_builder( self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 2 + self.UPSAMPLE_PROJ, self.RNN_SIZE, self.model) self.rnnFine = lstm_builder( self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 3 + self.UPSAMPLE_PROJ, self.RNN_SIZE, self.model) # self.rnnCoarse = dy.GRUBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 2 + self.UPSAMPLE_PROJ, # self.RNN_SIZE, self.model) # self.rnnFine = dy.GRUBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 3 + self.UPSAMPLE_PROJ, # self.RNN_SIZE, self.model) self.mlp_coarse_w = [] self.mlp_coarse_b = [] self.mlp_coarse_w.append( self.model.add_parameters((self.RNN_SIZE, self.RNN_SIZE))) self.mlp_coarse_b.append(self.model.add_parameters((self.RNN_SIZE))) self.mlp_fine_w = [] self.mlp_fine_b = [] self.mlp_fine_w.append( self.model.add_parameters((self.RNN_SIZE, self.RNN_SIZE))) self.mlp_fine_b.append(self.model.add_parameters((self.RNN_SIZE))) self.softmax_coarse_w = self.model.add_parameters((256, self.RNN_SIZE)) self.softmax_coarse_b = self.model.add_parameters((256)) self.softmax_fine_w = self.model.add_parameters((256, self.RNN_SIZE)) self.softmax_fine_b = self.model.add_parameters((256))
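# A standalone worked example of the upsampling factor above: one MGC frame
# covers 12.5 ms of audio, so the model keeps one projection per sample in the
# frame. The 16 kHz target_sample_rate is an assumption for illustration.
target_sample_rate = 16000
upsample_count = int(12.5 * target_sample_rate / 1000)
print upsample_count  # 200 entries in upsample_w_t / upsample_b_t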
def train(self, traindata, lr=1e-5):
    dy.renew_cg()
    trainer = dy.AdamTrainer(self.model, alpha=lr)

    # Logging
    path = self.fname() + '.log' if len(traindata) > 5000 else self.fname() + 'tuning.log'
    f = open(os.path.join(EVALUATION_PATH, path), 'w')

    epoch_timing = []
    early = 0
    best = -1
    for epoch in range(self.EPOCH):
        print('\n')
        dy.renew_cg()
        losses = []
        closs = 0
        batch_timing = []
        for i, q1id in enumerate(traindata):
            for q2id in traindata[q1id]:
                start = time.time()
                query = traindata[q1id][q2id]['q1_full']
                question = traindata[q1id][q2id]['q2_full']
                label = traindata[q1id][q2id]['label']
                loss = self.get_loss(query, question, label)
                losses.append(loss)

                if len(losses) == self.BATCH:
                    loss = dy.esum(losses)
                    _loss = loss.value()
                    closs += _loss
                    loss.backward()
                    trainer.update()
                    dy.renew_cg()

                    # percentage of the training set processed so far
                    percentage = str(round((float(i + 1) / len(traindata)) * 100, 2)) + '%'
                    # running time of the current epoch
                    time_epoch = self.tepoch(epoch_timing)
                    print("Epoch: {0} \t\t Loss: {1} \t\t Epoch time: {2} \t\t Trainset: {3}".format(
                        epoch + 1, round(_loss, 2), time_epoch, percentage), end=' \r')
                    losses = []
                    batch_timing = []
                end = time.time()
                t = (end - start)
                batch_timing.append(t)
                epoch_timing.append(t)

        log = "Epoch: {0} \t\t Loss: {1} \t\t Best: {2}".format(
            epoch + 1, round(closs / self.BATCH, 2), round(best, 2))
        print('\n' + log)
        f.write(' '.join([log, '\n']))

        log = 'Dev evaluation...'
        print(log)
        f.write(log + '\n')
        map_baseline, map_model, f1score, accuracy = self.test(self.devdata)
        results = 'MAP Model: {0} \t MAP baseline: {1} \t F1 score: {2} \t Accuracy: {3}'.format(
            round(map_model, 2), round(map_baseline, 2), round(f1score, 2), round(accuracy, 2))
        print(results)
        f.write(results)

        epoch_timing = []
        if map_model > best:
            best = copy.copy(map_model)
            early = 0
            path = self.fname() + '.dy'
            self.model.save(os.path.join(EVALUATION_PATH, path))
        else:
            trainer.learning_rate *= 0.5
            early += 1

        if early == self.EARLY_STOP:
            break
    f.close()
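# A stripped-down sketch of the manual minibatching pattern used in train()
# above: accumulate per-example losses, esum them into one scalar, run a single
# backward/update, then renew the graph. loss_fn stands in for self.get_loss.
import dynet as dy

def train_minibatches(trainer, examples, loss_fn, batch_size):
    dy.renew_cg()
    losses = []
    for ex in examples:
        losses.append(loss_fn(ex))        # nodes accumulate in the current graph
        if len(losses) == batch_size:
            batch_loss = dy.esum(losses)  # one scalar for the whole batch
            batch_loss.backward()
            trainer.update()
            dy.renew_cg()                 # losses from the old graph are now stale
            losses = []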
import sys
import random

import numpy as np
import dynet as dy
from sklearn.model_selection import train_test_split

# FeedForwardNeuralNet is a project-local helper (a reconstruction sketch follows below).

input = np.loadtxt('input_sequences')
print "Loaded input"
output = np.loadtxt('output_sequences')
print "Loaded output"
X_train, X_test, y_train, y_test = train_test_split(input, output, test_size=0.1)
print "Split the data into train and test"
num_input = len(input[0])
num_output = num_input
num_hidden = int(sys.argv[1])
m = dy.Model()
dnn_1 = FeedForwardNeuralNet(m, [num_input, [num_hidden, num_hidden], num_output,
                                 [dy.rectify, dy.rectify, dy.logistic]])
trainer = dy.AdamTrainer(m)
train = zip(X_train, y_train)
test = zip(X_test, y_test)
for epoch in range(40):
    print "Epoch: ", epoch
    random.shuffle(train)
    train_loss = 0
    count = 0
    for (inp, out) in train:
        count += 1
        dy.renew_cg()
        loss = dnn_1.calculate_loss_classification(dy.inputTensor(inp),
                                                   dy.inputTensor(out))
        train_loss += loss.value()
        loss.backward()
        trainer.update()  # was missing: backward() alone never updates the weights
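# FeedForwardNeuralNet above is project-local and not shown; this is a
# hypothetical minimal reconstruction of the API its call implies. The spec
# layout [n_in, [hidden sizes], n_out, [one activation per layer]] and the
# binary-cross-entropy loss are assumptions, not the original implementation.
import dynet as dy

class FeedForwardNeuralNet(object):
    def __init__(self, model, spec):
        n_in, hidden, n_out, self.acts = spec
        dims = [n_in] + hidden + [n_out]
        self.W = [model.add_parameters((dims[i + 1], dims[i]))
                  for i in range(len(dims) - 1)]
        self.b = [model.add_parameters(dims[i + 1]) for i in range(len(dims) - 1)]

    def calculate_loss_classification(self, x, y):
        h = x
        for W, b, act in zip(self.W, self.b, self.acts):
            h = act(W * h + b)
        return dy.binary_log_loss(h, y)   # assumes targets lie in [0, 1]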
np.random.seed(666) argparser = argparse.ArgumentParser() argparser.add_argument('--config_file', default='../configs/default.cfg') argparser.add_argument('--model', default='BaseParser') args, extra_args = argparser.parse_known_args() config = Configurable(args.config_file, extra_args) Parser = getattr(models, args.model) vocab = Vocab(config.train_file, config.pretrained_embeddings_file, config.min_occur_count) cPickle.dump(vocab, open(config.save_vocab_path, 'w')) parser = Parser(vocab, config.word_dims, config.pret_dims, config.lemma_dims, config.tag_dims, config.dropout_emb, config.lstm_layers, config.lstm_hiddens, config.dropout_lstm_input, config.dropout_lstm_hidden, config.mlp_rel_size, config.dropout_mlp) data_loader = DataLoader(config.train_file, config.num_buckets_train, vocab) pc = parser.parameter_collection trainer = dy.AdamTrainer(pc, config.learning_rate , config.beta_1, config.beta_2, config.epsilon) global_step = 0 def update_parameters(): trainer.learning_rate =config.learning_rate*config.decay**(global_step / config.decay_steps) trainer.update() epoch = 0 best_F1 = 0. history = lambda x, y : open(os.path.join(config.save_dir, 'valid_history'),'a').write('%.2f %.2f\n'%(x,y)) while global_step < config.train_iters: print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), '\nStart training epoch #%d'%(epoch, ) epoch += 1 for words, lemmas, tags, arcs, rels in \ data_loader.get_batches(batch_size = config.train_batch_size, shuffle = True): num = int(words.shape[1]/2)
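# Standalone worked example (assumed constants) of the schedule in
# update_parameters(): lr = learning_rate * decay ** (global_step / decay_steps).
# With learning_rate=2e-3, decay=0.75, decay_steps=5000 and Python 2 integer
# division, the rate steps down once per 5000 updates:
#   step      0 -> 0.002
#   step   4999 -> 0.002
#   step   5000 -> 0.0015
#   step  10000 -> 0.001125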
def __init__(self, config, encodings, embeddings, runtime=False): # INTERNAL PARAMS ################################################### self.config = config self.encodings = encodings self.word_embeddings = embeddings self.config.char_vocabulary_size = len(encodings.characters) self.decoder_output_class_count = 3 # O S SX self.decoder_output_i2c = {} self.decoder_output_i2c[0] = "O" self.decoder_output_i2c[1] = "S" self.decoder_output_i2c[2] = "SX" self.decoder_output_c2i = {} self.decoder_output_c2i["O"] = 0 self.decoder_output_c2i["S"] = 1 self.decoder_output_c2i["SX"] = 2 # NETWORK ########################################################### self.model = dy.Model() self.trainer = dy.AdamTrainer(self.model) self.trainer.set_sparse_updates(False) # EMBEDDING SPECIAL TOKENS self.word_embeddings_special = self.model.add_lookup_parameters( (2, self.word_embeddings.word_embeddings_size )) # [0] = UNK, [1] = SENTENCE START # ENCODER-CHAR self.char_embeddings = self.model.add_lookup_parameters( (self.config.char_vocabulary_size, self.config.char_embedding_size)) # self.next_chars_embedding = self.model.add_lookup_parameters( # (self.config.char_vocabulary_size, self.config.next_chars_embedding_size)) self.char_embeddings_punctuation = self.model.add_lookup_parameters( (self.config.char_generic_feature_vocabulary_size, self.config.char_generic_feature_embedding_size)) self.char_embeddings_whitespace = self.model.add_lookup_parameters( (self.config.char_generic_feature_vocabulary_size, self.config.char_generic_feature_embedding_size)) self.char_embeddings_uppercase = self.model.add_lookup_parameters( (self.config.char_generic_feature_vocabulary_size, self.config.char_generic_feature_embedding_size)) self.encoder_char_input_size = self.config.char_embedding_size + 3 * self.config.char_generic_feature_embedding_size if runtime: self.encoder_char_lstm1_fw_builder = dy.VanillaLSTMBuilder( 1, self.encoder_char_input_size, self.config.encoder_char_lstm_size, self.model) self.encoder_char_lstm2_bw_builder = dy.VanillaLSTMBuilder( 1, self.config.next_chars_embedding_size + 3 * self.config.char_generic_feature_embedding_size, self.config.encoder_char_lstm_size, self.model) self.encoder_word_lstm_builder = dy.VanillaLSTMBuilder( 1, self.word_embeddings.word_embeddings_size, self.config.encoder_word_lstm_size, self.model) else: from utils import orthonormal_VanillaLSTMBuilder self.encoder_char_lstm1_fw_builder = orthonormal_VanillaLSTMBuilder( 1, self.encoder_char_input_size, self.config.encoder_char_lstm_size, self.model) self.encoder_char_lstm2_bw_builder = orthonormal_VanillaLSTMBuilder( 1, self.config.next_chars_embedding_size + 3 * self.config.char_generic_feature_embedding_size, self.config.encoder_char_lstm_size, self.model) self.encoder_word_lstm_builder = orthonormal_VanillaLSTMBuilder( 1, self.word_embeddings.word_embeddings_size, self.config.encoder_word_lstm_size, self.model) # ENCODER-WORD # self.att_w1 = self.model.add_parameters(( # self.config.next_chars_embedding_size + self.config.char_generic_feature_embedding_size * 3, # self.config.encoder_char_lstm_size)) # self.att_w2 = self.model.add_parameters(( # self.config.next_chars_embedding_size + self.config.char_generic_feature_embedding_size * 3, # self.config.encoder_char_lstm_size)) # self.att_v = self.model.add_parameters( # (1, self.config.next_chars_embedding_size + self.config.char_generic_feature_embedding_size * 3)) # DECODER self.holisticWE = self.model.add_lookup_parameters( (len(encodings.word2int), 
self.word_embeddings.word_embeddings_size)) self.decoder_input_size = 2 * self.config.encoder_char_lstm_size + self.config.encoder_word_lstm_size + self.word_embeddings.word_embeddings_size self.decoder_hiddenW = self.model.add_parameters( (self.config.decoder_hidden_size, self.decoder_input_size)) self.decoder_hiddenB = self.model.add_parameters( (self.config.decoder_hidden_size)) self.decoder_outputW = self.model.add_parameters( (self.decoder_output_class_count, self.config.decoder_hidden_size)) self.decoder_outputB = self.model.add_parameters( (self.decoder_output_class_count)) self.aux_softmax_char_peek_w = self.model.add_parameters( (self.decoder_output_class_count, self.config.encoder_char_lstm_size)) self.aux_softmax_char_peek_b = self.model.add_parameters( (self.decoder_output_class_count)) self.aux_softmax_char_hist_w = self.model.add_parameters( (self.decoder_output_class_count, self.config.encoder_char_lstm_size)) self.aux_softmax_char_hist_b = self.model.add_parameters( (self.decoder_output_class_count)) print("done")
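# A hedged sketch (not the source's code) of the decoding step the parameters
# above imply: a tanh hidden layer over the concatenated decoder input, the
# main O/S/SX softmax, and the two auxiliary softmax heads over char-LSTM
# states. The argument names are assumptions.
import dynet as dy

def decoder_step(self, decoder_input, char_peek_state, char_hist_state):
    hidden = dy.tanh(self.decoder_hiddenW * decoder_input + self.decoder_hiddenB)
    main = dy.softmax(self.decoder_outputW * hidden + self.decoder_outputB)
    aux_peek = dy.softmax(self.aux_softmax_char_peek_w * char_peek_state
                          + self.aux_softmax_char_peek_b)
    aux_hist = dy.softmax(self.aux_softmax_char_hist_w * char_hist_state
                          + self.aux_softmax_char_hist_b)
    return main, aux_peek, aux_hist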
def char_train(network, train_set, val_set, test_set, test_set2, train_set_word, val_set_word, test_set_word, test_set2_word, epochs, batch_size, args, tag_to_ix): def get_val_set_loss(network, val_set, val_set_word, val_author_vecs, pretrain, num_basis): loss = [] vae_loss = [0] l2_loss = [0] for i, (input_sentence, output_sentence) in enumerate(val_set): if args.use_vae: l, a, v, l2 = network.get_full_loss(input_sentence, val_set_word[i][0], output_sentence, val_author_vecs[i], pretrain) loss.append(l.value()) vae_loss.append(v.value()) l2_loss.append(l2.value()) else: loss.append( network.get_loss(input_sentence, val_set_word[i][0], output_sentence, val_author_vecs[i], pretrain).value()) dy.renew_cg() return sum(loss) / len(val_set), sum(vae_loss) / len(val_set), sum( l2_loss) / len(val_set) def get_val_set_acc(network, val_set, val_set_word, val_author_vecs, val_author_ids, pretrain, num_basis): evals = [] if args.use_vae: for i, (input_sentence, output_sentence) in enumerate(val_set): evals.append( network.full_evaluate_acc(input_sentence, val_set_word[i][0], output_sentence, val_author_vecs[i], val_author_ids[i], pretrain)) dy.renew_cg() else: for i, (input_sentence, output_sentence) in enumerate(val_set): evals.append( network.evaluate_acc(input_sentence, val_set_word[i][0], output_sentence, val_author_vecs[i], val_author_ids[i], pretrain)) dy.renew_cg() dy.renew_cg() correct = [c for c, t, d, w, cc, e in evals] total = [t for c, t, d, w, cc, e in evals] mean = 0 confidence = 0 oov = [d for c, t, d, w, cc, e in evals] wrong = [w for c, t, d, w, cc, e in evals] correct2 = [cc for c, t, d, w, cc, e in evals] auth_correct = [ c for i, (c, t, d, w, cc, e) in enumerate(evals) if val_author_vecs[i] is not None ] auth_total = [ t for i, (c, t, d, w, cc, e) in enumerate(evals) if val_author_vecs[i] is not None ] non_auth_correct = [ c for i, (c, t, d, w, cc, e) in enumerate(evals) if val_author_vecs[i] is None ] non_auth_total = [ t for i, (c, t, d, w, cc, e) in enumerate(evals) if val_author_vecs[i] is None ] eids = [e for c, t, d, w, cc, e in evals] #unique_eid = set(eids) len_eid = num_basis counts = [] for i in range(len_eid): counts.append(sum([e == i for e in eids])) counts2 = [] for i in range(len_eid): counts2.append( sum([ e == i for j, e in enumerate(eids) if val_author_vecs[j] is not None ])) if sum(non_auth_total) == 0: non_auth_total = [1] return 100.0 * sum(correct) / sum(total), mean, confidence, sum( oov), sum(wrong), sum(correct2), 100.0 * sum(auth_correct) / sum( auth_total), 100.0 * sum(non_auth_correct) / sum( non_auth_total), counts, counts2 #original_set = train_set #train_set = train_set*epochs if args.optimizer == 'adadelta': trainer = dy.AdadeltaTrainer(network.model) trainer.set_clip_threshold(5) elif args.optimizer == 'adam': trainer = dy.AdamTrainer(network.model, alpha=args.lr) trainer.set_clip_threshold(5) elif args.optimizer == 'sgd-momentum': trainer = dy.MomentumSGDTrainer(network.model, learning_rate=args.lr) else: logging.critical('This Optimizer is not valid or not allowed') losses = [] iterations = [] kk = args.pretrain_epochs if args.use_all_networks: args.network = 'follow' train_author_vecs1, dev_author_vecs1, test_author_vecs1, test2_author_vecs1, train_author_ids, dev_author_ids, test_author_ids, test2_author_ids = extract_authorvecs( args) args.network = 'mention' train_author_vecs2, dev_author_vecs2, test_author_vecs2, test2_author_vecs2, _, _, _, _ = extract_authorvecs( args) args.network = 'retweet' train_author_vecs3, dev_author_vecs3, 
test_author_vecs3, test2_author_vecs3, _, _, _, _ = extract_authorvecs( args) train_author_vecs = [] for i, j, k in zip(train_author_vecs1, train_author_vecs2, train_author_vecs3): train_author_vecs.append((i, j, k)) dev_author_vecs = [] for i, j, k in zip(dev_author_vecs1, dev_author_vecs2, dev_author_vecs3): dev_author_vecs.append((i, j, k)) test_author_vecs = [] for i, j, k in zip(test_author_vecs1, test_author_vecs2, test_author_vecs3): test_author_vecs.append((i, j, k)) test2_author_vecs = [] for i, j, k in zip(test2_author_vecs1, test2_author_vecs2, test2_author_vecs3): test2_author_vecs.append((i, j, k)) else: train_author_vecs, dev_author_vecs, test_author_vecs, test2_author_vecs, train_author_ids, dev_author_ids, test_author_ids, test2_author_ids = extract_authorvecs( args) logging.info('obtained all author vectors ' + str(len(train_author_vecs)) + ' ' + str(len(dev_author_vecs)) + ' ' + str(len(test_author_vecs)) + ' ' + str(len(test2_author_vecs))) batch_loss_vec = [] dy.renew_cg() is_best = 0 best_val = 0 count = 0 count_train = -1 #early_stopping = 0 for epoch in range(epochs): #if early_stopping>args.early_epochs: # break all_inds = [] num_train = int(len(train_set) / args.batch_size + 1) * args.batch_size #prev_time=time.time() for ii in range(num_train): count_train += 1 if count_train == len(train_set): count_train = 0 count += 1 inputs, outputs = train_set[count_train] inputs_word, _ = train_set_word[count_train] ''' data_point = {'inputs':inputs, 'inputs_word':inputs_word, 'outputs':outputs, 'train_author_vecs':train_author_vecs[i]} pickle.dump(data_point,open( "data_pickle/"+str(i)+".p", "wb" )) data_point = pickle.load( open( "data_pickle/"+str(i)+".p", "rb" ) ) inputs = data_point['inputs'] inputs_word = data_point['inputs_word'] outputs = data_point['outputs'] train_author_vec = data_point['train_author_vecs'] ''' #prev_time2 = time.time() #if train_author_vecs[count_train] !=None: vae_loss = 0 if args.use_vae: loss, ind, vae_loss, l2_loss = network.get_full_loss( inputs, inputs_word, outputs, train_author_vecs[count_train], epoch < kk, True) else: loss, ind = network.get_loss(inputs, inputs_word, outputs, train_author_vecs[count_train], epoch < kk, True) #curr_time2 = time.time() #print ('time for one instance: ', curr_time2 - prev_time2) all_inds.append(ind) #print (loss) #a = input() batch_loss_vec.append(loss) if count % batch_size == 0: batch_loss = dy.esum(batch_loss_vec) / batch_size batch_loss.forward() batch_loss.backward() trainer.update() batch_loss_vec = [] dy.renew_cg() count = 0 #logging.info('finished minibatch: %d/%d',ii,num_train) #print ('until here-----') #curr_time = time.time() #print ('time for one epoch training: ', curr_time - prev_time) counts = [] for i in range(args.num_basis): a = [v == i for v in all_inds] counts.append(sum(a)) logging.info('distribution of the data points' + str(counts)) #if ((i+1))%len(original_set) == 0: if args.plots: val_loss = get_val_set_loss(network, val_set, val_set_word, dev_author_vecs, epoch < kk, args.num_basis) losses.append(val_loss) iterations.append(epoch) #dy.renew_cg() #if ((i+1))%len(original_set)==0: train_loss = 0 if args.slow: train_loss, train_vae_loss, train_l2_loss = get_val_set_loss( network, train_set, train_set_word, train_author_vecs, epoch < kk, args.num_basis) if args.write_errors: f = open(args.log_errors_file, 'a') f.write('\n--------- epoch no: --------- ') f.write(str(epoch) + '\n') f.close() f = open(args.log_errors_file, 'a') f.write('\n--------- oct27.train errors: --------- \n') 
f.close() #prev_time = time.time() trainacc, train_acc, train_confidence, oov_train, wrong_train, correct_train, auth_acc1, non_auth_acc1, eids1, counts21 = get_val_set_acc( network, train_set, train_set_word, train_author_vecs, train_author_ids, epoch < kk, args.num_basis) #curr_time = time.time() #print ('time for acc train: ', curr_time - prev_time) if args.write_errors: f = open(args.log_errors_file, 'a') f.write('\n--------- oct27.dev errors: ---------\n') f.close() val_loss, val_vae_loss, val_l2_loss = 0, 0, 0 val_acc, oov_val, wrong_val, correct_val = 0, 0, 0, 0 if args.slow: pass #val_loss,val_vae_loss = get_val_set_loss(network, val_set, val_set_word, dev_author_vecs,epoch<kk, args.num_basis) #prev_time = time.time() valacc, val_acc, val_confidence, oov_val, wrong_val, correct_val, auth_acc2, non_auth_acc2, eids2, counts22 = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 #valacc, val_acc, val_confidence, oov_val, wrong_val, correct_val, auth_acc2, non_auth_acc2, eids2, counts22 = get_val_set_acc(network, val_set, val_set_word, dev_author_vecs, dev_author_ids, epoch<kk, args.num_basis) #curr_time = time.time() #print ('time for acc val: ', curr_time - prev_time) if args.write_errors: f = open(args.log_errors_file, 'a') f.write('\n--------- oct27.test errors: --------- \n') f.close() test_loss = 0 if args.slow: test_loss, test_vae_loss, test_l2_loss = get_val_set_loss( network, test_set, test_set_word, test_author_vecs, epoch < kk, args.num_basis) #prev_time = time.time() testacc, test_acc, test_confidence, oov_test, wrong_test, correct_test, auth_acc3, non_auth_acc3, eids3, counts23 = get_val_set_acc( network, test_set, test_set_word, test_author_vecs, test_author_ids, epoch < kk, args.num_basis) #curr_time = time.time() #print ('time for acc test: ', curr_time - prev_time) if args.write_errors: f = open(args.log_errors_file, 'a') f.write('\n--------- daily547.test errors: --------- \n') f.close() test_loss2 = 0 if args.slow: test_loss2, test_vae_loss2, test2_l2_loss = get_val_set_loss( network, test_set2, test_set2_word, test2_author_vecs, epoch < kk, args.num_basis) #prev_time = time.time() testacc2, test_acc2, test2_confidence, oov_test2, wrong_test2, correct_test2, auth_acc4, non_auth_acc4, eids4, counts24 = get_val_set_acc( network, test_set2, test_set2_word, test2_author_vecs, test2_author_ids, epoch < kk, args.num_basis) #curr_time = time.time() #print ('time for acc test2: ', curr_time - prev_time) #test_loss2 = get_val_set_loss(network, test_set2, test_set2_word, test_author_vecs, epoch<kk) #test_acc2, oov_test2, wrong_test2, correct_test2, auth_acc4, non_auth_acc4, eids4 = get_val_set_acc(network, test_set2, test_set2_word, test_author_vecs,epoch<kk) #prev_time = time.time() logging.info('epoch %d done', epoch) logging.info( 'train loss: %f, train vae loss: %f, train l2 loss: %f, train acc: %f', train_loss, train_vae_loss, train_l2_loss, trainacc) logging.info( 'val loss: %f, val vae loss: %f, val l2 loss: %f, val acc: %f', val_loss, val_vae_loss, val_l2_loss, valacc) logging.info( 'test loss: %f, test vae loss: %f, test l2 loss: %f, test acc: %f', test_loss, test_vae_loss, test_l2_loss, testacc) logging.info( 'test2 loss: %f, tes2 vae loss: %f, tes2 l2 loss: %f, test2 acc: %f', test_loss2, test_vae_loss2, test2_l2_loss, testacc2) logging.info( ' oov_train: %d/%d, %d, oov_val: %d/%d, %d, oov_test: %d/%d, %d, oov_test2: %d/%d, %d', oov_train, wrong_train, correct_train, oov_val, wrong_val, correct_val, oov_test, wrong_test, correct_test, oov_test2, wrong_test2, correct_test2) logging.info( 
'train: author_acc: %f, non_author_acc: %f, ' + str(eids1) + ' ' + str(counts21), auth_acc1, non_auth_acc1) logging.info( 'dev: author_acc: %f, non_author_acc: %f, ' + str(eids2) + ' ' + str(counts22), auth_acc2, non_auth_acc2) logging.info( 'test: author_acc: %f, non_author_acc: %f, ' + str(eids3) + ' ' + str(counts23), auth_acc3, non_auth_acc3) logging.info( 'test2: author_acc: %f, non_author_acc: %f, ' + str(eids4) + ' ' + str(counts24), auth_acc4, non_auth_acc4) if args.plots: test_acc, test_confidence, confusion_matrix, auth_acc, non_auth_acc, eids = get_val_set_acc2( network, test_set, test_set_word, test_author_vecs, epoch < kk, args.num_basis) df_cm = pd.DataFrame(confusion_matrix, index=[i for i in tag_to_ix.keys()], columns=[i for i in tag_to_ix.keys()]) fig = plt.figure(figsize=(10, 7)) sn.heatmap(df_cm, annot=True) fig.savefig('figs/conf_matrix_' + str(epoch) + '.png') #a = input() if args.combine_train_dev: valacc = testacc elif args.combine_train_dev_test: valacc = testacc2 else: valacc = valacc m = network.model if epoch == 0: best_acc = valacc best_epoch = 0 #best_val = val_loss #if args.combine_train_dev: # best_acc = testacc #else: # best_acc = valacc if args.save_model: m.save(args.save_model) logging.info('saving best model') else: #if args.combine_train_dev: # valacc = testacc # #if best_acc < valacc: # early_stopping = 0 # if args.combine_train_dev: # best_acc = testacc # else: # best_acc = valacc if best_acc <= valacc: best_acc = valacc best_epoch = epoch if args.save_model: m.save(args.save_model) logging.info('re-saving best model') #else: # early_stopping+=1 logging.info('best model is at epoch no: %d', best_epoch) logging.info('\nbest model details are at epoch no: %d', best_epoch) #curr_time = time.time() #print ('time for rest junk: ', curr_time - prev_time) ''' if count%batch_size!=0: batch_loss = dy.esum(batch_loss_vec)/len(batch_loss_vec) batch_loss.forward() batch_loss.backward() trainer.update() batch_loss_vec=[] dy.renew_cg() ''' if args.plots: fig = plt.figure() plt.plot(iterations, losses) axes = plt.gca() axes.set_xlim([0, epochs]) axes.set_ylim([0, 10000]) fig.savefig('figs/loss_plot.png')
def __init__(self, pc, epochs=40): self.epochs = epochs self.pc = pc self.trainer = dy.AdamTrainer(pc, alpha=.005) self.BATCH_SIZE = 1 self.lr = .005
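# A minimal usage sketch; 'Config' stands in for the enclosing class name,
# which is not shown in the source.
import dynet as dy

pc = dy.ParameterCollection()
cfg = Config(pc, epochs=40)
assert cfg.trainer.learning_rate == cfg.lr == .005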
def __init__(self, config, encodings, embeddings, runtime=False): self.config = config self.word_embeddings = embeddings self.encodings = encodings self.modelSS = dy.Model() self.modelTok = dy.Model() self.trainerSS = dy.AdamTrainer(self.modelSS, alpha=2e-3, beta_1=0.9, beta_2=0.9) self.trainerTok = dy.AdamTrainer(self.modelTok, alpha=2e-3, beta_1=0.9, beta_2=0.9) # sentence split model from wrappers import CNN, CNNConvLayer, CNNPoolingLayer from utils import orthonormal_VanillaLSTMBuilder # character-level-embeddings self.SS_char_lookup = self.modelSS.add_lookup_parameters( (len(self.encodings.char2int), self.config.ss_char_embeddings_size)) self.SS_char_lookup_casing = self.modelSS.add_lookup_parameters( (3, 5)) # lower, upper N/A self.SS_char_lookup_special = self.modelSS.add_lookup_parameters( (2, self.config.ss_char_embeddings_size + 5)) # lstm-peek network if runtime: self.SS_peek_lstm = dy.VanillaLSTMBuilder( self.config.ss_peek_lstm_layers, self.config.ss_char_embeddings_size + 5, self.config.ss_peek_lstm_size, self.modelSS) else: self.SS_peek_lstm = orthonormal_VanillaLSTMBuilder( self.config.ss_peek_lstm_layers, self.config.ss_char_embeddings_size + 5, self.config.ss_peek_lstm_size, self.modelSS) layer_is = self.config.ss_peek_lstm_size self.SS_aux_softmax_peek_w = self.modelSS.add_parameters((2, layer_is)) self.SS_aux_softmax_peek_b = self.modelSS.add_parameters((2)) if runtime: self.SS_lstm = dy.VanillaLSTMBuilder( self.config.ss_lstm_layers, self.config.ss_char_embeddings_size + 5, self.config.ss_lstm_size, self.modelSS) else: self.SS_lstm = orthonormal_VanillaLSTMBuilder( self.config.ss_lstm_layers, self.config.ss_char_embeddings_size + 5, self.config.ss_lstm_size, self.modelSS) self.SS_aux_softmax_prev_w = self.modelSS.add_parameters( (2, self.config.ss_lstm_size)) self.SS_aux_softmax_prev_b = self.modelSS.add_parameters((2)) # post MLP and softmax self.SS_mlp_w = [] self.SS_mlp_b = [] layer_is = self.config.ss_lstm_size + self.config.ss_peek_lstm_size for layer in self.config.ss_mlp_layers: self.SS_mlp_w.append(self.modelSS.add_parameters( (layer, layer_is))) self.SS_mlp_b.append(self.modelSS.add_parameters((layer))) layer_is = layer self.SS_mlp_softmax_w = self.modelSS.add_parameters((2, layer_is)) self.SS_mlp_softmax_b = self.modelSS.add_parameters((2)) # tokenization model self.TOK_char_lookup = self.modelTok.add_lookup_parameters( (len(self.encodings.char2int), self.config.tok_char_embeddings_size)) self.TOK_char_lookup_casing = self.modelTok.add_lookup_parameters( (3, 5)) # lower, upper N/A self.TOK_char_lookup_special = self.modelTok.add_lookup_parameters( (2, self.config.tok_char_embeddings_size + 5)) self.TOK_word_lookup = self.modelTok.add_lookup_parameters( (len(self.encodings.word2int), self.config.tok_word_embeddings_size)) self.TOK_word_embeddings_special = self.modelTok.add_lookup_parameters( (2, self.word_embeddings.word_embeddings_size)) self.TOK_word_proj_w = self.modelTok.add_parameters( (self.config.tok_word_embeddings_size, self.word_embeddings.word_embeddings_size)) # lstm networks if runtime: self.TOK_backward_lstm = dy.VanillaLSTMBuilder( self.config.tok_char_peek_lstm_layers, self.config.tok_char_embeddings_size + 5, self.config.tok_char_peek_lstm_size, self.modelTok) self.TOK_forward_lstm = dy.VanillaLSTMBuilder( self.config.tok_char_lstm_layers, self.config.tok_char_embeddings_size + 5, self.config.tok_char_lstm_size, self.modelTok) self.TOK_word_lstm = dy.VanillaLSTMBuilder( self.config.tok_word_lstm_layers, self.config.tok_word_embeddings_size, 
self.config.tok_word_lstm_size, self.modelTok) else: self.TOK_backward_lstm = orthonormal_VanillaLSTMBuilder( self.config.tok_char_peek_lstm_layers, self.config.tok_char_embeddings_size + 5, self.config.tok_char_peek_lstm_size, self.modelTok) self.TOK_forward_lstm = orthonormal_VanillaLSTMBuilder( self.config.tok_char_lstm_layers, self.config.tok_char_embeddings_size + 5, self.config.tok_char_lstm_size, self.modelTok) self.TOK_word_lstm = orthonormal_VanillaLSTMBuilder( self.config.tok_word_lstm_layers, self.config.tok_word_embeddings_size, self.config.tok_word_lstm_size, self.modelTok) self.TOK_mlp_w = [] self.TOK_mlp_b = [] layer_input = self.config.tok_word_lstm_size + self.config.tok_char_lstm_size + self.config.tok_char_peek_lstm_size + 2 + self.config.tok_word_embeddings_size for layer_size in self.config.tok_mlp_layers: self.TOK_mlp_w.append( self.modelTok.add_parameters((layer_size, layer_input))) self.TOK_mlp_b.append(self.modelTok.add_parameters((layer_size))) layer_input = layer_size self.TOK_softmax_w = self.modelTok.add_parameters((2, layer_input)) self.TOK_softmax_b = self.modelTok.add_parameters((2)) self.TOK_softmax_peek_w = self.modelTok.add_parameters( (2, self.config.tok_char_peek_lstm_size)) self.TOK_softmax_peek_b = self.modelTok.add_parameters((2)) self.TOK_softmax_prev_w = self.modelTok.add_parameters( (2, self.config.tok_char_lstm_size)) self.TOK_softmax_prev_b = self.modelTok.add_parameters((2)) self.losses = [] self.losses_tok = []
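# A hedged sketch (assumption, not the source) of the tokenization head the
# parameters above imply: the concatenated features run through the configured
# MLP stack (tanh assumed between layers) and end in a binary softmax.
import dynet as dy

def tok_forward(self, features):
    h = features
    for w, b in zip(self.TOK_mlp_w, self.TOK_mlp_b):
        h = dy.tanh(w * h + b)
    return dy.softmax(self.TOK_softmax_w * h + self.TOK_softmax_b)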
w2i = defaultdict(lambda: len(w2i))  # must exist before the UNK lookup below
UNK = w2i["<unk>"]

def read_dataset(filename):
    with open(filename, "r") as f:
        for line in f:
            yield [w2i[x] for x in line.strip().split(" ")]

# Read in the data
train = list(read_dataset("../data/ptb/train.txt"))
w2i = defaultdict(lambda: UNK, w2i)  # freeze the vocabulary: unseen words map to UNK
dev = list(read_dataset("../data/ptb/valid.txt"))
i2w = {v: k for k, v in w2i.items()}
nwords = len(w2i)

# Start DyNet and define trainer
model = dy.Model()
trainer = dy.AdamTrainer(model, alpha=0.001)

# Define the model
W_emb = model.add_lookup_parameters((nwords, EMB_SIZE))  # word embeddings, one per position
W_h_p = model.add_parameters((HID_SIZE, EMB_SIZE * N))   # weights of the hidden layer
b_h_p = model.add_parameters((HID_SIZE))                 # bias of the hidden layer
W_sm_p = model.add_parameters((nwords, HID_SIZE))        # weights of the softmax
b_sm_p = model.add_parameters((nwords))                  # softmax bias

# A function to calculate scores for one value
def calc_score_of_histories(words, dropout=0.0):
    # This will change from a list of histories, to a list of words in each history position
    words = np.transpose(words)
    # Lookup the embeddings and concatenate them
    emb = dy.concatenate([dy.lookup_batch(W_emb, x) for x in words])
    # Create the hidden layer
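    # The original snippet is cut off here; a plausible completion sketch
    # (the dropout handling is an assumption): affine + tanh hidden layer,
    # then unnormalized log-scores over the vocabulary.
    h = dy.tanh(W_h_p * emb + b_h_p)
    if dropout != 0.0:
        h = dy.dropout(h, dropout)
    return W_sm_p * h + b_sm_p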
model.enable_dropout() uas = correct_ua / num_tokens las = correct_la / num_tokens print("\nUAS: {0:.4}, LAS: {1:.4}".format(uas, las)) if __name__ == "__main__": basename = "../build/en" index = read_index(basename) train_data = list(map_to_instances(read_conllu("../treebanks/train/en/en.conllu"), index, FIELDS)) pc = dy.ParameterCollection() model = MLPParser(pc, basename="../build/en") model.enable_dropout() trainer = dy.AdamTrainer(pc) print("training sentences: {0}, tokens: {1}".format(len(train_data), sum([len(tree) for tree in train_data]))) batch_size = 50 max_steps = 1000 step = 0 total_loss = 0 batch_loss = [] batch_tokens = 0 dy.renew_cg() for tree in shuffled_stream(train_data): batch_loss.append(arc_loss(model, tree))
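        batch_tokens += len(tree)
        # The snippet is cut off here; a plausible continuation sketch using the
        # counters initialized above: flush the accumulated losses every
        # batch_size trees, one trainer update per batch.
        if len(batch_loss) >= batch_size:
            total = dy.esum(batch_loss) / batch_tokens
            total_loss += total.value()
            total.backward()
            trainer.update()
            dy.renew_cg()
            batch_loss, batch_tokens = [], 0
            step += 1
            if step >= max_steps:
                break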
def __init__(self, lemmatizer_config, encodings, embeddings, runtime=False): self.config = lemmatizer_config self.encodings = encodings self.embeddings = embeddings self.losses = [] self.model = dy.Model() self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9, beta_2=0.9) self.character_network = CharacterNetwork( self.config.tag_embeddings_size, encodings, rnn_size=self.config.char_rnn_size, rnn_layers=self.config.char_rnn_layers, embeddings_size=self.config.char_embeddings, model=self.model, runtime=runtime) self.upos_lookup = self.model.add_lookup_parameters( (len(self.encodings.upos2int), self.config.tag_embeddings_size)) self.xpos_lookup = self.model.add_lookup_parameters( (len(self.encodings.xpos2int), self.config.tag_embeddings_size)) self.attrs_lookup = self.model.add_lookup_parameters( (len(self.encodings.attrs2int), self.config.tag_embeddings_size)) self.char_lookup = self.model.add_lookup_parameters( (len(self.encodings.char2int), self.config.char_embeddings)) if runtime: self.rnn = dy.LSTMBuilder( self.config.rnn_layers, self.config.char_rnn_size * 2 + self.config.char_embeddings, self.config.rnn_size, self.model) else: from utils import orthonormal_VanillaLSTMBuilder self.rnn = orthonormal_VanillaLSTMBuilder( self.config.rnn_layers, self.config.char_rnn_size * 2 + self.config.char_embeddings, self.config.rnn_size, self.model) self.att_w1 = self.model.add_parameters( (200, self.config.char_rnn_size * 2)) self.att_w2 = self.model.add_parameters( (200, self.config.rnn_size + self.config.tag_embeddings_size)) self.att_v = self.model.add_parameters((1, 200)) self.start_lookup = self.model.add_lookup_parameters( (1, self.config.char_rnn_size * 2 + self.config.char_embeddings)) self.softmax_w = self.model.add_parameters( (len(self.encodings.char2int) + 1, self.config.rnn_size)) self.softmax_b = self.model.add_parameters( (len(self.encodings.char2int) + 1)) self.softmax_casing_w = self.model.add_parameters( (2, self.config.rnn_size)) self.softmax_casing_b = self.model.add_parameters((2))
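# A hedged sketch (assumption, not the source) of the additive attention that
# att_w1 / att_w2 / att_v imply: att_w1 scores the bidirectional character
# states, att_w2 the concatenated decoder state and tag embedding.
import dynet as dy

def attend(self, encoder_states, decoder_and_tag):
    scores = [self.att_v * dy.tanh(self.att_w1 * enc
                                   + self.att_w2 * decoder_and_tag)
              for enc in encoder_states]
    weights = dy.softmax(dy.concatenate(scores))
    # context vector: attention-weighted sum of the encoder states
    return dy.concatenate_cols(encoder_states) * weights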
def train(self): trainer = dy.AdamTrainer(self.model) total_processed = 0 current_processed = 0 check_every = len(self.train_trees) / 3 best_dev_score = -np.inf start_time = time.time() batch_size = 10 epochs = 1 for epoch in itertools.count(start=1): if epoch > epochs: break np.random.shuffle(self.train_trees) epoch_start_time = time.time() for start_index in range(0, len(self.train_trees), batch_size): dy.renew_cg() batch_losses = [] for tree in self.train_trees[start_index:start_index + batch_size]: document = [leaf._text for leaf in tree.leaves()] _, loss = self.parse(document, tree) batch_losses.append(loss) total_processed += 1 current_processed += 1 batch_loss = dy.average(batch_losses) batch_loss_value = batch_loss.scalar_value() batch_loss.backward() trainer.update() print( "epoch {:,} " "batch {:,}/{:,} " "processed {:,} " "batch-loss {:.4f} ".format( epoch, start_index // batch_size + 1, int(np.ceil(len(self.train_trees) / batch_size)), total_processed, batch_loss_value, #format_elapsed(epoch_start_time), #format_elapsed(start_time), ) ) if current_processed >= check_every: current_processed -= check_every dev_predicted = [] scores = 0 for tree in self.dev_trees: dy.renew_cg() sentence = [leaf._text for leaf in tree.leaves()] predicted, score = self.parse(sentence) dev_predicted.append((predicted, score.npvalue())) for i, (predicted, score) in enumerate(dev_predicted): scores += score i_path = os.path.join("predicted_long", self.trees_indices[i]) try: os.mkdir("predicted_long") except FileExistsError: pass try: os.mkdir(i_path) except FileExistsError: pass j = 0 while os.path.exists(os.path.join(i_path, str(j))): j += 1 with open(os.path.join(i_path, str(j)), "w") as f: f.write(myTree.tree_utils.output_tree(predicted)) with open(os.path.join(i_path, str(j)+'.score.txt'), 'w') as f: f.write(str(score)) logger.info("Score: {}".format(score))
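# The commented-out format_elapsed(...) calls above suggest a timing helper;
# a hypothetical version for completeness:
import time

def format_elapsed(start_time):
    seconds = int(time.time() - start_time)
    hours, rest = divmod(seconds, 3600)
    minutes, secs = divmod(rest, 60)
    return "{}h{:02}m{:02}s".format(hours, minutes, secs)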
def __init__(self, vocab, pos, xpos, rels, w2i, c2i, ext_words_train, ext_words_devtest, options): self.model = dy.ParameterCollection() self.pretrained_embs = dy.ParameterCollection() self.learning_rate = options.learning_rate self.trainer = dy.AdamTrainer(self.model, alpha=self.learning_rate, beta_1=0.9, beta_2=0.9, eps=1e-12) self.dropout = float(options.dropout) self.ldims = options.lstm_dims self.hidden2 = options.hidden_2 self.wdims = options.wembedding_dims self.cdims = options.cembedding_dims self.posdims = options.posembedding_dims self.pred_batch_size = options.pred_batch_size self.ext_words_train = { word: ind + 2 for word, ind in ext_words_train.items() } self.ext_words_devtest = { word: ind + 2 for word, ind in ext_words_devtest.items() } self.wordsCount = vocab self.vocab = {word: ind + 2 for word, ind in w2i.items()} self.pos = {word: ind + 2 for ind, word in enumerate(pos)} self.id2pos = {ind: word for word, ind in self.pos.items()} self.xpos = {word: ind + 2 for ind, word in enumerate(xpos)} self.id2xpos = {ind: word for word, ind in self.xpos.items()} self.c2i = c2i self.rels = {word: ind for ind, word in enumerate(rels)} self.irels = {ind: word for word, ind in self.rels.items()} self.vocab['PAD'] = 1 self.pos['PAD'] = 1 self.xpos['PAD'] = 1 self.external_embedding, self.edim, self.edim_out = None, 0, 0 if options.external_embedding is not None: self.external_embedding = np.load(options.external_embedding) self.ext_voc = pickle.load( open(options.external_embedding_voc, "rb")) self.edim = self.external_embedding.shape[1] self.projected_embs = Lin_Projection(self.model, self.edim, self.wdims) self.elookup_train = self.pretrained_embs.add_lookup_parameters( (len(self.ext_words_train) + 2, self.edim)) for word, i in self.ext_words_train.items(): self.elookup_train.init_row( i, self.external_embedding[self.ext_voc[word], :]) self.elookup_train.init_row(0, np.zeros(self.edim)) self.elookup_train.init_row(1, np.zeros(self.edim)) self.elookup_devtest = self.pretrained_embs.add_lookup_parameters( (len(self.ext_words_devtest) + 2, self.edim)) for word, i in self.ext_words_devtest.items(): self.elookup_devtest.init_row( i, self.external_embedding[self.ext_voc[word], :]) self.elookup_devtest.init_row(0, np.zeros(self.edim)) self.elookup_devtest.init_row(1, np.zeros(self.edim)) self.ext_words_train['PAD'] = 1 self.ext_words_devtest['PAD'] = 1 print( 'Load external embeddings. 
External embeddings vectors dimension', self.edim) #LSTMs self.fwdLSTM1 = LSTM(self.model, self.wdims + self.posdims, self.ldims, forget_bias=0.0) self.bwdLSTM1 = LSTM(self.model, self.wdims + self.posdims, self.ldims, forget_bias=0.0) self.fwdLSTM2 = LSTM(self.model, 2 * self.ldims, self.ldims, forget_bias=0.0) self.bwdLSTM2 = LSTM(self.model, 2 * self.ldims, self.ldims, forget_bias=0.0) self.attention = AttentionDecoder(self.model, len(self.rels), src_ctx_dim=self.ldims * 2, hidden=self.hidden2, dropout=self.dropout) self.HybridCharembs = HybridCharacterAttention(self.model, ldims=400, input_size=self.cdims, output_size=self.wdims, dropout=self.dropout) self.wlookup = self.model.add_lookup_parameters( (len(vocab) + 2, self.wdims), init=dy.ConstInitializer(0)) #0 for unknown 1 for [PAD] self.poslookup = self.model.add_lookup_parameters( (len(self.pos) + 2, self.posdims), init=dy.ConstInitializer(0)) #0 for unknown 1 for [PAD] self.xposlookup = self.model.add_lookup_parameters( (len(self.xpos) + 2, self.posdims), init=dy.ConstInitializer(0)) #0 for unknown 1 for [PAD] self.clookup = self.model.add_lookup_parameters( (len(c2i), self.cdims), init=dy.NormalInitializer()) self.ROOT = self.model.add_parameters((self.wdims * 2), init=dy.ConstInitializer(0))
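# Lin_Projection is project-local and not shown; a hypothetical minimal version
# consistent with its call above (it projects edim-dimensional pretrained
# vectors down to wdims):
import dynet as dy

class Lin_Projection(object):
    def __init__(self, model, n_in, n_out):
        self.W = model.add_parameters((n_out, n_in))

    def __call__(self, x):
        return self.W * x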
def __init__(self):
    # first initialize a computation graph container (or model).
    self.nnmodel = dynet.Model()
    # assign the algorithm for backpropagation updates.
    self.updater = dynet.AdamTrainer(self.nnmodel)
    num_words, num_tags, num_labels = 4808, 46, 46
    word_embed_dim, pos_embed_dim, label_embed_dim = 100, 32, 32
    hidden_layer1_dim, hidden_layer2_dim = 600, 600
    num_actions = 93
    self.minibatch_size = 1000
    # create embeddings for words and tag features.
    self.word_embedding = self.nnmodel.add_lookup_parameters(
        (num_words, word_embed_dim))
    glove_word_embeddings_dict = {}  # key is the word, value is its 100 embedding values
    embed_lines = open("glove.6B.100d.txt", 'r').read().splitlines()
    for line in embed_lines:
        word = line.split()[0]
        values = line.split()
        del values[0]
        glove_word_embeddings_dict[word] = values
    vocab_words = open("./data/vocabs.word", 'r').read().splitlines()
    # was: i = 0 with no increment, so every GloVe vector overwrote row 0;
    # also use init_row, which is the lookup-parameter initialization API.
    for i, word_line in enumerate(vocab_words):
        word = word_line.split()[0]
        if word in glove_word_embeddings_dict:
            self.word_embedding.init_row(
                i, np.asarray(glove_word_embeddings_dict[word], dtype=float))
    self.pos_embedding = self.nnmodel.add_lookup_parameters(
        (num_tags, pos_embed_dim))
    self.label_embedding = self.nnmodel.add_lookup_parameters(
        (num_labels, label_embed_dim))
    # assign the transfer function: a leaky ReLU, max(.1 * x, x).
    self.transfer = (lambda x: dynet.bmax(.1 * x, x))
    self.input_dim = 20 * (word_embed_dim + pos_embed_dim) + 12 * label_embed_dim
    self.hidden_layer1 = self.nnmodel.add_parameters(
        (hidden_layer1_dim, self.input_dim))
    self.hidden_layer1_bias = self.nnmodel.add_parameters(
        hidden_layer1_dim, init=dynet.ConstInitializer(0.2))
    self.hidden_layer2 = self.nnmodel.add_parameters(
        (hidden_layer2_dim, hidden_layer1_dim))
    self.hidden_layer2_bias = self.nnmodel.add_parameters(
        hidden_layer2_dim, init=dynet.ConstInitializer(0.2))
    # define the output weight.
    self.output_layer = self.nnmodel.add_parameters(
        (num_actions, hidden_layer2_dim))
    # define the bias vector and initialize it as zero.
    self.output_bias = self.nnmodel.add_parameters(
        num_actions, init=dynet.ConstInitializer(0))
    self.dropout_prob = 0.2
    '''
    You can add more arguments, for example actions and model paths.
    You need to load your model here.
    actions: provides indices for actions. It has the same order as the
    data/vocabs.actions file.
    '''
    # if you prefer to have your own index for actions, change this.
self.actions = [ 'SHIFT', 'LEFT-ARC:prep', 'LEFT-ARC:dobj', 'LEFT-ARC:poss', 'LEFT-ARC:amod', 'LEFT-ARC:xcomp', 'LEFT-ARC:mark', 'LEFT-ARC:conj', 'LEFT-ARC:nn', 'LEFT-ARC:rcmod', 'LEFT-ARC:advcl', 'LEFT-ARC:cc', 'LEFT-ARC:pcomp', 'LEFT-ARC:expl', 'LEFT-ARC:tmod', 'LEFT-ARC:csubj', 'LEFT-ARC:number', 'LEFT-ARC:iobj', 'LEFT-ARC:<null>', 'LEFT-ARC:preconj', 'LEFT-ARC:nsubj', 'LEFT-ARC:appos', 'LEFT-ARC:infmod', 'LEFT-ARC:partmod', 'LEFT-ARC:ccomp', 'LEFT-ARC:aux', 'LEFT-ARC:auxpass', 'LEFT-ARC:parataxis', 'LEFT-ARC:det', 'LEFT-ARC:punct', 'LEFT-ARC:discourse', 'LEFT-ARC:dep', 'LEFT-ARC:cop', 'LEFT-ARC:pobj', 'LEFT-ARC:num', 'LEFT-ARC:prt', 'LEFT-ARC:possessive', 'LEFT-ARC:rroot', 'LEFT-ARC:npadvmod', 'LEFT-ARC:mwe', 'LEFT-ARC:neg', 'LEFT-ARC:predet', 'LEFT-ARC:nsubjpass', 'LEFT-ARC:quantmod', 'LEFT-ARC:root', 'LEFT-ARC:acomp', 'LEFT-ARC:advmod', 'RIGHT-ARC:prep', 'RIGHT-ARC:dobj', 'RIGHT-ARC:poss', 'RIGHT-ARC:amod', 'RIGHT-ARC:xcomp', 'RIGHT-ARC:mark', 'RIGHT-ARC:conj', 'RIGHT-ARC:nn', 'RIGHT-ARC:rcmod', 'RIGHT-ARC:advcl', 'RIGHT-ARC:cc', 'RIGHT-ARC:pcomp', 'RIGHT-ARC:expl', 'RIGHT-ARC:tmod', 'RIGHT-ARC:csubj', 'RIGHT-ARC:number', 'RIGHT-ARC:iobj', 'RIGHT-ARC:<null>', 'RIGHT-ARC:preconj', 'RIGHT-ARC:nsubj', 'RIGHT-ARC:appos', 'RIGHT-ARC:infmod', 'RIGHT-ARC:partmod', 'RIGHT-ARC:ccomp', 'RIGHT-ARC:aux', 'RIGHT-ARC:auxpass', 'RIGHT-ARC:parataxis', 'RIGHT-ARC:det', 'RIGHT-ARC:punct', 'RIGHT-ARC:discourse', 'RIGHT-ARC:dep', 'RIGHT-ARC:cop', 'RIGHT-ARC:pobj', 'RIGHT-ARC:num', 'RIGHT-ARC:prt', 'RIGHT-ARC:possessive', 'RIGHT-ARC:rroot', 'RIGHT-ARC:npadvmod', 'RIGHT-ARC:mwe', 'RIGHT-ARC:neg', 'RIGHT-ARC:predet', 'RIGHT-ARC:nsubjpass', 'RIGHT-ARC:quantmod', 'RIGHT-ARC:root', 'RIGHT-ARC:acomp', 'RIGHT-ARC:advmod' ]
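# A standalone check that the transfer function above is a leaky ReLU:
# bmax(.1 * x, x) keeps positive entries and scales negative ones by 0.1.
import numpy as np
import dynet

dynet.renew_cg()
x = dynet.inputTensor(np.array([-2.0, 0.0, 3.0]))
print(dynet.bmax(.1 * x, x).npvalue())   # [-0.2  0.   3. ]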
def __init__(self, word_size, tag_size, rel_size, input_dim, hidden_dim, pdrop_embs, pdrop_lstm, pdrop_mlp, layers, mlp_dim, arc_dim, biaffine_bias_x_arc, biaffine_bias_y_arc, biaffine_bias_x_rel, biaffine_bias_y_rel, embs_word=None): self._global_step = 0 self._early_stop_count = 0 self._update = False self._best_score = 0. self._best_score_las = 0. self._punct_id = 0 self._masks_w = [] self._masks_t = [] self._vocab_size_w = word_size self._vocab_size_t = tag_size self._vocab_size_r = rel_size self._mlp_dim = mlp_dim self._arc_dim = arc_dim self._rel_dim = mlp_dim - arc_dim self.biaffine_bias_x_arc = biaffine_bias_x_arc self.biaffine_bias_y_arc = biaffine_bias_y_arc self.biaffine_bias_x_rel = biaffine_bias_x_rel self.biaffine_bias_y_rel = biaffine_bias_y_rel self._pc = dy.ParameterCollection() if config.adam: self._trainer = dy.AdamTrainer(self._pc, config.learning_rate, config.beta_1, config.beta_2, config.epsilon) else: # self._trainer = dy.AdadeltaTrainer(self._pc) trainer = dy.SimpleSGDTrainer(self._pc, config.learning_rate) trainer.set_clip_threshold(config.clip_threshold) # self._trainer.set_clip_threshold(1.0) self.params = dict() if embs_word is None: self.lp_w = self._pc.add_lookup_parameters( (word_size, input_dim), init=dy.ConstInitializer(0.)) else: self.lp_w = self._pc.lookup_parameters_from_numpy(embs_word) self.lp_t = self._pc.add_lookup_parameters( (tag_size, input_dim), init=dy.ConstInitializer(0.)) self.emb_root = self._pc.add_lookup_parameters( (2, input_dim), init=dy.ConstInitializer(0.)) # if config.isTest: # self.l2r_lstm = dy.VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc) # self.r2l_lstm = dy.VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc) # else: # self.l2r_lstm = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc) # self.r2l_lstm = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc) self._pdrop_embs = pdrop_embs self._pdrop_lstm = pdrop_lstm self._pdrop_mlp = pdrop_mlp self.LSTM_builders = [] # f = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc) # b = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc) # # self.LSTM_builders = [f, b] f = utils.orthonormal_VanillaLSTMBuilder(1, input_dim * 2, hidden_dim, self._pc) b = utils.orthonormal_VanillaLSTMBuilder(1, input_dim * 2, hidden_dim, self._pc) self.LSTM_builders.append((f, b)) for i in range(layers - 1): f = utils.orthonormal_VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc) b = utils.orthonormal_VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc) self.LSTM_builders.append((f, b)) if config.biaffine: W = utils.orthonormal_initializer(mlp_dim, 2 * hidden_dim) self.mlp_dep = self._pc.parameters_from_numpy(W) self.mlp_head = self._pc.parameters_from_numpy(W) self.mlp_dep_bias = self._pc.add_parameters( (mlp_dim, ), init=dy.ConstInitializer(0.)) self.mlp_head_bias = self._pc.add_parameters( (mlp_dim, ), init=dy.ConstInitializer(0.)) else: W = utils.orthonormal_initializer(mlp_dim * 2, 2 * hidden_dim) self.mlp = self._pc.parameters_from_numpy(W) self.mlp_bias = self._pc.add_parameters( (mlp_dim * 2, ), init=dy.ConstInitializer(0.)) # self.mlp_arc_size = mlp_arc_size # self.mlp_rel_size = mlp_rel_size # self.dropout_mlp = dropout_mlp if config.biaffine: self.W_arc = self._pc.add_parameters( (self._arc_dim, self._arc_dim + 1), init=dy.ConstInitializer(0.)) self.W_rel = self._pc.add_parameters( (self._vocab_size_r * (self._rel_dim + 1), self._rel_dim 
+ 1), init=dy.ConstInitializer(0.)) else: self.V_r_arc = self._pc.add_parameters((self._arc_dim)) self.V_i_arc = self._pc.add_parameters((self._arc_dim)) self.bias_arc = self._pc.add_parameters((self._arc_dim * 2)) self.V_r_rel = self._pc.add_parameters( (self._rel_dim * self._vocab_size_r)) self.V_i_rel = self._pc.add_parameters( (self._rel_dim * self._vocab_size_r)) self.bias_rel = self._pc.add_parameters( (self._rel_dim * self._vocab_size_r * 2)) return
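# A hedged sketch (assumption, not the source) of how W_arc above is typically
# used for biaffine arc scoring: score[i, j] = dep_i^T W_arc [head_j; 1],
# vectorized over the sentence. H_dep and H_head are (arc_dim, seq_len)
# matrices of MLP outputs.
import numpy as np
import dynet as dy

def biaffine_arc_scores(self, H_dep, H_head, seq_len):
    ones = dy.inputTensor(np.ones((1, seq_len)))
    lin = self.W_arc * dy.concatenate([H_head, ones])   # (arc_dim, seq_len)
    return dy.transpose(H_dep) * lin                    # (seq_len, seq_len)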
def __init__(self, args, vocabLengthSource, vocabLengthActionRule, vocabLengthNodes,
             vocabLengthTarget):
    self.flag_copy = True
    self.vocabLengthSource = vocabLengthSource
    self.vocabLengthActionRule = vocabLengthActionRule
    self.vocabLengthNodes = vocabLengthNodes
    self.vocabLengthTarget = vocabLengthTarget

    # parameters for the model
    self.numLayer = args.numLayer
    self.embeddingSourceSize = args.embeddingSourceSize
    self.embeddingApplySize = args.embeddingApplySize
    self.embeddingGenSize = args.embeddingGenSize
    self.embeddingNodeSize = args.embeddingNodeSize
    self.hiddenSize = args.hiddenSize
    self.attSize = args.attSize
    self.pointerSize = args.pointerSize
    self.dropout = args.dropout
    self.embeddingRuletypeSize = 2
    self.learningRate = args.learningRate

    self.model = dy.ParameterCollection()
    self.trainer = dy.AdamTrainer(self.model, alpha=self.learningRate)

    # source lookup
    self.sourceLookup = self.model.add_lookup_parameters(
        (self.vocabLengthSource, self.embeddingSourceSize))
    # action embedding matrix
    self.actionRuleLookup = self.model.add_lookup_parameters(
        (self.vocabLengthActionRule, self.embeddingApplySize))
    # node type lookup
    self.nodeTypeLookup = self.model.add_lookup_parameters(
        (self.vocabLengthNodes, self.embeddingNodeSize))
    # gen token lookup
    self.gentokenLookup = self.model.add_lookup_parameters(
        (self.vocabLengthTarget, self.embeddingGenSize))

    # adding parameters to the AST neural network
    self.attentionSource = self.model.add_parameters(
        (self.attSize, self.hiddenSize * 2))
    self.attentionTarget = self.model.add_parameters(
        (self.attSize, self.numLayer * self.hiddenSize * 2))
    self.attentionParameter = self.model.add_parameters((1, self.attSize))
    self.w_selection_gen_softmax = self.model.add_parameters(
        (2, self.hiddenSize))
    self.w_out_rule = self.model.add_parameters(
        (self.embeddingApplySize, self.hiddenSize))  # should change when hidden layers increase
    self.b_out_rule = self.model.add_parameters((self.embeddingApplySize))
    self.w_out_vocab = self.model.add_parameters(
        (self.embeddingApplySize,
         self.hiddenSize + self.hiddenSize * 2))  # should change when hidden layers increase
    self.b_out_vocab = self.model.add_parameters((self.embeddingApplySize))
    self.w_pointer_hidden = self.model.add_parameters(
        (self.pointerSize, 2 * self.hiddenSize + 2 * self.hiddenSize + self.hiddenSize))
    self.b_pointer_hidden = self.model.add_parameters((self.pointerSize))
    self.w_pointer_out = self.model.add_parameters((1, self.pointerSize))
    self.b_pointer_out = self.model.add_parameters((1))

    # initializing the encoder and decoder
    self.forward_encoder = dy.LSTMBuilder(self.numLayer, self.embeddingSourceSize,
                                          self.hiddenSize, self.model)
    self.backward_encoder = dy.LSTMBuilder(self.numLayer, self.embeddingSourceSize,
                                           self.hiddenSize, self.model)
    # decoder input: previous action embedding + context vector + node type
    # embedding + parent feeding (hidden state of the parent action + embedding
    # of the parent action)
    self.inputDecoderSize = (self.embeddingApplySize + self.hiddenSize * 2 + self.hiddenSize
                             + self.embeddingApplySize + self.embeddingNodeSize)
    self.decoder = dy.VanillaLSTMBuilder(self.numLayer, self.inputDecoderSize,
                                         self.hiddenSize, self.model)
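# A hedged sketch (assumption, not the source) of the copy/pointer scores that
# w_pointer_hidden / w_pointer_out imply: one logit per source token, computed
# from the encoder state, the attention context, and the decoder state.
import dynet as dy

def pointer_distribution(self, encoder_states, context, decoder_state):
    logits = []
    for enc in encoder_states:                      # enc: 2 * hiddenSize
        h = dy.tanh(self.w_pointer_hidden
                    * dy.concatenate([enc, context, decoder_state])
                    + self.b_pointer_hidden)
        logits.append(self.w_pointer_out * h + self.b_pointer_out)
    return dy.softmax(dy.concatenate(logits))       # copy distribution over source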
EPOCHS = 500 BATCH_SIZE = 50 HIDDEN_LAYER = 1 NUM_OUT = 100 VOCAB_SIZE = 13 start_time = time.time() model = dn.Model() input_lookup = model.add_lookup_parameters((VOCAB_SIZE, EMBEDDINGS_SIZE)) lstm = dn.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model) output_w_1 = model.add_parameters((VOCAB_SIZE, STATE_SIZE)) output_b_1 = model.add_parameters((VOCAB_SIZE)) output_w_2 = model.add_parameters((NUM_OUT, VOCAB_SIZE)) output_b_2 = model.add_parameters((NUM_OUT)) trainer = dn.AdamTrainer(model) def passed_time(previous_time): return round(time.time() - previous_time, 3) def read_to_data(name_file): words_tags = [] max_length = 0 words = open(name_file, "r").read().split('\n') for word_tag in words: if word_tag != "": word, tag = word_tag.split('/') words_tags.append((word, tag)) if len(word) > max_length:
def __init__(self, vocab, pos, rels, w2i, options): self.model = dn.Model() random.seed(1) self.trainer = dn.AdamTrainer(self.model) self.activation = self.activations[options.activation] self.decoder = self.decoders[options.decoder](options) self.test_decoder = self.decoders[options.test_decoder](options) \ if options.test_decoder is not None \ else self.decoder self.cost_augment = cost_augments[options.cost_augment] self.labelsFlag = options.labelsFlag self.options = options self.ldims = options.lstm_dims self.wdims = options.wembedding_dims self.pdims = options.pembedding_dims self.rdims = options.rembedding_dims self.layers = options.lstm_layers self.wordsCount = vocab self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()} self.pos = {word: ind + 3 for ind, word in enumerate(pos)} self.rels = {word: ind for ind, word in enumerate(rels)} # type: dict[str, int] self.irels = rels if options.external_embedding is not None: self.extrnd, self.elookup, self.edim = nn.get_external_embedding( self.model, options.external_embedding) logger.info('Load external embedding. Vector dimensions %d', self.edim) else: self.extrnd, self.elookup, self.edim = None, None, 0 dims = self.wdims + self.pdims + self.edim self.rnn = nn.BiLSTM(self.model, [dims] + [self.ldims * 2] * options.lstm_layers) self.hidden_units = options.hidden_units self.hidden2_units = options.hidden2_units self.vocab['*PAD*'] = 1 self.pos['*PAD*'] = 1 self.vocab['*INITIAL*'] = 2 self.pos['*INITIAL*'] = 2 self.wlookup = self.model.add_lookup_parameters( (len(vocab) + 3, self.wdims)) self.plookup = self.model.add_lookup_parameters( (len(pos) + 3, self.pdims)) self.rlookup = self.model.add_lookup_parameters( (len(rels), self.rdims)) if self.hidden2_units > 0: dense_dims = [self.hidden_units, self.hidden2_units, 1] use_bias = [True, False] else: dense_dims = [self.hidden_units, 1] # use_bias = [dn.NormalInitializer(0, 0)] use_bias = [False] self.head_dense_layer = DenseLayers( self.model, [self.ldims * 2, self.hidden_units], self.activation) self.dep_dense_layer = DenseLayers(self.model, [self.ldims * 2, self.hidden_units], self.activation) self.fusion_layer = nn.Biaffine(self.model, self.hidden_units, self.activation) if self.labelsFlag: self.relation_binear_layer = BiLinear(self.model, self.ldims * 2, self.hidden_units) relation_dense_dims = list(dense_dims) relation_dense_dims[-1] = len(self.irels) self.relation_dense_layer = DenseLayers(self.model, relation_dense_dims, self.activation)
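# DenseLayers is project-local and not shown; a hypothetical minimal version
# consistent with its calls above: DenseLayers(model, [n_in, ..., n_out],
# activation), with the activation applied between layers (an assumption).
import dynet as dn

class DenseLayers(object):
    def __init__(self, model, dims, activation):
        self.activation = activation
        self.W = [model.add_parameters((dims[i + 1], dims[i]))
                  for i in range(len(dims) - 1)]
        self.b = [model.add_parameters(dims[i + 1]) for i in range(len(dims) - 1)]

    def __call__(self, x):
        for i, (W, b) in enumerate(zip(self.W, self.b)):
            x = W * x + b
            if i < len(self.W) - 1:
                x = self.activation(x)
        return x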
meta.n_out = len(meta.tdmaps)
meta.rmaps = {v: k for k, v in meta.tdmaps.items()}
meta.w2i = {}
for w in wvm.vocab:
    meta.w2i[w] = wvm.vocab[w].index
if args.save_model:
    pickle.dump(meta, open('%s.meta' % args.save_model, 'wb'))
if args.load_model:
    ontoparser = SubsumptionLearning(model=args.load_model)
else:
    ontoparser = SubsumptionLearning(meta=meta)

trainers = {
    'momsgd': dy.MomentumSGDTrainer(ontoparser.model, edecay=0.25),
    'adam': dy.AdamTrainer(ontoparser.model, edecay=0.25),
    'simsgd': dy.SimpleSGDTrainer(ontoparser.model, edecay=0.25),
    'adagrad': dy.AdagradTrainer(ontoparser.model, edecay=0.25),
    'adadelta': dy.AdadeltaTrainer(ontoparser.model, edecay=0.25)
}
trainer = trainers[args.trainer]

nntraining(train_sents)
if args.dev:
    accuracy = Test(inputGenDev)
    sys.stdout.write("Accuracy: {}%\n".format(accuracy))
if args.isDaemon and args.daemonPort:
    sys.stderr.write('Listening on port %d\n' % args.daemonPort)
    host = "0.0.0.0"        # listen on all interfaces
    port = args.daemonPort  # port number
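# A minimal sketch (assumed; the request-handling loop is not part of this
# excerpt) of the TCP daemon that the host/port settings above prepare for.
import socket

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind((host, port))
server.listen(5)  # accept client connections and serve parses from here on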
import copy
import os

import dynet as dy
import numpy as np

from common import BEGIN_CHAR, STOP_CHAR, UNK_CHAR, SRC_FOLDER, RESULTS_FOLDER, DATA_FOLDER, check_path, write_pred_file, write_param_file, write_eval_file
from vocab_builder import build_vocabulary, Vocab
from norm_soft import SoftDataSet

MAX_PRED_SEQ_LEN = 50

# optimizer options
OPTIMIZERS = {
    'ADAM': lambda m: dy.AdamTrainer(
        m,
        lam=0.0,
        alpha=0.0001,  # common
        beta_1=0.9,
        beta_2=0.999,
        eps=1e-8),
    'SGD': dy.SimpleSGDTrainer,
    'ADADELTA': dy.AdadeltaTrainer
}


### IO handling and evaluation

def load_data(filename, input_format, lowercase=False):
    """
    Load data from file.
    """
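# A minimal usage sketch (not from the original file): 'ADAM' is wrapped in a
# lambda because it takes non-default hyperparameters, while the other two
# entries are trainer classes that only need the parameter collection.
model = dy.ParameterCollection()
trainer = OPTIMIZERS['ADAM'](model)  # or OPTIMIZERS['SGD'](model), etc.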
def main():
    # Read in data
    wids_en = defaultdict(lambda: len(wids_en))
    wids_de = defaultdict(lambda: len(wids_de))
    train_sentences_en = readData.read_corpus(wids_en, mode="train", update_dict=True, min_frequency=MIN_EN_FREQUENCY, language="en")
    train_sentences_de = readData.read_corpus(wids_de, mode="train", update_dict=True, min_frequency=MIN_DE_FREQUENCY, language="de")

    enDictionaryFile = "Models/" + "en-dict_" + str(MIN_EN_FREQUENCY) + ".txt"
    deDictionaryFile = "Models/" + "de-dict_" + str(MIN_DE_FREQUENCY) + ".txt"
    dicFile = open(enDictionaryFile, "w")
    print(len(wids_en))
    for key in wids_en:
        dicFile.write(key + " " + str(wids_en[key]) + "\n")
    dicFile.close()
    print("Writing EN")
    dicFile = open(deDictionaryFile, "w")
    print(len(wids_de))
    for key in wids_de:  # bug fix: the original iterated over wids_en here
        dicFile.write(key + " " + str(wids_de[key]) + "\n")
    dicFile.close()
    print("Writing DE")

    reverse_wids_en = reverseDictionary(wids_en)
    reverse_wids_de = reverseDictionary(wids_de)

    valid_sentences_en = readData.read_corpus(wids_en, mode="valid", update_dict=False, min_frequency=MIN_EN_FREQUENCY, language="en")
    valid_sentences_de = readData.read_corpus(wids_de, mode="valid", update_dict=False, min_frequency=MIN_DE_FREQUENCY, language="de")
    train_sentences = list(zip(train_sentences_de, train_sentences_en))
    valid_sentences = list(zip(valid_sentences_de, valid_sentences_en))
    for train_sentence in train_sentences[:10]:
        print("German:", [reverse_wids_de[x] for x in train_sentence[0]])
        print("English:", [reverse_wids_en[x] for x in train_sentence[1]])
    train_sentences = train_sentences[:MAX_TRAIN_SENTENCES]
    print("Number of Training Sentences:", len(train_sentences))
    print("Number of Validation Sentences:", len(valid_sentences))
    VOCAB_SIZE_EN = len(wids_en)
    VOCAB_SIZE_DE = len(wids_de)
    random.shuffle(train_sentences)
    random.shuffle(valid_sentences)

    # Prepare batches of sentences with equal source length
    lengthMap = {}
    for x in train_sentences:
        if len(x[0]) not in lengthMap:
            lengthMap[len(x[0])] = []
        lengthMap[len(x[0])].append(x)
    print("Number of Different Lengths:", len(lengthMap))

    train_batches = []
    for megaBatch in lengthMap.values():
        index = 0
        while index < len(megaBatch):
            if index % BATCH_SIZE == 0:
                batch = megaBatch[index:min(index + BATCH_SIZE, len(megaBatch))]
                train_batches.append(batch)
            index += BATCH_SIZE

    print([len(batch) for batch in train_batches])
    print(sum([len(batch) for batch in train_batches]))

    # Free some memory: drop references that are no longer needed
    train_sentences = None
    train_sentences_en = None
    train_sentences_de = None

    # Specify model
    model = dy.Model()
    encoder = dy.LSTMBuilder(LAYER_DEPTH, EMB_SIZE, HIDDEN_SIZE, model)
    revcoder = dy.LSTMBuilder(LAYER_DEPTH, EMB_SIZE, HIDDEN_SIZE, model)
    decoder = dy.LSTMBuilder(LAYER_DEPTH, EMB_SIZE + HIDDEN_SIZE, HIDDEN_SIZE, model)
    encoder_params = {}
    encoder_params["lookup"] = model.add_lookup_parameters((VOCAB_SIZE_DE, EMB_SIZE))
    decoder_params = {}
    decoder_params["lookup"] = model.add_lookup_parameters((VOCAB_SIZE_EN, EMB_SIZE))
    decoder_params["R"] = model.add_parameters((VOCAB_SIZE_EN, 2 * HIDDEN_SIZE))
    decoder_params["bias"] = model.add_parameters((VOCAB_SIZE_EN))
    trainer = dy.AdamTrainer(model)

    totalSentences = 0
    sentencesCovered = totalSentences // 3200
    startTime = datetime.datetime.now()
    print("Start Time", startTime)
    for epochId in range(NUM_EPOCHS):
        random.shuffle(train_batches)
        for batchId, batch in enumerate(train_batches):
            if len(batch) > 1:
                totalSentences += len(batch)
                if totalSentences // 3200 > sentencesCovered:
                    sentencesCovered = totalSentences // 3200
                    print("Sentences covered:", totalSentences, "Current Time", datetime.datetime.now())
                sentence_de = [sentence[0] for sentence in batch]
                sentence_en = [sentence[1] for sentence in batch]
                loss, words = do_one_batch(model, encoder, revcoder, decoder, encoder_params, decoder_params, sentence_de, sentence_en)
                loss.value()
                loss.backward()
                trainer.update()
            else:
                totalSentences += 1
                # print("Sentences covered:", totalSentences)
                sentence = batch[0]
                sentence_de = sentence[0]
                sentence_en = sentence[1]
                loss, words = do_one_example(model, encoder, revcoder, decoder, encoder_params, decoder_params, sentence_de, sentence_en)
                loss.value()
                loss.backward()
                trainer.update()
            # if totalSentences % 1000 < 20:
            #     print("Total Sentences Covered:", totalSentences)

        perplexity = 0.0
        totalLoss = 0.0
        totalWords = 0.0
        for valid_sentence in valid_sentences:
            valid_sentence_de = valid_sentence[0]
            valid_sentence_en = valid_sentence[1]
            validLoss, words = do_one_example(model, encoder, revcoder, decoder, encoder_params, decoder_params, valid_sentence_de, valid_sentence_en)
            totalLoss += float(validLoss.value())
            totalWords += words
        print(totalLoss)
        print(totalWords)
        perplexity = math.exp(totalLoss / totalWords)
        print("Validation perplexity after epoch:", epochId, "Perplexity:", perplexity, "Time:", datetime.datetime.now())
        trainer.update_epoch(1.0)

    # Save Model
    modelFile = "Models/" + "barebones_enc_dec_batched" + "_" + str(datetime.datetime.now()) + "_" + str(EMB_SIZE) + "_" + str(LAYER_DEPTH) + "_" + str(HIDDEN_SIZE) + "_" + str(MIN_EN_FREQUENCY) + "_" + str(MIN_DE_FREQUENCY)
    model.save(modelFile, [encoder, revcoder, decoder, encoder_params["lookup"], decoder_params["lookup"], decoder_params["R"], decoder_params["bias"]])
    return wids_de, wids_en, modelFile
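# A minimal reload sketch (assumed; it matches the old list-style DyNet 1.x
# save/load API used above): components come back in the order they were saved.
model2 = dy.Model()
encoder2, revcoder2, decoder2, enc_lookup2, dec_lookup2, R2, bias2 = model2.load(modelFile)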
# The excerpt begins inside the data reader; the signature below is restored
# from the calls further down. w2i, t2i and UNK are set up earlier in the script.
def read_dataset(filename):
    with open(filename, "r") as f:
        for line in f:
            tag, words = line.lower().strip().split(" ||| ")
            yield ([w2i[x] for x in words.split(" ")], t2i[tag])


# Read in the data
train = list(read_dataset("../data/classes/train.txt"))
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("../data/classes/test.txt"))
nwords = len(w2i)
ntags = len(t2i)

# Start DyNet and define trainer
model = dy.Model()
trainer = dy.AdamTrainer(model)

# Define the model
EMB_SIZE = 64
W_emb = model.add_lookup_parameters((nwords, 1, 1, EMB_SIZE))  # Word embeddings
WIN_SIZE = 3
FILTER_SIZE = 64
W_cnn = model.add_parameters((1, WIN_SIZE, EMB_SIZE, FILTER_SIZE))  # CNN weights
b_cnn = model.add_parameters((FILTER_SIZE))  # CNN bias
W_sm = model.add_parameters((ntags, FILTER_SIZE))  # Softmax weights
b_sm = model.add_parameters((ntags))  # Softmax bias
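# A minimal scoring sketch in the spirit of the standard DyNet CNN text
# classifier (the wiring is assumed, not taken from this excerpt): embed the
# words, convolve over word windows, max-pool over time, project to tag scores.
def calc_scores_sketch(words):
    dy.renew_cg()
    if len(words) < WIN_SIZE:
        words = words + [0] * (WIN_SIZE - len(words))  # pad short sentences
    cnn_in = dy.concatenate([dy.lookup(W_emb, x) for x in words], d=1)
    cnn_out = dy.conv2d_bias(cnn_in, W_cnn, b_cnn, stride=(1, 1), is_valid=False)
    pool_out = dy.rectify(dy.reshape(dy.max_dim(cnn_out, d=1), (FILTER_SIZE,)))
    return W_sm * pool_out + b_sm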
from collections import defaultdict
from itertools import count

lm_words = []
for sent in lm_train:
    for w in sent:
        lm_words.append(w)
lm_words.append("_UNK_")

lm_w2i = defaultdict(count(0).__next__)
for word in lm_words:
    lm_w2i[word]
lm_i2w = {i: w for w, i in lm_w2i.items()}
lm_nwords = len(lm_w2i)

lm_model = dy.Model()
lm_trainer = dy.AdamTrainer(lm_model)
lm_WORDS_LOOKUP = lm_model.add_lookup_parameters((lm_nwords, 64))
lm_RNN = dy.LSTMBuilder(1, 64, 128, lm_model)
lm_pW = lm_model.add_parameters((lm_nwords, 128))
lm_pb = lm_model.add_parameters(lm_nwords)


def calc_lm_loss(sent):
    dy.renew_cg()
    W = dy.parameter(lm_pW)
    b = dy.parameter(lm_pb)
    f_init = lm_RNN.initial_state()
    wids = []
    for w in sent:
        if w in lm_words:
            # assumed completion from here on: the excerpt is truncated above
            wids.append(lm_w2i[w])
        else:
            wids.append(lm_w2i["_UNK_"])
    # feed each word into the LSTM and score the following word
    state = f_init
    losses = []
    for wid, next_wid in zip(wids, wids[1:]):
        state = state.add_input(lm_WORDS_LOOKUP[wid])
        scores = W * state.output() + b
        losses.append(dy.pickneglogsoftmax(scores, next_wid))
    return dy.esum(losses)
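# A minimal usage sketch (assumed; the surrounding training loop is not part
# of this excerpt): one pass of language-model pretraining with the pieces above.
for sent in lm_train:
    loss = calc_lm_loss(sent)
    loss.value()         # run forward
    loss.backward()      # run backward
    lm_trainer.update()  # update lm_model's parameters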