def save_shared_parameters(self, basename="basename"):
    """Persist the shared encoder components to disk via dy.save().

    Args:
        basename: dy.save() file prefix. Defaults to the literal
            "basename" for backward compatibility with the previous
            hard-coded value; callers should pass a real path.
    """
    print("Saving the encoder parameter")
    # The word embedding is saved together with the other components
    # (it was previously saved separately to "models/word_embedding.m").
    dy.save(basename, [
        self.char_rnn.char_emb,
        self.char_rnn.fw_lstm,
        self.char_rnn.bw_lstm,
        self.word_embedding,
        self.bilstm,
    ])
def save_checkpoint(self, save_model=True):
    """Checkpoint the language model.

    Saves the LM weights (unless save_model is False), both
    vocabularies, and a JSON summary of the current training state.
    """
    assert self.model is not None, 'no model built'
    if save_model:
        dy.save(self.model_checkpoint_path, [self.lm])
    self.word_vocab.save(self.word_vocab_path)
    self.label_vocab.save(self.label_vocab_path)
    with open(self.state_checkpoint_path, 'w') as f:
        json.dump(
            {
                'model': 'rnn-lm',
                'multitask': self.multitask,
                'num-params': int(self.lm.num_params),
                'num-epochs': self.current_epoch,
                'num-updates': self.num_updates,
                'elapsed': self.timer.format_elapsed(),
                'current-lr': self.get_lr(),
                'best-dev-perplexity': self.best_dev_perplexity,
                'best-dev-perplexity-epoch': self.best_dev_epoch,
                'test-perplexity': self.test_perplexity,
            },
            f,
            indent=4)
def check_performance_and_save(parser, best_dev_fscore, best_dev_model_path,
                               treebank, sentence_embeddings, args):
    """Evaluate the parser on the dev set and, if the f-score improved,
    delete the previous best checkpoint and save a new one.

    Returns the (possibly updated) best f-score and best model path.
    """
    dev_fscore, dev_start_time = check_performance(
        parser, treebank, sentence_embeddings, args)
    print('dev-fscore {}\ndev-elapsed {}'.format(
        dev_fscore, format_elapsed(dev_start_time)))
    if dev_fscore.fscore > best_dev_fscore:
        # Clear the stale checkpoint pair before writing the new best.
        if best_dev_model_path is not None:
            for ext in [".data", ".meta"]:
                stale = best_dev_model_path + ext
                if os.path.exists(stale):
                    print("Removing previous model file {}...".format(stale))
                    os.remove(stale)
        best_dev_fscore = dev_fscore.fscore
        best_dev_model_path = "{}_dev={:.2f}".format(
            args.model_path_base, dev_fscore.fscore)
        print("Saving new best model to {}...".format(best_dev_model_path))
        dy.save(best_dev_model_path, [parser])
    return best_dev_fscore, best_dev_model_path
def save_model(self):
    """Finalize the model, save its parameters via dy.save(), and return
    a metadata dict describing the saved configuration."""
    self.finalize()
    meta = {
        "param_keys": list(self.params.keys()),
        "max_num_labels": self.max_num_labels,
        "layers": self.layers,
        "layer_dim": self.layer_dim,
        "output_dim": self.output_dim,
        "activation": self.activation_str,
        "init": self.init_str,
    }
    meta.update(self.save_extra())
    started = time.time()
    # Best-effort removal of a stale file with the same name.
    try:
        os.remove(self.filename)
        print("Removed existing '%s'." % self.filename)
    except OSError:
        pass
    print("Saving model to '%s'... " % self.filename, end="", flush=True)
    try:
        dy.save(self.filename, self.params.values())
        print("Done (%.3fs)." % (time.time() - started))
    except ValueError as e:
        print("Failed saving model: %s" % e)
    return meta
def train_network(config, saver, parser, embeddings, train_examples, dev_set, test_set): best_dev_UAS = 0 model = ParserModel(config, embeddings, parser) parser.model = model for epoch in range(config.n_epochs): print "Epoch {:} out of {:}".format(epoch + 1, config.n_epochs) dev_UAS = run_epoch(model, config, parser, train_examples, dev_set) if dev_UAS > best_dev_UAS: best_dev_UAS = dev_UAS if not saver: print "New best dev UAS! Saving model in ./data/weights/parser.weights" dy.save('./data/weights/parser.weights', [model.pW, model.pB1, model.pU, model.pB2]) if saver: print 80 * "=" print "TESTING" print 80 * "=" print "Restoring the best model weights found on the dev set" model.pW, model.pB1, model.pU, model.pB2 = dy.load( './data/weights/parser.weights', model.m) print "Final evaluation on test set", UAS, dependencies = parser.parse(test_set) print "- test UAS: {:.2f}".format(UAS * 100.0) print "Writing predictions" with open('q2_test.predicted.pkl', 'w') as f: cPickle.dump(dependencies, f, -1) print "Done!"
def save(self, basename):
    '''save parameters, lookup parameters and builder objects to disk

    @param basename: string, file to save into, used in dy.save()
    @return: None
    '''
    dy.save(basename, [self.pb, self.pe, self.pT])
def save_checkpoint(self):
    """Checkpoint the parser: weights via dy.save(), all three
    vocabularies, and a JSON summary of the current training state."""
    assert self.model is not None, 'no model built'
    dy.save(self.model_checkpoint_path, [self.parser])
    self.word_vocab.save(self.word_vocab_path)
    self.label_vocab.save(self.label_vocab_path)
    self.action_vocab.save(self.action_vocab_path)
    with open(self.state_checkpoint_path, 'w') as f:
        json.dump(
            {
                'model': self.model_type,
                'num-params': int(self.parser.num_params),
                'num-epochs': self.current_epoch,
                'num-updates': self.num_updates,
                'elapsed': self.timer.format_elapsed(),
                'current-lr': self.get_lr(),
                'best-dev-epoch': self.best_dev_epoch,
                'current-dev-fscore': self.current_dev_fscore,
                'best-dev-fscore': self.best_dev_fscore,
                'test-fscore': self.test_fscore,
                'current-dev-perplexity': self.current_dev_perplexity,
                'best-dev-perplexity': self.best_dev_perplexity,
                'test-perplexity': self.test_perplexity,
            },
            f,
            indent=4)
def save_to_disk(self, filename):
    """Serialize every component of this encoder-decoder (encoder and
    decoder LSTMs, lookup tables, attention and output parameters)."""
    components = [
        self.enc_fwd_lstm,
        self.enc_bwd_lstm,
        self.dec_lstm,
        self.input_lookup,
        self.attention_w1,
        self.attention_w2,
        self.attention_v,
        self.decoder_w,
        self.decoder_b,
        self.output_lookup,
    ]
    dy.save(filename, components)
def save_model(self, path, model_version, fold=None):
    """Save the model under ``<path>/model_<model_version>``.

    The DyNet parameters are saved with dy.save(); the remainder of the
    object (with DyNet members nulled out) is pickled alongside them.

    Args:
        path: base directory for the saved model.
        model_version: version tag used in the directory and file names.
        fold: optional fold index; note ``None`` yields the literal
            suffix ``_foldNone`` in the file names.
    """
    full_saving_path = os.path.join(path, "model_" + model_version)
    if not os.path.exists(full_saving_path):
        os.makedirs(full_saving_path)
    # DyNet members cannot be pickled; they are saved separately below
    # via dy.save() and detached from the pickled copy.
    nn_vars = ['W_emb', 'W_cnn', 'b_cnn', 'W_mlp', 'b_mlp', 'V_mlp', 'a_mlp']
    base_name = 'model_' + model_version + '_fold' + str(fold)
    # saving these features (written in the target directory under the model name)
    dy.save(os.path.join(full_saving_path, base_name),
            [getattr(self, i) for i in nn_vars])
    obj_to_save = copy.copy(self)
    # Null the DyNet members on the (shallow) copy so pickle can handle it.
    for n in nn_vars:
        setattr(obj_to_save, n, None)
    obj_to_save.model = None
    # Convert defaultdicts into plain dicts before pickling.
    obj_to_save.w2i = dict(obj_to_save.w2i)
    obj_to_save.t2i = dict(obj_to_save.t2i)
    # Fix: use a context manager so the pickle file is always closed
    # (the original left the file handle open).
    with open(os.path.join(full_saving_path, base_name + ".p"), "wb") as f:
        pickle.dump(obj_to_save, f)
def save_param_values(self, filename, values):
    """Save parameter values to `filename` via dy.save(), with the
    weight-decay lambda temporarily zeroed.

    Weight decay is applied on load instead of on save to avoid
    clab/dynet#1206. Fix: the lambda is now restored in a ``finally``
    block, so a failed dy.save() no longer leaves weight decay disabled
    (previously it was restored only on success).
    """
    remove_existing(filename + ".data", filename + ".meta")
    self.set_weight_decay_lambda(0.0)
    try:
        dy.save(filename, tqdm(values,
                               desc="Saving model to '%s'" % filename,
                               unit="param", file=sys.stdout))
    except ValueError as e:
        print("Failed saving model: %s" % e)
    finally:
        # Restore the configured lambda even if the save failed.
        self.set_weight_decay_lambda()
def save_model(self, filename):
    """Dump the output layer as human-readable text files and save all
    trainable components with dy.save()."""
    dy.renew_cg()
    # One readable dump per output-layer parameter.
    with open(filename + "_output_w.txt", "w") as weight_file:
        weight_file.write(
            np.array_str(dy.parameter(self.output_w).npvalue()) + "\n")
    with open(filename + "_output_b.txt", "w") as bias_file:
        bias_file.write(
            np.array_str(dy.parameter(self.output_b).npvalue()) + "\n")
    dy.save("models/" + filename + "_train.model",
            [self.output_w, self.output_b, self.embeddings, self.RNN])
def save_latest_model(model_path_base, parser):
    """Overwrite the rolling "<base>_latest_model" checkpoint with the
    current parser state."""
    latest_model_path = "{}_latest_model".format(model_path_base)
    # dy.save() writes a .data/.meta pair; clear any stale pair first.
    for ext in [".data", ".meta"]:
        stale = latest_model_path + ext
        if os.path.exists(stale):
            print("Removing previous model file {}...".format(stale))
            os.remove(stale)
    print("Saving new model to {}...".format(latest_model_path))
    dy.save(latest_model_path, [parser])
def test_save_load(self):
    """A parameter saved after one update must reload with identical
    values into a second parameter collection."""
    self.p.forward()
    self.p.backward()
    self.t.update()
    dy.renew_cg()
    value_before = self.p.value()
    dy.save(self.file, [self.p])
    [reloaded] = dy.load(self.file, self.m2)
    value_after = reloaded.value()
    self.assertTrue(np.allclose(value_before, value_after))
def train_network(params, ntags, train_data, dev_set):
    """One SGD pass over `train_data`, evaluating on `dev_set` every
    20k instances and saving the parameters whenever dev accuracy beats
    both MIN_ACC and the previous best for this run."""
    global telemetry_file, randstring, MIN_ACC
    prev_acc = 0
    m = params[0]
    t0 = time.clock()
    # train the network
    trainer = dy.SimpleSGDTrainer(m)
    total_loss = 0
    seen_instances = 0
    train_good = 0
    for train_x, train_y in train_data:
        dy.renew_cg()
        output = build_network(params, train_x)
        # NOTE(review): the original comment said L2 regularization was
        # commented out, yet the REG_LAMBDA term is active here.
        loss = -dy.log(output[train_y]) + REG_LAMBDA * sum(
            [dy.l2_norm(p) for p in params[2:]])
        if train_y == np.argmax(output.npvalue()):
            train_good += 1
        seen_instances += 1
        total_loss += loss.value()
        loss.backward()
        trainer.update()
        if seen_instances % 20000 == 0:
            # measure elapsed seconds
            secs = time.clock() - t0
            t0 = time.clock()
            good = case = 0
            max_dev_instances = 70 * 1000
            dev_instances = 0
            for x_tuple, dev_y in dev_set:
                output = build_network(params, x_tuple)
                if np.argmax(output.npvalue()) == dev_y:
                    good += 1
                case += 1
                dev_instances += 1
                if dev_instances >= max_dev_instances:
                    break
            acc = float(good) / case
            print(
                "iterations: {}. train_accuracy: {} accuracy: {} avg loss: {} secs per 1000:{}"
                .format(seen_instances, float(train_good) / 20000, acc,
                        total_loss / (seen_instances + 1), secs / 20))
            train_good = 0
            if acc > MIN_ACC and acc > prev_acc:
                print("saving.")
                # params[0] is the ParameterCollection itself; skip it.
                dy.save("params_" + randstring, list(params)[1:])
                prev_acc = acc
            telemetry_file.write("{}\t{}\t{}\t{}\n".format(
                seen_instances, acc, total_loss / (seen_instances + 1),
                secs / 20))
    MIN_ACC = max(prev_acc, MIN_ACC)
def fit(self, saver, parser, train_examples, dev_set): best_dev_UAS = 0 for epoch in range(self.config.n_epochs): print "Epoch {:} out of {:}".format(epoch + 1, self.config.n_epochs) dev_UAS = self.run_epoch(parser, train_examples, dev_set) if dev_UAS > best_dev_UAS: best_dev_UAS = dev_UAS if saver: print "New best dev UAS! Saving model in ./data/weights/parser.weights" dy.save('./data/weights/parser.weights') print
def predit_sql(sentence):
    """Tag `sentence`, fill the predicted template with the tagged
    tokens, save the current model, and print the generated SQL query.

    Relies on module-level globals (vocab_words, builders, the
    parameter objects, UNK)."""
    tokens = sentence.strip().split()
    word_ids = [vocab_words.w2i.get(word, UNK) for word in tokens]
    tag_ids = [0 for t in tokens]
    pred_tags, pred_template, _ = build_tagging_graph(
        word_ids, tag_ids, 0, builders, False)
    pred_complete = insert_tagged_tokens(tokens, pred_tags, pred_template)
    dy.save("model_new", [
        pEmbedding,
        pOutput,
        builders[0],
        builders[1],
        pHiddenTemplate,
        pOutputTemplate,
    ])
    print(pred_tags)
    print("SQL query generated: ", pred_complete)
def save(self, filename, initial_weights=None, save_with_embeddings=True):
    """Persist the model: optional embedding array, the DyNet weights,
    the feature ordering, and (optionally) a weight summary."""
    # model payload
    if save_with_embeddings:
        np.save(filename + '-embs.npy', self.embeddings.as_array())
    # order matters for loading
    dy.save(filename + '.dyn',
            [self.ergm_weights] +
            [self.word_assoc_weights[r] for r in self.relation_names])
    # feature ordering
    pickle.dump(self.feature_set, open(filename + '.feats', 'wb'))
    # nice-to-read score summary
    if initial_weights is not None:
        self.save_weights(filename, initial_weights)
def save(self, file_name):
    """Save the tagger's DyNet members via dy.save() and its character
    mapping (c2i) as a separate pickle next to it.

    The c2i path is derived by stripping the last four characters of
    `file_name` (assumes a 4-char extension - TODO confirm).
    """
    members_to_save = [
        self.char_lookup,
        self.char_fwd_lstm,
        self.char_bwd_lstm,
        self.lstm_to_rep_params,
        self.lstm_to_rep_bias,
        self.mlp_out,
        self.mlp_out_bias,
    ]
    dy.save(file_name, members_to_save)
    # character mapping saved separately.
    # Fix: close the file deterministically (was a bare open()).
    with open(file_name[:-4] + '.c2i', 'w') as f:
        cPickle.dump(self.c2i, f)
def model_save_helper(mode, prefix, savable, options):
    """Serialize `savable` plus its `options` in one of three formats.

    mode == "dynet":       dn.save() plus a "<prefix>.options" pickle.
    mode == "pickle":      a single pickle of (options, picklable obj).
    mode == "pickle-gzip": the same pickle, gzip-compressed.

    Raises TypeError for any other mode.
    """
    if mode == "dynet":
        # noinspection PyArgumentList
        dn.save(prefix, [savable])
        with open(prefix + ".options", "wb") as f:
            pickle.dump(options, f)
    elif mode in ("pickle", "pickle-gzip"):
        picklable = savable.get_picklable_obj()
        opener = gzip.open if mode == "pickle-gzip" else open
        with opener(prefix, "wb") as f:
            pickle.dump((options, picklable), f)
    else:
        raise TypeError("Invalid model format.")
def check_dev(): nonlocal best_dev_fscore nonlocal best_dev_model_path dev_start_time = time.time() dev_predicted = [] for tree in dev_treebank: dy.renew_cg() sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()] predicted, _, _ = parser.parse(sentence) if args.parser_type == "bottom-up": dev_predicted.append(predicted) else: dev_predicted.append(predicted.convert()) dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank, dev_predicted, args.parser_type) print("dev-fscore {} " "dev-elapsed {} " "total-elapsed {}".format( dev_fscore, format_elapsed(dev_start_time), format_elapsed(start_time), )) if dev_fscore.fscore > best_dev_fscore: if best_dev_model_path is not None: for ext in [".data", ".meta"]: path = best_dev_model_path + ext if os.path.exists(path): print( "Removing previous model file {}...".format(path)) os.remove(path) best_dev_fscore = dev_fscore.fscore best_dev_model_path = "{}_dev={:.2f}".format( args.model_path_base, dev_fscore.fscore) print("Saving new best model to {}...".format(best_dev_model_path)) dy.save(best_dev_model_path, [parser])
def check_dev(): nonlocal best_dev_fscore nonlocal best_dev_model_path dev_start_time = time.time() dev_predicted = [] #dev_gold = [] #dev_gold = latent_tree.build_latent_trees(dev_chunk_insts) dev_gold = [] for inst in dev_chunk_insts: chunks = util.inst2chunks(inst) dev_gold.append(chunks) for x, chunks in dev_chunk_insts: dy.renew_cg() #sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()] sentence = [(parse.XX, ch) for ch in x] predicted, _ = parser.parse(sentence) dev_predicted.append(predicted.convert().to_chunks()) #dev_fscore = evaluate.evalb(args.evalb_dir, dev_gold, dev_predicted, args.expname + '.dev.') #evalb dev_fscore = evaluate.eval_chunks2(args.evalb_dir, dev_gold, dev_predicted, output_filename=args.expname + '.dev.txt') # evalb print("dev-fscore {} " "dev-elapsed {} " "total-elapsed {}".format( dev_fscore, format_elapsed(dev_start_time), format_elapsed(start_time), )) if dev_fscore.fscore > best_dev_fscore: if best_dev_model_path is not None: for ext in [".data", ".meta"]: path = best_dev_model_path + ext if os.path.exists(path): print( "Removing previous model file {}...".format(path)) os.remove(path) best_dev_fscore = dev_fscore.fscore best_dev_model_path = "{}_dev={:.2f}".format( args.model_path_base + "_" + args.expname, dev_fscore.fscore) print("Saving new best model to {}...".format(best_dev_model_path)) dy.save(best_dev_model_path, [parser]) test_start_time = time.time() test_predicted = [] #test_gold = latent_tree.build_latent_trees(test_chunk_insts) test_gold = [] for inst in test_chunk_insts: chunks = util.inst2chunks(inst) test_gold.append(chunks) ftreelog = open(args.expname + '.test.predtree.txt', 'w', encoding='utf-8') for x, chunks in test_chunk_insts: dy.renew_cg() #sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()] sentence = [(parse.XX, ch) for ch in x] predicted, _ = parser.parse(sentence) pred_tree = predicted.convert() ftreelog.write(pred_tree.linearize() + '\n') test_predicted.append(pred_tree.to_chunks()) 
ftreelog.close() #test_fscore = evaluate.evalb(args.evalb_dir, test_chunk_insts, test_predicted, args.expname + '.test.') test_fscore = evaluate.eval_chunks2(args.evalb_dir, test_gold, test_predicted, output_filename=args.expname + '.test.txt') # evalb print("epoch {:,} " "test-fscore {} " "test-elapsed {} " "total-elapsed {}".format( epoch, test_fscore, format_elapsed(test_start_time), format_elapsed(start_time), ))
def tagger(repre, mission, char_lstm, lin_dim, bi_lstm1, bi_lstm2, embed_size,
           epochs, lr, batch_size, train_file, dev_file, per_log, dev_per_log,
           saved_model_path, test_model_after_train=False):
    """Train one of four biLSTM tagger variants (selected by `repre`:
    "a"/"b"/"c"/anything-else -> D), save its parameters under a path
    suffixed with the dev accuracy, and optionally run the result on
    the blind test set."""
    # read train and dev data sets
    train, vocab, labels = get_train_dataset(train_file, mission, repre)
    dev = read_data(dev_file, mission)
    # define vocabulary and help structures
    word2int = {w: i for i, w in enumerate(vocab)}
    label2int = {l: i for i, l in enumerate(labels)}
    vocab_size = len(vocab)
    num_labels = len(labels)
    # create a transducer classifier
    m = dy.ParameterCollection()
    trainer = dy.AdamTrainer(m, lr)  # define trainer
    if repre == "a":
        transducer = biLSTMTaggerA(vocab_size, bi_lstm1, bi_lstm2, embed_size,
                                   num_labels, m)
        dev_accuracy = train_model(mission, train, dev, vocab, epochs,
                                   batch_size, trainer, transducer, word2int,
                                   label2int, per_log, dev_per_log)
        dy.save(saved_model_path + "_" + dev_accuracy, [
            transducer.params["lookup"],
            transducer.fw_builder1, transducer.bw_builder1,
            transducer.fw_builder2, transducer.bw_builder2,
            transducer.params["W"], transducer.params["b"],
        ])
    elif repre == "b":
        transducer = biLSTMTaggerB(vocab_size, char_lstm, bi_lstm1, bi_lstm2,
                                   embed_size, num_labels, m)
        dev_accuracy = train_model(mission, train, dev, vocab, epochs,
                                   batch_size, trainer, transducer, word2int,
                                   label2int, per_log, dev_per_log)
        dy.save(saved_model_path + "_" + dev_accuracy, [
            transducer.params["lookup"], transducer.char_builder,
            transducer.fw_builder1, transducer.bw_builder1,
            transducer.fw_builder2, transducer.bw_builder2,
            transducer.params["W"], transducer.params["b"],
        ])
    elif repre == "c":
        transducer = biLSTMTaggerC(vocab_size, bi_lstm1, bi_lstm2, embed_size,
                                   num_labels, m)
        dev_accuracy = train_model(mission, train, dev, vocab, epochs,
                                   batch_size, trainer, transducer, word2int,
                                   label2int, per_log, dev_per_log)
        dy.save(saved_model_path + "_" + dev_accuracy, [
            transducer.params["lookup"],
            transducer.fw_builder1, transducer.bw_builder1,
            transducer.fw_builder2, transducer.bw_builder2,
            transducer.params["W"], transducer.params["b"],
        ])
    else:
        transducer = biLSTMTaggerD(vocab_size, char_lstm, lin_dim, bi_lstm1,
                                   bi_lstm2, embed_size, num_labels, m)
        dev_accuracy = train_model(mission, train, dev, vocab, epochs,
                                   batch_size, trainer, transducer, word2int,
                                   label2int, per_log, dev_per_log)
        dy.save(saved_model_path + "_" + dev_accuracy, [
            transducer.params["lookup"], transducer.char_builder,
            transducer.params["W_con"], transducer.params["b_con"],
            transducer.fw_builder1, transducer.bw_builder1,
            transducer.fw_builder2, transducer.bw_builder2,
            transducer.params["W"], transducer.params["b"],
        ])
    if test_model_after_train == True:
        if mission == "NER":
            test_model_on_blind_set(
                "./ner/test",
                saved_model_path + "_" + dev_accuracy + ".ner",
                transducer, word2int, label2int, vocab)
        else:
            test_model_on_blind_set(
                "./pos/test",
                saved_model_path + "_" + dev_accuracy + ".pos",
                transducer, word2int, label2int, vocab)
def test_save_load(self):
    """A builder saved with dy.save() must load into a second collection."""
    dy.save(self.file, [self.b])
    [loaded] = dy.load(self.file, self.m2)
def test_save_load_generator(self):
    """dy.save() accepts a generator; dy.load_generator() round-trips it."""
    dy.save(self.file, (x for x in [self.b]))
    [loaded] = list(dy.load_generator(self.file, self.m2))
def save(self, prefix):
    """Pickle the options to "<prefix>.options" and save the network
    with dn.save() under the same prefix."""
    with open(prefix + ".options", "wb") as options_file:
        pickle.dump(self.options, options_file)
    # noinspection PyArgumentList
    dn.save(prefix, [self.network])
def save_to_disk(self, filename):
    """Persist the RNN builder, lookup table and output layer (R, bias)."""
    components = [self.builder, self.lookup, self.R, self.bias]
    dy.save(filename, components)
def save(self, prefix):
    """Pickle (options, statistics) to "<prefix>.options" and save the
    container with dn.save() under the same prefix."""
    with open(prefix + ".options", "wb") as options_file:
        pickle.dump((self.options, self.statistics), options_file)
    # noinspection PyArgumentList
    dn.save(prefix, [self.container])
def main():
    """Train a graph-NN dependency parser: validate every VALID_ITER
    iterations, keep the best checkpoint, then evaluate it on test."""
    # Configuration file processing
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='../configs/debug.cfg')
    argparser.add_argument('--continue_training', action='store_true',
                           help='Load model Continue Training')
    argparser.add_argument('--name', default='experiment',
                           help='The name of the experiment.')
    argparser.add_argument('--model', default='s2s',
                           help='s2s: seq2seq-head-selection-model'
                                's2tBFS: seq2tree-BFS-decoder-model'
                                's2tDFS: seq2tree-DFS-decoder-model')
    argparser.add_argument('--gpu', default='0', help='GPU ID (-1 to cpu)')
    args, extra_args = argparser.parse_known_args()
    cfg = IniConfigurator(args.config_file, extra_args)

    # Logger setting
    logger = dual_channel_logger(
        __name__,
        file_path=cfg.LOG_FILE,
        file_model='w',
        formatter='%(asctime)s - %(levelname)s - %(message)s',
        time_formatter='%m-%d %H:%M')
    from eval.script_evaluator import ScriptEvaluator

    # DyNet setting: the device must be configured before dynet is imported.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    import dynet_config
    dynet_config.set(mem=cfg.DYNET_MEM, random_seed=cfg.DYNET_SEED)
    dynet_config.set_gpu()
    import dynet as dy
    from models.token_representation import TokenRepresentation
    from antu.nn.dynet.seq2seq_encoders import DeepBiRNNBuilder, orthonormal_VanillaLSTMBuilder
    from models.graph_nn_decoder import GraphNNDecoder

    # Build data reader
    data_reader = PTBReader(
        field_list=['word', 'tag', 'head', 'rel'],
        root='0\t**root**\t_\t**rcpos**\t**rpos**\t_\t0\t**rrel**\t_\t_',
        spacer=r'[\t]',
    )
    # Build vocabulary with pretrained glove
    vocabulary = Vocabulary()
    g_word, _ = glove_reader(cfg.GLOVE)
    pretrained_vocabs = {'glove': g_word}
    vocabulary.extend_from_pretrained_vocab(pretrained_vocabs)
    # Setup datasets
    datasets_settings = {
        'train': DatasetSetting(cfg.TRAIN, True),
        'dev': DatasetSetting(cfg.DEV, False),
        'test': DatasetSetting(cfg.TEST, False),
    }
    datasets = PTBDataset(vocabulary, datasets_settings, data_reader)
    counters = {'word': Counter(), 'tag': Counter(), 'rel': Counter()}
    datasets.build_dataset(counters, no_pad_namespace={'rel'},
                           no_unk_namespace={'rel'})

    # Build model: trainer, token representation, encoder, decoder, eval.
    pc = dy.ParameterCollection()
    trainer = dy.AdamTrainer(pc, alpha=cfg.LR, beta_1=cfg.ADAM_BETA1,
                             beta_2=cfg.ADAM_BETA2, eps=cfg.EPS)
    token_repre = TokenRepresentation(pc, cfg, datasets.vocabulary)
    encoder = DeepBiRNNBuilder(pc, cfg.ENC_LAYERS, token_repre.token_dim,
                               cfg.ENC_H_DIM, orthonormal_VanillaLSTMBuilder)
    decoder = GraphNNDecoder(pc, cfg, datasets.vocabulary)
    my_eval = ScriptEvaluator(['Valid', 'Test'], datasets.vocabulary)

    # Build Training Batch (sorted by sentence length)
    def cmp(ins):
        return len(ins['word'])
    train_batch = datasets.get_batches('train', cfg.TRAIN_BATCH_SIZE, True,
                                       cmp, True)
    valid_batch = list(
        datasets.get_batches('dev', cfg.TEST_BATCH_SIZE, False, cmp, False))
    test_batch = list(
        datasets.get_batches('test', cfg.TEST_BATCH_SIZE, False, cmp, False))

    # Train model
    BEST_DEV_LAS = BEST_DEV_UAS = BEST_ITER = cnt_iter = 0
    valid_loss = [[] for i in range(cfg.GRAPH_LAYERS + 3)]
    logger.info("Experiment name: %s" % args.name)
    SHA = os.popen('git log -1 | head -n 1 | cut -c 8-13').readline().rstrip()
    logger.info('Git SHA: %s' % SHA)
    while cnt_iter < cfg.MAX_ITER:
        dy.renew_cg()
        cnt_iter += 1
        indexes, masks, truth = train_batch.__next__()
        vectors = token_repre(indexes, True)
        vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                          np.array(masks['1D']).T, True)
        loss, part_loss = decoder(vectors, masks, truth, True, True)
        for i, l in enumerate([loss] + part_loss):
            valid_loss[i].append(l.value())
        loss.backward()
        # Exponential learning-rate decay.
        trainer.learning_rate = cfg.LR * cfg.LR_DECAY**(cnt_iter /
                                                        cfg.LR_ANNEAL)
        trainer.update()
        if cnt_iter % cfg.VALID_ITER:
            continue

        # Validation
        for i in range(len(valid_loss)):
            valid_loss[i] = str(round(np.mean(valid_loss[i]), 2))
        avg_loss = ', '.join(valid_loss)
        logger.info("")
        logger.info("Iter: %d-%d, Avg_loss: %s, LR (%f), Best (%d)" %
                    (cnt_iter / cfg.VALID_ITER, cnt_iter, avg_loss,
                     trainer.learning_rate, BEST_ITER))
        valid_loss = [[] for i in range(cfg.GRAPH_LAYERS + 3)]
        my_eval.clear('Valid')
        for indexes, masks, truth in valid_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)
            vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                              np.array(masks['1D']).T, False)
            pred = decoder(vectors, masks, None, False, True)
            my_eval.add_truth('Valid', truth)
            my_eval.add_pred('Valid', pred)
        # Always save the latest checkpoint; promote it on improvement.
        dy.save(cfg.LAST_FILE, [token_repre, encoder, decoder])
        if my_eval.evaluation('Valid', cfg.PRED_DEV, cfg.DEV):
            BEST_ITER = cnt_iter / cfg.VALID_ITER
            os.system('cp %s.data %s.data' % (cfg.LAST_FILE, cfg.BEST_FILE))
            os.system('cp %s.meta %s.meta' % (cfg.LAST_FILE, cfg.BEST_FILE))
        # Just record test result
        my_eval.clear('Test')
        for indexes, masks, truth in test_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)
            vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                              np.array(masks['1D']).T, False)
            pred = decoder(vectors, masks, None, False, True)
            my_eval.add_truth('Test', truth)
            my_eval.add_pred('Test', pred)
        my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
    my_eval.print_best_result('Valid')

    # Final test with the best checkpoint loaded into a fresh collection.
    test_pc = dy.ParameterCollection()
    token_repre, encoder, decoder = dy.load(cfg.BEST_FILE, test_pc)
    my_eval.clear('Test')
    test_batch = datasets.get_batches('test', cfg.TEST_BATCH_SIZE, False,
                                      cmp, False)
    for indexes, masks, truth in test_batch:
        dy.renew_cg()
        vectors = token_repre(indexes, False)
        vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                          np.array(masks['1D']).T, False)
        pred = decoder(vectors, masks, None, False, True)
        my_eval.add_truth('Test', truth)
        my_eval.add_pred('Test', pred)
    my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
def main():
    """Variant training entry point for the graph-NN parser: warm-up
    iterations, a hard-coded learning rate, POS-augmented token
    representation, and a simpler encoder call during evaluation."""
    # Configuration file processing
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='../configs/debug.cfg')
    argparser.add_argument('--continue_training', action='store_true',
                           help='Load model Continue Training')
    argparser.add_argument('--name', default='experiment',
                           help='The name of the experiment.')
    argparser.add_argument('--model', default='s2s',
                           help='s2s: seq2seq-head-selection-model'
                                's2tDFS: seq2tree-DFS-decoder-model')
    argparser.add_argument('--gpu', default='0', help='GPU ID (-1 to cpu)')
    args, extra_args = argparser.parse_known_args()
    cfg = IniConfigurator(args.config_file, extra_args)

    # Logger setting
    logger = dual_channel_logger(
        __name__,
        file_path=cfg.LOG_FILE,
        file_model='w',
        formatter='%(asctime)s - %(levelname)s - %(message)s',
        time_formatter='%m-%d %H:%M')
    from eval.script_evaluator import ScriptEvaluator

    # DyNet setting: the device must be configured before dynet is imported.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    import dynet_config
    dynet_config.set(mem=cfg.DYNET_MEM, random_seed=cfg.DYNET_SEED)
    dynet_config.set_gpu()
    import dynet as dy
    from models.token_representation import TokenRepresentation
    from antu.nn.dynet.seq2seq_encoders import DeepBiRNNBuilder, orthonormal_VanillaLSTMBuilder
    from models.graph_nn_decoder import GraphNNDecoder
    from models.jackknife_decoder import JackKnifeGraphNNDecoder

    # Build data reader
    data_reader = PTBReader(
        field_list=['word', 'tag', 'head', 'rel'],
        root='0\t**root**\t_\t**rcpos**\t**rpos**\t_\t0\t**rrel**\t_\t_',
        spacer=r'[\t]',)
    # Build vocabulary with pretrained glove
    vocabulary = Vocabulary()
    g_word, _ = glove_reader(cfg.GLOVE)
    pretrained_vocabs = {'glove': g_word}
    vocabulary.extend_from_pretrained_vocab(pretrained_vocabs)
    # Setup datasets
    datasets_settings = {'train': DatasetSetting(cfg.TRAIN, True),
                         'dev': DatasetSetting(cfg.DEV, False),
                         'test': DatasetSetting(cfg.TEST, False), }
    datasets = PTBDataset(vocabulary, datasets_settings, data_reader)
    counters = {'word': Counter(), 'tag': Counter(), 'rel': Counter()}
    datasets.build_dataset(counters, no_pad_namespace={'rel'},
                           no_unk_namespace={'rel'})

    # Build model
    pc = dy.ParameterCollection()
    # NOTE(review): hard-coded LR overrides cfg.LR here.
    LR = 0.0005
    trainer = dy.AdamTrainer(pc, LR, cfg.ADAM_BETA1, cfg.ADAM_BETA2, cfg.EPS)
    # Token Representation Layer (POS tags included in this variant).
    token_repre = TokenRepresentation(pc, cfg, datasets.vocabulary,
                                      include_pos=True)
    # BiLSTM Encoder Layer. (Several attention-based encoder variants -
    # multi-head, label attention, scaled dot-product - were tried here
    # and removed.)
    encoder = DeepBiRNNBuilder(pc, cfg.ENC_LAYERS, token_repre.token_dim,
                               cfg.ENC_H_DIM, orthonormal_VanillaLSTMBuilder)
    # GNN Decoder Layer (JackKnifeGraphNNDecoder was an alternative).
    decoder = GraphNNDecoder(pc, cfg, datasets.vocabulary)
    # PTB Evaluator
    my_eval = ScriptEvaluator(['Valid', 'Test'], datasets.vocabulary)

    # Build Training Batch (sorted by sentence length)
    def cmp(ins):
        return len(ins['word'])
    train_batch = datasets.get_batches('train', cfg.TRAIN_BATCH_SIZE, True,
                                       cmp, True)
    valid_batch = list(
        datasets.get_batches('dev', cfg.TEST_BATCH_SIZE, False, cmp, False))
    test_batch = list(
        datasets.get_batches('test', cfg.TEST_BATCH_SIZE, False, cmp, False))

    # Train model
    BEST_DEV_LAS = BEST_DEV_UAS = BEST_ITER = 0
    # Warm-up: counting starts negative so the first iterations warm up
    # the graph layers before the LR schedule kicks in.
    cnt_iter = -cfg.WARM * cfg.GRAPH_LAYERS
    valid_loss = [[] for i in range(cfg.GRAPH_LAYERS + 3)]
    logger.info("Experiment name: %s" % args.name)
    SHA = os.popen('git log -1 | head -n 1 | cut -c 8-13').readline().rstrip()
    logger.info('Git SHA: %s' % SHA)
    while cnt_iter < cfg.MAX_ITER:
        print(cnt_iter, cfg.MAX_ITER)
        # Debug-mode graph construction (eager evaluation + validity checks).
        dy.renew_cg(immediate_compute=True, check_validity=True)
        cnt_iter += 1
        indexes, masks, truth = train_batch.__next__()
        vectors = token_repre(indexes, True)
        vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                          np.array(masks['1D']).T, False, True)
        loss, part_loss = decoder(vectors, masks, truth, cnt_iter, True, True)
        for i, l in enumerate([loss] + part_loss):
            valid_loss[i].append(l.value())
        loss.backward()
        # Decay only starts once the warm-up (negative) iterations end.
        trainer.learning_rate = LR * cfg.LR_DECAY**(max(cnt_iter, 0) /
                                                    cfg.LR_ANNEAL)
        trainer.update()
        if cnt_iter % cfg.VALID_ITER:
            continue

        # Validation
        for i in range(len(valid_loss)):
            valid_loss[i] = str(round(np.mean(valid_loss[i]), 2))
        avg_loss = ', '.join(valid_loss)
        logger.info("")
        logger.info("Iter: %d-%d, Avg_loss: %s, LR (%f), Best (%d)" %
                    (cnt_iter / cfg.VALID_ITER, cnt_iter, avg_loss,
                     trainer.learning_rate, BEST_ITER))
        valid_loss = [[] for i in range(cfg.GRAPH_LAYERS + 3)]
        my_eval.clear('Valid')
        for indexes, masks, truth in valid_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)
            vectors = encoder(vectors, np.array(masks['1D']).T)
            pred = decoder(vectors, masks, None, cnt_iter, False, True)
            my_eval.add_truth('Valid', truth)
            my_eval.add_pred('Valid', pred)
        # Always save the latest checkpoint; promote it on improvement.
        dy.save(cfg.LAST_FILE, [token_repre, encoder, decoder])
        if my_eval.evaluation('Valid', cfg.PRED_DEV, cfg.DEV):
            BEST_ITER = cnt_iter / cfg.VALID_ITER
            os.system('cp %s.data %s.data' % (cfg.LAST_FILE, cfg.BEST_FILE))
            os.system('cp %s.meta %s.meta' % (cfg.LAST_FILE, cfg.BEST_FILE))
        # Just record test result
        my_eval.clear('Test')
        for indexes, masks, truth in test_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)
            vectors = encoder(vectors, np.array(masks['1D']).T)
            pred = decoder(vectors, masks, None, cnt_iter, False, True)
            my_eval.add_truth('Test', truth)
            my_eval.add_pred('Test', pred)
        my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
    my_eval.print_best_result('Valid')

    # Final Test with the best checkpoint loaded into a fresh collection.
    test_pc = dy.ParameterCollection()
    token_repre, encoder, decoder = dy.load(cfg.BEST_FILE, test_pc)
    my_eval.clear('Test')
    for indexes, masks, truth in test_batch:
        dy.renew_cg()
        vectors = token_repre(indexes, False)
        vectors = encoder(vectors, np.array(masks['1D']).T)
        pred = decoder(vectors, masks, None, 0, False, True)
        my_eval.add_truth('Test', truth)
        my_eval.add_pred('Test', pred)
    my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
# End-of-epoch bookkeeping: report the mean batch loss, evaluate on the
# dev set, and persist all model components. (This fragment appears to
# live inside a training loop whose header is outside this view.)
total_loss = sum(batch_loss_ls) / len(batch_loss_ls)
time2 = time.time()
print("total loss: {}".format(total_loss))
print("time consumed for training: {}s".format(time2 - time1))
# Evaluate on dev, passing the CRF layer only when it is enabled.
if not config.use_crf:
    acc, f1 = evaluate(config.trim_dev_path, char_acceptor, word_acceptor,
                       char_embed, word_embed)
else:
    acc, f1 = evaluate(config.trim_dev_path, char_acceptor, word_acceptor,
                       char_embed, word_embed, crf_acceptor)
time3 = time.time()
print("acc: {}%\nf1 score: {}%".format(100 * acc, 100 * f1))
print("time consumed for evaluating: {}s".format(time3 - time2))
print("epoch done.")
# save model parameters, lookup parameters and builder objects to disk
obs = [word_embed, char_embed]
basename = config.model_basename
dy.save(basename, obs)
char_acceptor.save(basename + ".charBilstm")
word_acceptor.save(basename + ".wordBilstm")
if config.use_crf:
    crf_acceptor.save(basename + ".crf")