def tagger(repre, mission, train_file, input_file, saved_model_path, output_file):
    # read train and test data sets
    train, vocab, labels = get_train_dataset(train_file, mission, repre)
    test = read_data(input_file, "test")

    # define vocabulary and helper structures
    word2int = {w: i for i, w in enumerate(vocab)}
    label2int = {l: i for i, l in enumerate(labels)}

    # load the trained transducer classifier; the saved parameter layout is the
    # same for every representation ("a" through "d"), so one dy.load call suffices
    m = dy.ParameterCollection()
    model_params = dy.load(saved_model_path, m)

    predictions = test_model(repre, test, model_params, word2int, label2int, vocab)

    # write predictions to file
    with open(output_file, "w") as out_file:
        for idx, sentence in enumerate(predictions):
            for word_idx, (word, label) in enumerate(sentence):
                orig_word = test[idx][word_idx]
                out_file.write(orig_word + " " + label + "\n")
            out_file.write("\n")
def train_network(config, saver, parser, embeddings, train_examples, dev_set, test_set):
    best_dev_UAS = 0
    model = ParserModel(config, embeddings, parser)
    parser.model = model

    for epoch in range(config.n_epochs):
        print("Epoch {:} out of {:}".format(epoch + 1, config.n_epochs))
        dev_UAS = run_epoch(model, config, parser, train_examples, dev_set)
        if dev_UAS > best_dev_UAS:
            best_dev_UAS = dev_UAS
            if not saver:
                print("New best dev UAS! Saving model in ./data/weights/parser.weights")
                dy.save('./data/weights/parser.weights',
                        [model.pW, model.pB1, model.pU, model.pB2])

    if saver:
        print(80 * "=")
        print("TESTING")
        print(80 * "=")
        print("Restoring the best model weights found on the dev set")
        model.pW, model.pB1, model.pU, model.pB2 = dy.load(
            './data/weights/parser.weights', model.m)
        print("Final evaluation on test set")
        UAS, dependencies = parser.parse(test_set)
        print("- test UAS: {:.2f}".format(UAS * 100.0))
        print("Writing predictions")
        with open('q2_test.predicted.pkl', 'wb') as f:
            pickle.dump(dependencies, f, -1)
        print("Done!")
def build_model(self, pc, best_model_path):
    if best_model_path:
        print('Loading model from: {}'.format(best_model_path))
        self.RNN, self.VOCAB_LOOKUP, self.R, self.bias = dy.load(best_model_path, pc)
    else:
        # LSTM
        self.RNN = dy.CoupledLSTMBuilder(self.hyperparams['LAYERS'],
                                         self.hyperparams['INPUT_DIM'],
                                         self.hyperparams['HIDDEN_DIM'], pc)
        # embedding lookups for vocabulary
        self.VOCAB_LOOKUP = pc.add_lookup_parameters(
            (self.hyperparams['VOCAB_SIZE'], self.hyperparams['INPUT_DIM']))
        # softmax parameters
        self.R = pc.add_parameters(
            (self.hyperparams['VOCAB_SIZE'], self.hyperparams['HIDDEN_DIM']))
        self.bias = pc.add_parameters(self.hyperparams['VOCAB_SIZE'])

    print('Model dimensions:')
    print(' * VOCABULARY EMBEDDING LAYER: IN-DIM: {}, OUT-DIM: {}'.format(
        self.hyperparams['VOCAB_SIZE'], self.hyperparams['INPUT_DIM']))
    print()
    print(' * LSTM: IN-DIM: {}, OUT-DIM: {}'.format(
        self.hyperparams['INPUT_DIM'], self.hyperparams['HIDDEN_DIM']))
    print('   LSTM has {} layer(s)'.format(self.hyperparams['LAYERS']))
    print()
    print(' * SOFTMAX: IN-DIM: {}, OUT-DIM: {}'.format(
        self.hyperparams['HIDDEN_DIM'], self.hyperparams['VOCAB_SIZE']))
    print()
def __init__(self, c2i, num_lstm_layers=-1,
             char_dim=-1, hidden_dim=-1, word_embedding_dim=-1, file=None):
    self.c2i = c2i
    self._model = dy.Model()
    if file is None:
        # Char LSTM Parameters
        self.char_lookup = self._model.add_lookup_parameters((len(c2i), char_dim))
        self.char_fwd_lstm = dy.LSTMBuilder(num_lstm_layers, char_dim, hidden_dim, self._model)
        self.char_bwd_lstm = dy.LSTMBuilder(num_lstm_layers, char_dim, hidden_dim, self._model)

        # Post-LSTM Parameters
        self.lstm_to_rep_params = self._model.add_parameters((word_embedding_dim, hidden_dim * 2))
        self.lstm_to_rep_bias = self._model.add_parameters(word_embedding_dim)
        self.mlp_out = self._model.add_parameters((word_embedding_dim, word_embedding_dim))
        self.mlp_out_bias = self._model.add_parameters(word_embedding_dim)
    else:
        # read from saved file; c2i mapping to be read by the calling function (for now)
        pc = dy.ParameterCollection()
        model_members = iter(dy.load(file, pc))
        self.char_lookup = next(model_members)
        self.char_fwd_lstm = next(model_members)
        self.char_bwd_lstm = next(model_members)
        self.lstm_to_rep_params = next(model_members)
        self.lstm_to_rep_bias = next(model_members)
        self.mlp_out = next(model_members)
        self.mlp_out_bias = next(model_members)
def run_test(args):
    print("Loading test trees from {}...".format(args.test_path))
    test_treebank = trees.load_trees(args.test_path)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Loading model from {}...".format(args.model_path_base))
    model = dy.ParameterCollection()
    [parser] = dy.load(args.model_path_base, model)

    print("Parsing test sentences...")
    start_time = time.time()

    test_predicted = []
    for tree in test_treebank:
        dy.renew_cg()
        sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
        predicted, _ = parser.parse(sentence)
        test_predicted.append(predicted.convert())

    test_fscore = evaluate.evalb(args.evalb_dir, test_treebank, test_predicted,
                                 args.parser_type)
    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
def user_load_model(path, model):
    # dy.load returns a list of saved objects; here there is exactly one
    [edm] = dy.load(path + '/edm', model)
    print("Back from spec")
    # restore the extra (non-DyNet) state that was pickled separately
    with open(path + '/params.pkl', 'rb') as f:
        M = pickle.load(f)
    print(edm, M)
    edm.set_M(M)
    return edm
def save_components(args):
    model = dy.ParameterCollection()
    [parser] = dy.load(args.model_path_base, model)
    parser.f_label.param_collection().save(args.save_path, '/f-label', append=False)
    parser.f_tag.param_collection().save(args.save_path, '/f-tag', append=True)
    parser.f_encoding.param_collection().save(args.save_path, '/f-encoding', append=True)
    parser.word_embeddings.save(args.save_path, '/word-embedding', append=True)
    parser.lstm.param_collection().save(args.save_path, '/lstm', append=True)
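# The components saved above with explicit keys can be restored one by one.
# A minimal sketch of the reverse direction, assuming the low-level keyed
# populate API (ParameterCollection.populate(fname, key)) and that the fresh
# parser was already constructed with identical dimensions; this helper is an
# illustration, not part of the original code:
def load_components(args, parser):
    # each sub-collection is filled in from its keyed section of the same file
    parser.f_label.param_collection().populate(args.save_path, '/f-label')
    parser.f_tag.param_collection().populate(args.save_path, '/f-tag')
    parser.f_encoding.param_collection().populate(args.save_path, '/f-encoding')
    parser.word_embeddings.populate(args.save_path, '/word-embedding')
    parser.lstm.param_collection().populate(args.save_path, '/lstm')
    return parser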
def test_save_load(self):
    self.p.forward()
    self.p.backward()
    self.t.update()
    dy.renew_cg()
    v1 = self.p.value()
    dy.save(self.file, [self.p])
    [p2] = dy.load(self.file, self.m2)
    v2 = p2.value()
    self.assertTrue(np.allclose(v1, v2))
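# The fixtures above (self.p, self.t, self.m2) hide the basic round trip.
# A self-contained sketch of the same check using only the high-level
# dy.save/dy.load API (names and the /tmp path are illustrative only):
import numpy as np
import dynet as dy

m1 = dy.ParameterCollection()
p = m1.add_parameters((2, 3))          # some trainable parameter
dy.save("/tmp/roundtrip", [p])         # writes /tmp/roundtrip.data + .meta

m2 = dy.ParameterCollection()          # a fresh, empty collection
[p2] = dy.load("/tmp/roundtrip", m2)   # objects come back in the saved order
assert np.allclose(p.as_array(), p2.as_array())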
def __init__(self, type, model_file, input_file):
    self.type = type
    self.input_file = input_file
    self.sequences = read_file(input_file)
    model = dn.Model()
    self.model = dn.load("model_type" + self.type, model)[0]
    self.vocab, self.tags, self.chars = pickle.load(
        open(model_file + self.type + ".vocab", "rb"))
    self.tags_to_ix = {ix: tag for tag, ix in self.tags.items()}
    self.define_data()
def __init__(self, model_path):
    best_model_path = model_path + '/bestmodel.txt'
    hyperparams_file = model_path + '/best.dev'
    with codecs.open(hyperparams_file, 'r', 'utf-8') as f:
        hyperparams_dict = dict(line.strip().split(' = ') for line in f)
    self.hyperparams = {
        'INPUT_DIM': int(hyperparams_dict['INPUT_DIM']),
        'HIDDEN_DIM': int(hyperparams_dict['HIDDEN_DIM']),
        'LAYERS': int(hyperparams_dict['LAYERS']),
        'VOCAB_PATH': hyperparams_dict['VOCAB_PATH'],
        'OVER_SEGS': 'OVER_SEGS' in hyperparams_dict,
    }
    self.pc = dy.ParameterCollection()

    print('Loading vocabulary from {}:'.format(self.hyperparams['VOCAB_PATH']))
    self.vocab = Vocab.from_file(self.hyperparams['VOCAB_PATH'])
    self.BEGIN = utils.GO_ID
    self.STOP = utils.EOS_ID
    self.UNK = utils.UNK_ID
    self.hyperparams['VOCAB_SIZE'] = self.vocab.size()

    print('Model Hyperparameters:')
    for k, v in self.hyperparams.items():
        print('{:20} = {}'.format(k, v))
    print()

    print('Loading model from: {}'.format(best_model_path))
    self.RNN, self.VOCAB_LOOKUP, self.R, self.bias = dy.load(
        best_model_path, self.pc)

    print('Model dimensions:')
    print(' * VOCABULARY EMBEDDING LAYER: IN-DIM: {}, OUT-DIM: {}'.format(
        self.hyperparams['VOCAB_SIZE'], self.hyperparams['INPUT_DIM']))
    print()
    print(' * LSTM: IN-DIM: {}, OUT-DIM: {}'.format(
        self.hyperparams['INPUT_DIM'], self.hyperparams['HIDDEN_DIM']))
    print('   LSTM has {} layer(s)'.format(self.hyperparams['LAYERS']))
    print()
    print(' * SOFTMAX: IN-DIM: {}, OUT-DIM: {}'.format(
        self.hyperparams['HIDDEN_DIM'], self.hyperparams['VOCAB_SIZE']))
    print()
def __init__(self, options, train_sentences=None, restore_file=None, statistics=None):
    self.model = dn.Model()
    random.seed(1)
    self.trainer = dn.AdamTrainer(self.model)
    self.activation = activations[options.activation]
    self.labelsFlag = options.labelsFlag
    self.costaugFlag = options.cost_augment
    self.options = options

    if "func" in options:
        del options.func

    if restore_file:
        self.container, = dn.load(restore_file, self.model)
        networks = list(self.container.components)
        self.network = networks.pop(0)
        self.statistics = statistics
        self.has_emptys = len(statistics.emptys) > 0
        if self.has_emptys:
            self.network_for_emptys = networks.pop(0)
        if self.options.use_2nd:
            self.network3 = networks.pop(0)
            if self.has_emptys:
                self.network3_for_emptys_mid = networks.pop(0)
                self.network3_for_emptys_out = networks.pop(0)
        assert not networks
    else:
        self.container = nn.Container(self.model)
        self.statistics = statistics = StatisticsWithEmpty.from_sentences(train_sentences)
        self.has_emptys = len(statistics.emptys) > 0
        self.network = EdgeEvaluationNetwork(self.container, statistics, options)
        if self.has_emptys:
            self.network_for_emptys = EdgeEvaluation(self.container, options)
        if options.use_2nd:
            self.network3 = EdgeSiblingEvaluation(self.container, options)
            if self.has_emptys:
                self.network3_for_emptys_mid = EdgeSiblingEvaluation(self.container, options)
                self.network3_for_emptys_out = EdgeSiblingEvaluation(self.container, options)
def __init__(self, nclass, paramcol, loadname=None):
    '''
    @param nclass: int, number of classes to be classified
    @param paramcol: parameter collection that is to hold the local parameters in CRF
    @param loadname: string, default=None; if it is not None, load parameters
        instead of creating them from scratch, taking loadname as the basename
        used in dy.load()
    '''
    if loadname is None:
        self.d = nclass
        self.pb = paramcol.add_parameters((nclass, ))
        self.pe = paramcol.add_parameters((nclass, ))
        self.pT = paramcol.add_parameters((nclass, nclass))
    else:
        self.pb, self.pe, self.pT = dy.load(loadname, paramcol)
        self.d = self.pT.shape()[0]
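# dy.load returns objects in exactly the order they were passed to dy.save,
# so the unpacking above relies on a matching save call. A minimal sketch of
# the counterpart (the method name save_to is illustrative, not part of the
# original class):
def save_to(self, basename):
    # order must mirror the `self.pb, self.pe, self.pT = dy.load(...)` line
    dy.save(basename, [self.pb, self.pe, self.pT])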
def run_test(args):
    print("Loading test trees from {}...".format(args.test_path))
    test_treebank = trees.load_trees(args.test_path, args.normal)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Loading model from {}...".format(args.model_path_base))
    model = dy.ParameterCollection()
    [parser] = dy.load(args.model_path_base, model)

    label_vocab = vocabulary.Vocabulary()
    label_list = util.load_label_list('../data/labels.txt')
    for item in label_list:
        label_vocab.index((item, ))
    label_vocab.index((parse.EMPTY, ))
    for item in label_list:
        label_vocab.index((item + "'", ))
    label_vocab.freeze()
    latent_tree = latent.latent_tree_builder(label_vocab, args.RBTlabel)

    print("Parsing test sentences...")
    start_time = time.time()

    test_predicted = []
    test_gold = latent_tree.build_latent_trees(test_treebank)
    for x, chunks in test_treebank:
        dy.renew_cg()
        sentence = [(parse.XX, ch) for ch in x]
        predicted, _ = parser.parse(sentence)
        test_predicted.append(predicted.convert())

    test_fscore = evaluate.eval_chunks(args.evalb_dir, test_gold, test_predicted,
                                       output_filename=args.expname + '.finaltest.txt')  # evalb
    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
def run_test2(args):
    model = dy.ParameterCollection()
    [parser] = dy.load(
        "models/chartdyRBTC-model_addr_dytree_giga_0.4_200_1_chartdyRBTC_dytree_1_houseno_0_0_dev=0.90",
        model)
    test_chunk_insts = util.read_chunks(args.test_path, args.normal)
    ftreelog = open('aaa' + '.test.predtree.txt', 'w', encoding='utf-8')

    test_start_time = time.time()
    test_predicted = []
    test_gold = []
    for inst in test_chunk_insts:
        chunks = util.inst2chunks(inst)
        test_gold.append(chunks)

    for x, chunks in test_chunk_insts:
        dy.renew_cg()
        sentence = [(parse.XX, ch) for ch in x]
        predicted, _ = parser.parse(sentence)
        pred_tree = predicted.convert()
        ftreelog.write(pred_tree.linearize() + '\n')
        test_predicted.append(pred_tree.to_chunks())
    ftreelog.close()

    test_fscore = evaluate.eval_chunks2(args.evalb_dir, test_gold, test_predicted,
                                        output_filename='aaaabbbb' + '.test.txt')  # evalb
    print("test-fscore {} "
          "test-elapsed {} ".format(
              test_fscore,
              format_elapsed(test_start_time),
          ))
def load_model(path, model_version):
    full_saving_path = os.path.join(path, model_version)
    new_model_obj = pickle.load(open(full_saving_path + ".p", "rb"))
    model_to_load = dy.ParameterCollection()
    W_emb, W_cnn, b_cnn, W_mlp, b_mlp, V_mlp, a_mlp = dy.load(
        full_saving_path, model_to_load)
    new_model_obj.W_emb = W_emb
    new_model_obj.W_cnn = W_cnn
    new_model_obj.b_cnn = b_cnn
    new_model_obj.W_mlp = W_mlp
    new_model_obj.b_mlp = b_mlp
    new_model_obj.V_mlp = V_mlp
    new_model_obj.a_mlp = a_mlp
    # restore the defaultdicts: they were converted to plain dicts before
    # pickling, since pickle cannot serialize the lambda default factory
    new_model_obj.w2i = defaultdict(lambda: len(new_model_obj.w2i), new_model_obj.w2i)
    new_model_obj.t2i = defaultdict(lambda: len(new_model_obj.t2i), new_model_obj.t2i)
    new_model_obj.model = model_to_load
    return new_model_obj
def run_test(args):
    if not os.path.exists(args.experiment_directory):
        os.mkdir(args.experiment_directory)
    print("Loading test trees from {}...".format(args.input_file))
    test_treebank = trees.load_trees(args.input_file)
    test_tokenized_lines = parse_trees_to_string_lines(test_treebank)
    test_embeddings_file = compute_elmo_embeddings(
        test_tokenized_lines,
        os.path.join(args.experiment_directory, 'test_embeddings'))
    print("Loaded {:,} test examples.".format(len(test_treebank)))
    print("Loading model from {}...".format(args.model_path))
    model = dy.ParameterCollection()
    [parser] = dy.load(args.model_path, model)
    print("Parsing test sentences...")
    check_performance(parser, test_treebank, test_embeddings_file, args)
def __init__(self, indim, hdim, paramcol, loadname=None):
    '''
    @param indim: int, input dimension of biLSTM
    @param hdim: int, hidden state dimension of both forward and backward LSTM
    @param paramcol: parameter collection that is to hold the local parameters in biLSTM
    @param loadname: string, default=None; if it is not None, load parameters
        instead of creating them from scratch, taking loadname as the basename
        used in dy.load()
    '''
    if loadname is None:
        self.flstm = dy.VanillaLSTMBuilder(1, indim, hdim, paramcol)
        self.blstm = dy.VanillaLSTMBuilder(1, indim, hdim, paramcol)
        self.flstm.set_dropouts(config.dropout, config.dropout)
        self.blstm.set_dropouts(config.dropout, config.dropout)
    else:
        self.flstm, self.blstm = dy.load(loadname, paramcol)
def model_load_helper(mode, prefix, model):
    """
    Load helper for backward compatibility: restores both the options
    and the model, regardless of which format they were saved in.
    """
    if mode is None:
        mode = detect_saved_model_type(prefix)
    if mode == "dynet":
        with open(prefix + ".options", "rb") as f:
            options = pickle.load(f)
        return options, dn.load(prefix, model)[0]
    elif mode == "pickle":
        with open(prefix, "rb") as f:
            options, picklable = pickle.load(f)
        return options, DynetSaveable.from_picklable_obj(picklable, model)
    elif mode == "pickle-gzip":
        with open(prefix + ".gz", "rb") as f:
            options, picklable = pickle.load(f)
        return options, DynetSaveable.from_picklable_obj(picklable, model)
    else:
        raise TypeError("Invalid model format.")
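# detect_saved_model_type is not shown in this snippet. A plausible sketch,
# assuming only the conventions visible above (the dynet branch reads a
# sidecar ".options" pickle, the gzipped variant carries a ".gz" suffix);
# the body here is an illustration, not the original implementation:
import os

def detect_saved_model_type(prefix):
    if os.path.exists(prefix + ".options"):   # dn.save/dn.load style
        return "dynet"
    if os.path.exists(prefix + ".gz"):        # gzipped pickle variant
        return "pickle-gzip"
    return "pickle"                           # plain pickle fallback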
def load_model(self, d):
    self.init_model()
    param_keys = d["param_keys"]
    self.max_num_labels = d["max_num_labels"]
    Config().args.layers = self.layers = d["layers"]
    Config().args.layer_dim = self.layer_dim = d["layer_dim"]
    Config().args.output_dim = self.output_dim = d.get("output_dim", Config().args.output_dim)
    Config().args.activation = self.activation_str = d["activation"]
    self.activation = ACTIVATIONS[self.activation_str]
    Config().args.init = self.init_str = d["init"]
    self.init = INITIALIZERS[self.init_str]
    self.load_extra(d)
    print("Loading model from '%s'... " % self.filename, end="", flush=True)
    started = time.time()
    try:
        param_values = dy.load(self.filename, self.model)
        print("Done (%.3fs)." % (time.time() - started))
        self.params = OrderedDict(zip(param_keys, param_values))
    except KeyError as e:
        print("Failed loading model: %s" % e)
def __init__(self, options, train_sentences=None, restore_file=None):
    self.model = dn.Model()
    random.seed(1)
    self.trainer = dn.AdamTrainer(self.model)
    self.activation = activations[options.activation]
    self.decoder = decoders[options.decoder]
    self.labelsFlag = options.labelsFlag
    self.costaugFlag = options.cost_augment
    self.options = options

    if "func" in options:
        del options.func

    if restore_file:
        self.container, = dn.load(restore_file, self.model)
        self.network, self.network3 = self.container.components
    else:
        self.container = nn.Container(self.model)
        statistics = Statistics.from_sentences(train_sentences)
        self.network = EdgeEvaluationNetwork(self.container, statistics, options)
        self.network3 = EdgeSiblingEvaluation(self.container, options)
def load_proposal_model(self, dir):
    """Load the proposal model to sample with."""
    assert os.path.isdir(dir), dir

    print(f'Loading proposal model from `{dir}`...')
    model_checkpoint_path = os.path.join(dir, 'model')
    state_checkpoint_path = os.path.join(dir, 'state.json')

    [proposal] = dy.load(model_checkpoint_path, dy.ParameterCollection())
    assert (isinstance(proposal, DiscRNNG) or
            isinstance(proposal, ChartParser)), type(proposal)

    with open(state_checkpoint_path, 'r') as f:
        state = json.load(f)
    epochs = state['epochs']
    fscore = state['test-fscore']
    print(f'Loaded model trained for {epochs} epochs with test-fscore {fscore}.')

    self.proposal = proposal
    self.proposal.eval()
    self.use_loaded_samples = False
def run_train(args):
    if args.numpy_seed is not None:
        print("Setting numpy random seed to {}...".format(args.numpy_seed))
        np.random.seed(args.numpy_seed)

    if args.trial == 1:
        args.train_path = 'data/trial.txt'
        args.dev_path = 'data/trial.txt'
        args.test_path = 'data/trial.txt'

    print("Loading training trees from {}...".format(args.train_path))
    train_chunk_insts = util.read_chunks(args.train_path, args.normal)
    print("Loaded {:,} training examples.".format(len(train_chunk_insts)))

    print("Loading development trees from {}...".format(args.dev_path))
    dev_chunk_insts = util.read_chunks(args.dev_path, args.normal)
    print("Loaded {:,} development examples.".format(len(dev_chunk_insts)))

    print("Loading test trees from {}...".format(args.test_path))
    test_chunk_insts = util.read_chunks(args.test_path, args.normal)
    print("Loaded {:,} test examples.".format(len(test_chunk_insts)))

    print("Constructing vocabularies...")
    tag_vocab = vocabulary.Vocabulary()
    tag_vocab.index(parse.START)
    tag_vocab.index(parse.STOP)
    tag_vocab.index(parse.XX)

    word_vocab = vocabulary.Vocabulary()
    word_vocab.index(parse.START)
    word_vocab.index(parse.STOP)
    word_vocab.index(parse.UNK)
    word_vocab.index(parse.NUM)
    for x, chunks in train_chunk_insts + dev_chunk_insts + test_chunk_insts:
        for ch in x:
            word_vocab.index(ch)

    label_vocab = vocabulary.Vocabulary()
    label_vocab.index(())
    label_list = util.load_label_list(args.labellist_path)  # e.g. 'data/labels.txt'
    for item in label_list:
        label_vocab.index((item, ))
    if args.nontlabelstyle != 1:
        for item in label_list:
            label_vocab.index((item + "'", ))
    if args.nontlabelstyle == 1:
        label_vocab.index((parse.EMPTY, ))

    tag_vocab.freeze()
    word_vocab.freeze()
    label_vocab.freeze()

    latent_tree = latent.latent_tree_builder(label_vocab, args.RBTlabel,
                                             args.nontlabelstyle)

    def print_vocabulary(name, vocab):
        special = {parse.START, parse.STOP, parse.UNK}
        print("{} ({:,}): {}".format(
            name, vocab.size,
            sorted(value for value in vocab.values if value in special) +
            sorted(value for value in vocab.values if value not in special)))

    if args.print_vocabs:
        print_vocabulary("Tag", tag_vocab)
        print_vocabulary("Word", word_vocab)
        print_vocabulary("Label", label_vocab)

    print("Initializing model...")
    pretrain = {'giga': 'data/giga.vec100', 'none': 'none'}
    pretrainemb = util.load_pretrain(pretrain[args.pretrainemb],
                                     args.word_embedding_dim, word_vocab)
    model = dy.ParameterCollection()
    if args.parser_type == "chartdyRBTC":
        parser = parse.ChartDynamicRBTConstraintParser(
            model,
            tag_vocab,
            word_vocab,
            label_vocab,
            args.tag_embedding_dim,
            args.word_embedding_dim,
            args.lstm_layers,
            args.lstm_dim,
            args.label_hidden_dim,
            args.dropout,
            (args.pretrainemb, pretrainemb),
            args.chunkencoding,
            args.trainc == 1,
            True,
            (args.zerocostchunk == 1),
        )
    else:
        print('Model is not valid!')
        exit()

    if args.loadmodel != 'none':
        # replace the freshly initialized parser with the saved one
        parser = dy.load(args.loadmodel, model)[0]
        print('Model is loaded from ', args.loadmodel)

    trainer = dy.AdamTrainer(model)

    total_processed = 0
    current_processed = 0
    check_every = len(train_chunk_insts) / args.checks_per_epoch
    best_dev_fscore = -np.inf
    best_dev_model_path = None

    start_time = time.time()

    def check_dev():
        nonlocal best_dev_fscore
        nonlocal best_dev_model_path

        dev_start_time = time.time()

        dev_predicted = []
        dev_gold = []
        for inst in dev_chunk_insts:
            chunks = util.inst2chunks(inst)
            dev_gold.append(chunks)

        for x, chunks in dev_chunk_insts:
            dy.renew_cg()
            sentence = [(parse.XX, ch) for ch in x]
            predicted, _ = parser.parse(sentence)
            dev_predicted.append(predicted.convert().to_chunks())

        dev_fscore = evaluate.eval_chunks2(args.evalb_dir, dev_gold, dev_predicted,
                                           output_filename=args.expname + '.dev.txt')  # evalb
        print("dev-fscore {} "
              "dev-elapsed {} "
              "total-elapsed {}".format(
                  dev_fscore,
                  format_elapsed(dev_start_time),
                  format_elapsed(start_time),
              ))

        if dev_fscore.fscore > best_dev_fscore:
            if best_dev_model_path is not None:
                for ext in [".data", ".meta"]:
                    path = best_dev_model_path + ext
                    if os.path.exists(path):
                        print("Removing previous model file {}...".format(path))
                        os.remove(path)

            best_dev_fscore = dev_fscore.fscore
            best_dev_model_path = "{}_dev={:.2f}".format(
                args.model_path_base + "_" + args.expname, dev_fscore.fscore)
            print("Saving new best model to {}...".format(best_dev_model_path))
            dy.save(best_dev_model_path, [parser])

            test_start_time = time.time()
            test_predicted = []
            test_gold = []
            for inst in test_chunk_insts:
                chunks = util.inst2chunks(inst)
                test_gold.append(chunks)

            ftreelog = open(args.expname + '.test.predtree.txt', 'w', encoding='utf-8')
            for x, chunks in test_chunk_insts:
                dy.renew_cg()
                sentence = [(parse.XX, ch) for ch in x]
                predicted, _ = parser.parse(sentence)
                pred_tree = predicted.convert()
                ftreelog.write(pred_tree.linearize() + '\n')
                test_predicted.append(pred_tree.to_chunks())
            ftreelog.close()

            test_fscore = evaluate.eval_chunks2(args.evalb_dir, test_gold, test_predicted,
                                                output_filename=args.expname + '.test.txt')  # evalb
            print("epoch {:,} "
                  "test-fscore {} "
                  "test-elapsed {} "
                  "total-elapsed {}".format(
                      epoch,
                      test_fscore,
                      format_elapsed(test_start_time),
                      format_elapsed(start_time),
                  ))

    train_trees = latent_tree.build_dynamicRBT_trees(train_chunk_insts)
    train_trees = [(x, tree.convert(), chunks, latentscope)
                   for x, tree, chunks, latentscope in train_trees]

    for epoch in itertools.count(start=1):
        if args.epochs is not None and epoch > args.epochs:
            break

        np.random.shuffle(train_chunk_insts)
        epoch_start_time = time.time()

        for start_index in range(0, len(train_chunk_insts), args.batch_size):
            dy.renew_cg()
            batch_losses = []
            for x, tree, chunks, latentscope in train_trees[
                    start_index:start_index + args.batch_size]:
                # skip instances containing a chunk longer than the limit
                discard = False
                for chunk in chunks:
                    length = chunk[2] - chunk[1]
                    if length > args.maxllimit:
                        discard = True
                        break
                if discard:
                    print('discard')
                    continue

                sentence = [(parse.XX, ch) for ch in x]
                if args.parser_type == "top-down":
                    _, loss = parser.parse(sentence, tree, args.explore)
                else:
                    _, loss = parser.parse(sentence, tree, chunks, latentscope)
                batch_losses.append(loss)
                total_processed += 1
                current_processed += 1

            batch_loss = dy.average(batch_losses)
            batch_loss_value = batch_loss.scalar_value()
            batch_loss.backward()
            trainer.update()

            print("Epoch {:,} "
                  "batch {:,}/{:,} "
                  "processed {:,} "
                  "batch-loss {:.4f} "
                  "epoch-elapsed {} "
                  "total-elapsed {}".format(
                      epoch,
                      start_index // args.batch_size + 1,
                      int(np.ceil(len(train_chunk_insts) / args.batch_size)),
                      total_processed,
                      batch_loss_value,
                      format_elapsed(epoch_start_time),
                      format_elapsed(start_time),
                  ),
                  flush=True)

            if current_processed >= check_every:
                current_processed -= check_every
                if epoch > 7:
                    check_dev()
def __init__(self, graphs, embeddings, assoc_mode=BILINEAR_MODE, reg=0.0,
             dropout=0.0, no_assoc=False, model_path=None, ergm_path=None,
             path_only_init=False):
    """
    :param graphs: dictionary of {relation: CSR-format graph}s, node-aligned
    :param embeddings: list of numpy array embeddings, indices aligned to nodes
    :param model_path: optional path for files with pre-trained association model (read by super)
    :param ergm_path: optional path for files with pre-trained model
    :param path_only_init: model_path only used for initialization
    """
    # input validation
    AssociationModel.__init__(self, graphs, embeddings, assoc_mode, dropout,
                              model_path=model_path)

    # raw members
    self.no_assoc = no_assoc
    self.regularize = reg

    # cache members
    self.cache = {}
    self.edge_counts = self.add_cache_dict('ec')                  # keys are single relations
    self.mutual_edge_counts = self.add_cache_dict('mec')          # keys are unordered relation pairs
    self.two_path_counts = self.add_cache_dict('tpc')             # keys are ordered relation pairs
    self.transitive_closure_counts = self.add_cache_dict('tcc')   # keys are ordered relation triplets
    self.directed_triangle_counts = self.add_cache_dict('dtc')    # keys are ordered relation triplets
    self.in_degs = self.add_cache_dict('ins')                     # keys are single relations, values are big lists
    self.out_degs = self.add_cache_dict('outs')                   # keys are single relations, values are big lists
    self.in_one_star_counts = self.add_cache_dict('i1sc')         # keys are single relations
    self.out_one_star_counts = self.add_cache_dict('o1sc')        # keys are single relations
    self.in_two_star_counts = self.add_cache_dict('i2sc')         # keys are unordered relation pairs
    self.out_two_star_counts = self.add_cache_dict('o2sc')        # keys are unordered relation pairs
    self.in_three_star_counts = self.add_cache_dict('i3sc')       # keys are unordered relation triplets
    self.out_three_star_counts = self.add_cache_dict('o3sc')      # keys are unordered relation triplets
    # 'at least k' stars - 'one/two/three plus'
    self.in_one_p_star_counts = self.add_cache_dict('i1psc')      # keys are single relations
    self.out_one_p_star_counts = self.add_cache_dict('o1psc')     # keys are single relations
    self.in_two_p_star_counts = self.add_cache_dict('i2psc')      # keys are unordered relation pairs
    self.out_two_p_star_counts = self.add_cache_dict('o2psc')     # keys are unordered relation pairs
    self.in_three_p_star_counts = self.add_cache_dict('i3psc')    # keys are unordered relation triplets
    self.out_three_p_star_counts = self.add_cache_dict('o3psc')   # keys are unordered relation triplets

    self.missing_node_indices = []  # updated during training (not sure if needed)

    timeprint('computing ERGM features...')
    self.init_ergm_features()  # populates self.feature_vals
    timeprint('finished! computed {} features'.format(len(self.feature_vals)))
    timeprint('{} non-zero features'.format(
        np.count_nonzero(list(self.feature_vals.values()))))

    # documentation again, for efficient updates
    encountered_features = list(self.feature_vals.keys())  # canonical ordering from now on

    if ergm_path is not None:
        ergm_model_path = ergm_path
    elif (model_path is not None) and (not path_only_init):
        ergm_model_path = model_path
    else:
        ergm_model_path = None

    if ergm_model_path is None:
        self.feature_set = encountered_features
    else:
        with open(ergm_model_path + '.feats', 'rb') as f:
            self.feature_set = pickle.load(f)
        assert sorted(self.feature_set) == sorted(encountered_features)

    if ergm_model_path is None:
        self.ergm_weights = self.model.add_parameters(len(self.feature_set))
    if model_path is None and ergm_model_path is None:
        # 'model_path is not None' is initialized in super()
        # TODO support other association modes (affects downstream)
        if self.no_assoc:
            self.word_assoc_weights = {
                r: self.model.add_parameters((self.emb_dim, self.emb_dim),
                                             init=dy.ConstInitializer(0.0))
                for r in self.relation_names}
        else:
            self.word_assoc_weights = {
                r: self.model.add_parameters((self.emb_dim, self.emb_dim))
                for r in self.relation_names}
    elif ergm_model_path is not None:
        pc = dy.ParameterCollection()
        dy.load(ergm_model_path + '.dyn', pc)
        pc_list = pc.parameters_list()
        i = 0
        self.ergm_weights = pc_list[i]
        if not path_only_init:
            self.word_assoc_weights = {}
            rel_order = self.relation_names
            for r in rel_order:
                i += 1
                self.word_assoc_weights[r] = pc_list[i]
        i += 1
        assert i == len(pc_list), \
            '{} relation params read but length is {}'.format(i, len(pc_list))

    self.dy_score = self.ergm_score()
    self.score = self.dy_score.scalar_value()
    self.score_is_stale = False

    timeprint('finished initialization. initial ERGM score = {}'.format(self.score))
def test_save_load(self):
    dy.save(self.file, [self.b])
    [b] = dy.load(self.file, self.m2)
def load_model(model_file_name):
    m = dy.ParameterCollection()
    return [m] + list(dy.load(model_file_name, m))
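# Usage sketch for the helper above: it returns the fresh ParameterCollection
# followed by the saved objects in save order. Assuming the file was written
# with, say, dy.save("basename", [builder, lookup]) (illustrative names):
pc, builder, lookup = load_model("basename")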
    # (tail of an evaluation helper; the enclosing def is not shown)
    LAS, UAS = [
        float(line.strip().split()[-2])
        for line in open('score_tmp').readlines()[:2]
    ]
    print('LAS %.2f, UAS %.2f' % (LAS, UAS))
    # os.system('rm tmp score_tmp')
    return LAS, UAS


if __name__ == "__main__":
    args, config = parser_arg_cfg()
    # load model with the high-level save/load API
    load_model_path = config.get("load", "load_model_path")
    pc = dy.ParameterCollection()
    biaffine_parser = dy.load(load_model_path, pc)[0]
    # get vocabs from the model; they are then used to create the fields
    vocab_form, vocab_upos, vocab_deprel = (biaffine_parser.vocab_form,
                                            biaffine_parser.vocab_pos,
                                            biaffine_parser.vocab_deprel)
    # create data fields for building the test dataset; the vocabs are taken
    # from the model instead of being built from the data itself
    f_form = pytext.data.Field(lower=True, tokenize=list, include_lengths=True)
    f_upos = pytext.data.Field(tokenize=list)
    f_head = pytext.data.Field(use_vocab=False, pad_token=0)
    f_deprel = pytext.data.Field(tokenize=list)
    f_form.vocab = vocab_form
    f_upos.vocab = vocab_upos
    f_deprel.vocab = vocab_deprel
    # build test dataset
def main():
    # Configuration file processing
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='../configs/debug.cfg')
    argparser.add_argument('--continue_training', action='store_true',
                           help='Load model Continue Training')
    argparser.add_argument('--name', default='experiment',
                           help='The name of the experiment.')
    argparser.add_argument('--model', default='s2s',
                           help='s2s: seq2seq-head-selection-model'
                                's2tBFS: seq2tree-BFS-decoder-model'
                                's2tDFS: seq2tree-DFS-decoder-model')
    argparser.add_argument('--gpu', default='0', help='GPU ID (-1 to cpu)')
    args, extra_args = argparser.parse_known_args()
    cfg = IniConfigurator(args.config_file, extra_args)

    # Logger setting
    logger = dual_channel_logger(
        __name__,
        file_path=cfg.LOG_FILE,
        file_model='w',
        formatter='%(asctime)s - %(levelname)s - %(message)s',
        time_formatter='%m-%d %H:%M')
    from eval.script_evaluator import ScriptEvaluator

    # DyNet setting
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    import dynet_config
    dynet_config.set(mem=cfg.DYNET_MEM, random_seed=cfg.DYNET_SEED)
    dynet_config.set_gpu()
    import dynet as dy
    from models.token_representation import TokenRepresentation
    from antu.nn.dynet.seq2seq_encoders import DeepBiRNNBuilder, orthonormal_VanillaLSTMBuilder
    from models.graph_nn_decoder import GraphNNDecoder

    # Build the dataset of the training process
    # Build data reader
    data_reader = PTBReader(
        field_list=['word', 'tag', 'head', 'rel'],
        root='0\t**root**\t_\t**rcpos**\t**rpos**\t_\t0\t**rrel**\t_\t_',
        spacer=r'[\t]',
    )
    # Build vocabulary with pretrained glove
    vocabulary = Vocabulary()
    g_word, _ = glove_reader(cfg.GLOVE)
    pretrained_vocabs = {'glove': g_word}
    vocabulary.extend_from_pretrained_vocab(pretrained_vocabs)
    # Setup datasets
    datasets_settings = {
        'train': DatasetSetting(cfg.TRAIN, True),
        'dev': DatasetSetting(cfg.DEV, False),
        'test': DatasetSetting(cfg.TEST, False),
    }
    datasets = PTBDataset(vocabulary, datasets_settings, data_reader)
    counters = {'word': Counter(), 'tag': Counter(), 'rel': Counter()}
    datasets.build_dataset(counters, no_pad_namespace={'rel'}, no_unk_namespace={'rel'})

    # Build model
    # Parameter
    pc = dy.ParameterCollection()
    trainer = dy.AdamTrainer(pc, alpha=cfg.LR, beta_1=cfg.ADAM_BETA1,
                             beta_2=cfg.ADAM_BETA2, eps=cfg.EPS)
    # Token Representation Layer
    token_repre = TokenRepresentation(pc, cfg, datasets.vocabulary)
    # BiLSTM Encoder Layer
    encoder = DeepBiRNNBuilder(pc, cfg.ENC_LAYERS, token_repre.token_dim,
                               cfg.ENC_H_DIM, orthonormal_VanillaLSTMBuilder)
    # GNN Decoder Layer
    decoder = GraphNNDecoder(pc, cfg, datasets.vocabulary)
    # PTB Evaluator
    my_eval = ScriptEvaluator(['Valid', 'Test'], datasets.vocabulary)

    # Build Training Batch
    def cmp(ins):
        return len(ins['word'])

    train_batch = datasets.get_batches('train', cfg.TRAIN_BATCH_SIZE, True, cmp, True)
    valid_batch = list(datasets.get_batches('dev', cfg.TEST_BATCH_SIZE, False, cmp, False))
    test_batch = list(datasets.get_batches('test', cfg.TEST_BATCH_SIZE, False, cmp, False))

    # Train model
    BEST_DEV_LAS = BEST_DEV_UAS = BEST_ITER = cnt_iter = 0
    valid_loss = [[] for i in range(cfg.GRAPH_LAYERS + 3)]
    logger.info("Experiment name: %s" % args.name)
    SHA = os.popen('git log -1 | head -n 1 | cut -c 8-13').readline().rstrip()
    logger.info('Git SHA: %s' % SHA)
    while cnt_iter < cfg.MAX_ITER:
        dy.renew_cg()
        cnt_iter += 1
        indexes, masks, truth = train_batch.__next__()
        vectors = token_repre(indexes, True)
        vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                          np.array(masks['1D']).T, True)
        loss, part_loss = decoder(vectors, masks, truth, True, True)
        for i, l in enumerate([loss] + part_loss):
            valid_loss[i].append(l.value())
        loss.backward()
        trainer.learning_rate = cfg.LR * cfg.LR_DECAY**(cnt_iter / cfg.LR_ANNEAL)
        trainer.update()

        if cnt_iter % cfg.VALID_ITER:
            continue

        # Validation
        for i in range(len(valid_loss)):
            valid_loss[i] = str(round(np.mean(valid_loss[i]), 2))
        avg_loss = ', '.join(valid_loss)
        logger.info("")
        logger.info("Iter: %d-%d, Avg_loss: %s, LR (%f), Best (%d)" %
                    (cnt_iter / cfg.VALID_ITER, cnt_iter, avg_loss,
                     trainer.learning_rate, BEST_ITER))
        valid_loss = [[] for i in range(cfg.GRAPH_LAYERS + 3)]

        my_eval.clear('Valid')
        for indexes, masks, truth in valid_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)
            vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                              np.array(masks['1D']).T, False)
            pred = decoder(vectors, masks, None, False, True)
            my_eval.add_truth('Valid', truth)
            my_eval.add_pred('Valid', pred)
        dy.save(cfg.LAST_FILE, [token_repre, encoder, decoder])
        if my_eval.evaluation('Valid', cfg.PRED_DEV, cfg.DEV):
            BEST_ITER = cnt_iter / cfg.VALID_ITER
            os.system('cp %s.data %s.data' % (cfg.LAST_FILE, cfg.BEST_FILE))
            os.system('cp %s.meta %s.meta' % (cfg.LAST_FILE, cfg.BEST_FILE))

        # Just record test result
        my_eval.clear('Test')
        for indexes, masks, truth in test_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)
            vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                              np.array(masks['1D']).T, False)
            pred = decoder(vectors, masks, None, False, True)
            my_eval.add_truth('Test', truth)
            my_eval.add_pred('Test', pred)
        my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
    my_eval.print_best_result('Valid')

    # Final test: reload the best checkpoint into a fresh collection
    test_pc = dy.ParameterCollection()
    token_repre, encoder, decoder = dy.load(cfg.BEST_FILE, test_pc)
    my_eval.clear('Test')
    test_batch = datasets.get_batches('test', cfg.TEST_BATCH_SIZE, False, cmp, False)
    for indexes, masks, truth in test_batch:
        dy.renew_cg()
        vectors = token_repre(indexes, False)
        vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                          np.array(masks['1D']).T, False)
        pred = decoder(vectors, masks, None, False, True)
        my_eval.add_truth('Test', truth)
        my_eval.add_pred('Test', pred)
    my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
def load_from_disk(self, filename):
    # `model` is assumed to be a module-level ParameterCollection that the
    # loaded objects are added to
    (self.builder, self.lookup, self.R, self.bias) = dy.load(filename, model)
def load_or_create_model(args, parses_for_vocab):
    components = args.model_path_base.split('/')
    directory = '/'.join(components[:-1])
    if os.path.isdir(directory):
        relevant_files = [f for f in os.listdir(directory)
                          if f.startswith(components[-1])]
    else:
        relevant_files = []
    assert len(relevant_files) <= 2, "Multiple possibilities {}".format(relevant_files)

    if len(relevant_files) > 0:
        print("Loading model from {}...".format(args.model_path_base))
        model = dy.ParameterCollection()
        [parser] = dy.load(args.model_path_base, model)
    else:
        assert parses_for_vocab is not None
        print("Constructing vocabularies using train parses...")
        tag_vocab = vocabulary.Vocabulary()
        tag_vocab.index(parse.START)
        tag_vocab.index(parse.STOP)

        word_vocab = vocabulary.Vocabulary()
        word_vocab.index(parse.START)
        word_vocab.index(parse.STOP)
        word_vocab.index(parse.UNK)

        label_vocab = vocabulary.Vocabulary()
        label_vocab.index(())

        for tree in parses_for_vocab:
            nodes = [tree]
            while nodes:
                node = nodes.pop()
                if isinstance(node, trees.InternalParseNode):
                    label_vocab.index(node.label)
                    nodes.extend(reversed(node.children))
                else:
                    assert isinstance(node, LeafParseNode)
                    tag_vocab.index(node.tag)
                    word_vocab.index(node.word)

        tag_vocab.freeze()
        word_vocab.freeze()
        label_vocab.freeze()

        print("Initializing model...")
        model = dy.ParameterCollection()
        parser = parse.Parser(
            model,
            tag_vocab,
            word_vocab,
            label_vocab,
            None,
            args.word_embedding_dim,
            args.lstm_layers,
            args.lstm_dim,
            args.label_hidden_dim,
            None,
            args.dropout,
            not args.no_elmo,
        )
    return parser, model
def test_on_parses(args):
    if not os.path.exists(args.experiment_directory):
        os.mkdir(args.experiment_directory)
    model = dy.ParameterCollection()
    [parser] = dy.load(args.model_path_base, model)
    treebank = trees.load_trees(args.input_file, strip_top=True, filter_none=True)
    output = [tree.linearize() for tree in treebank]
    with open(os.path.join(args.experiment_directory, 'parses.txt'), 'w') as f:
        f.write('\n'.join(output))
    sentence_embeddings = h5py.File(args.elmo_embeddings_file_path, 'r')

    test_predicted = []
    start_time = time.time()
    total_log_likelihood = 0
    total_confusion_matrix = {}
    total_turned_off = 0
    ranks = []
    num_correct = 0
    total = 0
    for tree_index, tree in enumerate(treebank):
        if tree_index % 100 == 0:
            print(tree_index)
        dy.renew_cg()
        sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves]
        elmo_embeddings_np = sentence_embeddings[str(tree_index)][:, :, :]
        assert elmo_embeddings_np.shape[1] == len(sentence), (
            elmo_embeddings_np.shape[1], len(sentence),
            [word for pos, word in sentence])
        elmo_embeddings = dy.inputTensor(elmo_embeddings_np)
        predicted, (additional_info, c, t) = parser.span_parser(
            sentence, is_train=False, elmo_embeddings=elmo_embeddings)
        num_correct += c
        total += t
        rank = additional_info[3]
        ranks.append(rank)
        total_log_likelihood += additional_info[-1]
        test_predicted.append(predicted.convert())

    print('pos accuracy', num_correct / total)
    print("total time", time.time() - start_time)
    print("total loglikelihood", total_log_likelihood)
    print("total turned off", total_turned_off)
    print(total_confusion_matrix)
    print(ranks)
    print("avg", np.mean(ranks), "median", np.median(ranks))

    dev_fscore_without_labels = evaluate.evalb('EVALB/', treebank, test_predicted,
                                               args=args,
                                               erase_labels=True,
                                               name="without-labels")
    print("dev-fscore without labels", dev_fscore_without_labels)

    dev_fscore_without_labels = evaluate.evalb('EVALB/', treebank, test_predicted,
                                               args=args,
                                               erase_labels=True,
                                               flatten=True,
                                               name="without-label-flattened")
    print("dev-fscore without labels and flattened", dev_fscore_without_labels)

    dev_fscore_without_labels = evaluate.evalb('EVALB/', treebank, test_predicted,
                                               args=args,
                                               erase_labels=False,
                                               flatten=True,
                                               name="flattened")
    print("dev-fscore with labels and flattened", dev_fscore_without_labels)

    test_fscore = evaluate.evalb('EVALB/', treebank, test_predicted, args=args,
                                 name="regular")
    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
    with open(os.path.join(args.experiment_directory, "confusion_matrix.pickle"), "wb") as f:
        pickle.dump(total_confusion_matrix, f)
def main():
    # Configuration file processing
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='../configs/debug.cfg')
    argparser.add_argument('--continue_training', action='store_true',
                           help='Load model Continue Training')
    argparser.add_argument('--name', default='experiment',
                           help='The name of the experiment.')
    argparser.add_argument('--model', default='s2s',
                           help='s2s: seq2seq-head-selection-model'
                                's2tDFS: seq2tree-DFS-decoder-model')
    argparser.add_argument('--gpu', default='0', help='GPU ID (-1 to cpu)')
    args, extra_args = argparser.parse_known_args()
    cfg = IniConfigurator(args.config_file, extra_args)

    # Logger setting
    logger = dual_channel_logger(
        __name__,
        file_path=cfg.LOG_FILE,
        file_model='w',
        formatter='%(asctime)s - %(levelname)s - %(message)s',
        time_formatter='%m-%d %H:%M')
    from eval.script_evaluator import ScriptEvaluator

    # DyNet setting
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    import dynet_config
    dynet_config.set(mem=cfg.DYNET_MEM, random_seed=cfg.DYNET_SEED)
    dynet_config.set_gpu()
    import dynet as dy
    from models.token_representation import TokenRepresentation
    from antu.nn.dynet.seq2seq_encoders import DeepBiRNNBuilder, orthonormal_VanillaLSTMBuilder
    from models.graph_nn_decoder import GraphNNDecoder
    from models.jackknife_decoder import JackKnifeGraphNNDecoder

    # Build the dataset of the training process
    # Build data reader
    data_reader = PTBReader(
        field_list=['word', 'tag', 'head', 'rel'],
        root='0\t**root**\t_\t**rcpos**\t**rpos**\t_\t0\t**rrel**\t_\t_',
        spacer=r'[\t]',
    )
    # Build vocabulary with pretrained glove
    vocabulary = Vocabulary()
    g_word, _ = glove_reader(cfg.GLOVE)
    pretrained_vocabs = {'glove': g_word}
    vocabulary.extend_from_pretrained_vocab(pretrained_vocabs)
    # Setup datasets
    datasets_settings = {
        'train': DatasetSetting(cfg.TRAIN, True),
        'dev': DatasetSetting(cfg.DEV, False),
        'test': DatasetSetting(cfg.TEST, False),
    }
    datasets = PTBDataset(vocabulary, datasets_settings, data_reader)
    counters = {'word': Counter(), 'tag': Counter(), 'rel': Counter()}
    datasets.build_dataset(counters, no_pad_namespace={'rel'}, no_unk_namespace={'rel'})

    # Build model
    # Parameter
    pc = dy.ParameterCollection()
    LR = 0.0005
    trainer = dy.AdamTrainer(pc, LR, cfg.ADAM_BETA1, cfg.ADAM_BETA2, cfg.EPS)
    # Token Representation Layer
    token_repre = TokenRepresentation(pc, cfg, datasets.vocabulary, include_pos=True)
    # BiLSTM Encoder Layer (several attention encoders were tried here, e.g.
    # MultiHeadedAttention and LabelAttention; the BiRNN variant is kept active)
    encoder = DeepBiRNNBuilder(pc, cfg.ENC_LAYERS, token_repre.token_dim,
                               cfg.ENC_H_DIM, orthonormal_VanillaLSTMBuilder)
    # GNN Decoder Layer
    decoder = GraphNNDecoder(pc, cfg, datasets.vocabulary)
    # PTB Evaluator
    my_eval = ScriptEvaluator(['Valid', 'Test'], datasets.vocabulary)

    # Build Training Batch
    def cmp(ins):
        return len(ins['word'])

    train_batch = datasets.get_batches('train', cfg.TRAIN_BATCH_SIZE, True, cmp, True)
    valid_batch = list(datasets.get_batches('dev', cfg.TEST_BATCH_SIZE, False, cmp, False))
    test_batch = list(datasets.get_batches('test', cfg.TEST_BATCH_SIZE, False, cmp, False))

    # Train model
    BEST_DEV_LAS = BEST_DEV_UAS = BEST_ITER = 0
    cnt_iter = -cfg.WARM * cfg.GRAPH_LAYERS
    valid_loss = [[] for i in range(cfg.GRAPH_LAYERS + 3)]
    logger.info("Experiment name: %s" % args.name)
    SHA = os.popen('git log -1 | head -n 1 | cut -c 8-13').readline().rstrip()
    logger.info('Git SHA: %s' % SHA)
    while cnt_iter < cfg.MAX_ITER:
        print(cnt_iter, cfg.MAX_ITER)
        dy.renew_cg(immediate_compute=True, check_validity=True)
        cnt_iter += 1
        indexes, masks, truth = train_batch.__next__()
        vectors = token_repre(indexes, True)
        vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                          np.array(masks['1D']).T, False, True)
        loss, part_loss = decoder(vectors, masks, truth, cnt_iter, True, True)
        for i, l in enumerate([loss] + part_loss):
            valid_loss[i].append(l.value())
        loss.backward()
        trainer.learning_rate = LR * cfg.LR_DECAY**(max(cnt_iter, 0) / cfg.LR_ANNEAL)
        trainer.update()

        if cnt_iter % cfg.VALID_ITER:
            continue

        # Validation
        for i in range(len(valid_loss)):
            valid_loss[i] = str(round(np.mean(valid_loss[i]), 2))
        avg_loss = ', '.join(valid_loss)
        logger.info("")
        logger.info("Iter: %d-%d, Avg_loss: %s, LR (%f), Best (%d)" %
                    (cnt_iter / cfg.VALID_ITER, cnt_iter, avg_loss,
                     trainer.learning_rate, BEST_ITER))
        valid_loss = [[] for i in range(cfg.GRAPH_LAYERS + 3)]

        my_eval.clear('Valid')
        for indexes, masks, truth in valid_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)
            vectors = encoder(vectors, np.array(masks['1D']).T)
            pred = decoder(vectors, masks, None, cnt_iter, False, True)
            my_eval.add_truth('Valid', truth)
            my_eval.add_pred('Valid', pred)
        dy.save(cfg.LAST_FILE, [token_repre, encoder, decoder])
        if my_eval.evaluation('Valid', cfg.PRED_DEV, cfg.DEV):
            BEST_ITER = cnt_iter / cfg.VALID_ITER
            os.system('cp %s.data %s.data' % (cfg.LAST_FILE, cfg.BEST_FILE))
            os.system('cp %s.meta %s.meta' % (cfg.LAST_FILE, cfg.BEST_FILE))

        # Just record test result
        my_eval.clear('Test')
        for indexes, masks, truth in test_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)
            vectors = encoder(vectors, np.array(masks['1D']).T)
            pred = decoder(vectors, masks, None, cnt_iter, False, True)
            my_eval.add_truth('Test', truth)
            my_eval.add_pred('Test', pred)
        my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
    my_eval.print_best_result('Valid')

    # Final Test
    test_pc = dy.ParameterCollection()
    token_repre, encoder, decoder = dy.load(cfg.BEST_FILE, test_pc)
    my_eval.clear('Test')
    for indexes, masks, truth in test_batch:
        dy.renew_cg()
        vectors = token_repre(indexes, False)
        vectors = encoder(vectors, np.array(masks['1D']).T)
        pred = decoder(vectors, masks, None, 0, False, True)
        my_eval.add_truth('Test', truth)
        my_eval.add_pred('Test', pred)
    my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)