Example #1
 def save_shared_parameters(self):
     print("Saving the encoder parameter")
     # self.word_embedding.save("models/word_embedding.m")
     dy.save("basename", [
         self.char_rnn.char_emb, self.char_rnn.fw_lstm,
         self.char_rnn.bw_lstm, self.word_embedding, self.bilstm
     ])
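For reference, here is a minimal loading sketch for the objects saved above (the variable names are assumed from Example #1): dy.load restores into a fresh ParameterCollection and returns the saved objects in the same order they were passed to dy.save.

import dynet as dy

# Restore into a fresh ParameterCollection; dy.load returns the objects
# in the order they were saved.
pc = dy.ParameterCollection()
char_emb, fw_lstm, bw_lstm, word_embedding, bilstm = dy.load("basename", pc)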
Example #2
    def save_checkpoint(self, save_model=True):
        assert self.model is not None, 'no model built'

        if save_model:
            dy.save(self.model_checkpoint_path, [self.lm])

        self.word_vocab.save(self.word_vocab_path)
        self.label_vocab.save(self.label_vocab_path)

        with open(self.state_checkpoint_path, 'w') as f:
            state = {
                'model': 'rnn-lm',
                'multitask': self.multitask,
                'num-params': int(self.lm.num_params),

                'num-epochs': self.current_epoch,
                'num-updates': self.num_updates,
                'elapsed': self.timer.format_elapsed(),
                'current-lr': self.get_lr(),

                'best-dev-perplexity': self.best_dev_perplexity,
                'best-dev-perplexity-epoch': self.best_dev_epoch,
                'test-perplexity': self.test_perplexity,
            }
            json.dump(state, f, indent=4)
Example #3
def check_performance_and_save(parser,
                               best_dev_fscore,
                               best_dev_model_path,
                               treebank,
                               sentence_embeddings,
                               args):
    dev_fscore, dev_start_time = check_performance(parser,
                                                   treebank,
                                                   sentence_embeddings,
                                                   args)

    print('dev-fscore {}\ndev-elapsed {}'.format(dev_fscore, format_elapsed(dev_start_time)))

    if dev_fscore.fscore > best_dev_fscore:
        if best_dev_model_path is not None:
            for ext in [".data", ".meta"]:
                path = best_dev_model_path + ext
                if os.path.exists(path):
                    print("Removing previous model file {}...".format(path))
                    os.remove(path)

        best_dev_fscore = dev_fscore.fscore
        best_dev_model_path = "{}_dev={:.2f}".format(args.model_path_base, dev_fscore.fscore)
        print("Saving new best model to {}...".format(best_dev_model_path))
        dy.save(best_dev_model_path, [parser])
    return best_dev_fscore, best_dev_model_path
Example #4
 def save_model(self):
     self.finalize()
     d = {
         "param_keys": list(self.params.keys()),
         "max_num_labels": self.max_num_labels,
         "layers": self.layers,
         "layer_dim": self.layer_dim,
         "output_dim": self.output_dim,
         "activation": self.activation_str,
         "init": self.init_str,
     }
     d.update(self.save_extra())
     started = time.time()
     try:
         os.remove(self.filename)
         print("Removed existing '%s'." % self.filename)
     except OSError:
         pass
     print("Saving model to '%s'... " % self.filename, end="", flush=True)
     try:
         dy.save(self.filename, self.params.values())
         print("Done (%.3fs)." % (time.time() - started))
     except ValueError as e:
         print("Failed saving model: %s" % e)
     return d
Example #5
def train_network(config, saver, parser, embeddings, train_examples, dev_set,
                  test_set):
    best_dev_UAS = 0
    model = ParserModel(config, embeddings, parser)
    parser.model = model
    for epoch in range(config.n_epochs):
        print "Epoch {:} out of {:}".format(epoch + 1, config.n_epochs)
        dev_UAS = run_epoch(model, config, parser, train_examples, dev_set)
        if dev_UAS > best_dev_UAS:
            best_dev_UAS = dev_UAS
            if not saver:
                print "New best dev UAS! Saving model in ./data/weights/parser.weights"
                dy.save('./data/weights/parser.weights',
                        [model.pW, model.pB1, model.pU, model.pB2])

    if saver:
        print 80 * "="
        print "TESTING"
        print 80 * "="
        print "Restoring the best model weights found on the dev set"
        model.pW, model.pB1, model.pU, model.pB2 = dy.load(
            './data/weights/parser.weights', model.m)
        print "Final evaluation on test set",
        UAS, dependencies = parser.parse(test_set)
        print "- test UAS: {:.2f}".format(UAS * 100.0)
        print "Writing predictions"
        with open('q2_test.predicted.pkl', 'w') as f:
            cPickle.dump(dependencies, f, -1)
        print "Done!"
Example #6
 def save(self, basename):
     '''save parameters, lookup parameters and builder objects to disk
     @param basename: string, file to save into, used in dy.save()
     @return: None
     '''
     obs = [self.pb, self.pe, self.pT]
     dy.save(basename, obs)
Example #7
    def save_checkpoint(self):
        assert self.model is not None, 'no model built'

        dy.save(self.model_checkpoint_path, [self.parser])

        self.word_vocab.save(self.word_vocab_path)
        self.label_vocab.save(self.label_vocab_path)
        self.action_vocab.save(self.action_vocab_path)

        with open(self.state_checkpoint_path, 'w') as f:
            state = {
                'model': self.model_type,
                'num-params': int(self.parser.num_params),
                'num-epochs': self.current_epoch,
                'num-updates': self.num_updates,
                'elapsed': self.timer.format_elapsed(),
                'current-lr': self.get_lr(),
                'best-dev-epoch': self.best_dev_epoch,
                'current-dev-fscore': self.current_dev_fscore,
                'best-dev-fscore': self.best_dev_fscore,
                'test-fscore': self.test_fscore,
                'current-dev-perplexity': self.current_dev_perplexity,
                'best-dev-perplexity': self.best_dev_perplexity,
                'test-perplexity': self.test_perplexity
            }
            json.dump(state, f, indent=4)
Example #8
 def save_to_disk(self, filename):
     dy.save(filename, [
         self.enc_fwd_lstm, self.enc_bwd_lstm, self.dec_lstm,
         self.input_lookup, self.attention_w1, self.attention_w2,
         self.attention_v, self.decoder_w, self.decoder_b,
         self.output_lookup
     ])
Example #9
 def save_model(self, path, model_version, fold=None):
     full_saving_path = os.path.join(path, "model_" + model_version)
     if not os.path.exists(full_saving_path):
         os.makedirs(full_saving_path)
     # DyNet parameter attributes; these are saved separately below and must be detached before pickling
     nn_vars = [
         'W_emb', 'W_cnn', 'b_cnn', 'W_mlp', 'b_mlp', 'V_mlp', 'a_mlp'
     ]
     # save the DyNet parameters in the target directory under the model name
     dy.save(
         os.path.join(full_saving_path,
                      'model_' + model_version + '_fold' + str(fold)),
         [getattr(self, i) for i in nn_vars])
     obj_to_save = copy.copy(self)
     # detach the DyNet objects, since they cannot be pickled as-is
     for n in nn_vars:
         setattr(obj_to_save, n, None)
     obj_to_save.model = None
     # convert defaultdicts to plain dicts, since their factory functions may not be picklable
     obj_to_save.w2i = dict(obj_to_save.w2i)
     obj_to_save.t2i = dict(obj_to_save.t2i)
     pickle.dump(
         obj_to_save,
         open(
             os.path.join(
                 full_saving_path,
                 'model_' + model_version + '_fold' + str(fold) + ".p"),
             "wb"))
Example #10
 def save_param_values(self, filename, values):
     remove_existing(filename + ".data", filename + ".meta")
     try:
         self.set_weight_decay_lambda(0.0)  # avoid applying weight decay here (clab/dynet#1206); it is applied on load instead
         dy.save(filename, tqdm(values, desc="Saving model to '%s'" % filename, unit="param", file=sys.stdout))
         self.set_weight_decay_lambda()
     except ValueError as e:
         print("Failed saving model: %s" % e)
Example #11
 def save_model(self, filename):
     dy.renew_cg()
     with open(filename + "_output_w.txt", "w") as f:
         f.write(np.array_str(dy.parameter(self.output_w).npvalue()) + "\n")
     with open(filename + "_output_b.txt", "w") as f:
         f.write(np.array_str(dy.parameter(self.output_b).npvalue()) + "\n")
     dy.save("models/" + filename + "_train.model",
             [self.output_w, self.output_b, self.embeddings, self.RNN])
Example #13
def save_latest_model(model_path_base, parser):
    latest_model_path = "{}_latest_model".format(model_path_base)
    for ext in [".data", ".meta"]:
        path = latest_model_path + ext
        if os.path.exists(path):
            print("Removing previous model file {}...".format(path))
            os.remove(path)

    print("Saving new model to {}...".format(latest_model_path))
    dy.save(latest_model_path, [parser])
Example #14
 def test_save_load(self):
     self.p.forward()
     self.p.backward()
     self.t.update()
     dy.renew_cg()
     v1 = self.p.value()
     dy.save(self.file, [self.p])
     [p2] = dy.load(self.file, self.m2)
     v2 = p2.value()
     self.assertTrue(np.allclose(v1, v2))
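The fixture behind this test is not shown; here is a minimal sketch of a plausible setUp, with names inferred from the test body and assuming DyNet >= 2.1, where Parameters behave as Expressions (so forward/backward work on them directly).

import unittest
import numpy as np
import dynet as dy

class TestSaveLoad(unittest.TestCase):
    def setUp(self):
        self.file = "tmp.model"                 # hypothetical path
        self.m = dy.ParameterCollection()       # collection being trained
        self.m2 = dy.ParameterCollection()      # fresh collection to load into
        self.p = self.m.add_parameters(1)       # scalar, so backward() is valid
        self.t = dy.SimpleSGDTrainer(self.m)    # trainer named self.t in the test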
Example #16
def train_network(params, ntags, train_data, dev_set):
    global telemetry_file, randstring, MIN_ACC
    prev_acc = 0
    m = params[0]
    t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
    # train the network
    trainer = dy.SimpleSGDTrainer(m)
    total_loss = 0
    seen_instances = 0
    train_good = 0
    for train_x, train_y in train_data:
        dy.renew_cg()
        output = build_network(params, train_x)
        # l2 regularization did not look promising at all, so it's commented out
        loss = -dy.log(output[train_y]) + REG_LAMBDA * sum(
            [dy.l2_norm(p) for p in params[2:]])
        if train_y == np.argmax(output.npvalue()):
            train_good += 1
        seen_instances += 1
        total_loss += loss.value()
        loss.backward()
        trainer.update()

        if seen_instances % 20000 == 0:
            # measure elapsed seconds
            secs = time.perf_counter() - t0
            t0 = time.perf_counter()
            good = case = 0
            max_dev_instances = 70 * 1000
            dev_instances = 0
            for x_tuple, dev_y in dev_set:
                output = build_network(params, x_tuple)
                if np.argmax(output.npvalue()) == dev_y:
                    good += 1
                case += 1
                dev_instances += 1
                if dev_instances >= max_dev_instances:
                    break
            acc = float(good) / case
            print(
                "iterations: {}. train_accuracy: {} accuracy: {} avg loss: {} secs per 1000:{}"
                .format(seen_instances,
                        float(train_good) / 20000, acc,
                        total_loss / (seen_instances + 1), secs / 20))
            train_good = 0
            if acc > MIN_ACC and acc > prev_acc:
                print("saving.")
                dy.save("params_" + randstring, list(params)[1:])
                prev_acc = acc

            telemetry_file.write("{}\t{}\t{}\t{}\n".format(
                seen_instances, acc, total_loss / (seen_instances + 1),
                secs / 20))
    MIN_ACC = max(prev_acc, MIN_ACC)
Example #17
 def fit(self, saver, parser, train_examples, dev_set):
     best_dev_UAS = 0
     for epoch in range(self.config.n_epochs):
         print "Epoch {:} out of {:}".format(epoch + 1,
                                             self.config.n_epochs)
         dev_UAS = self.run_epoch(parser, train_examples, dev_set)
         if dev_UAS > best_dev_UAS:
             best_dev_UAS = dev_UAS
             if saver:
                 print "New best dev UAS! Saving model in ./data/weights/parser.weights"
                 dy.save('./data/weights/parser.weights')
         print
Example #18
def predict_sql(sentence):
    tokens = sentence.strip().split()
    word_ids = [vocab_words.w2i.get(word, UNK) for word in tokens]
    tag_ids = [0 for t in tokens]
    pred_tags, pred_template, _ = build_tagging_graph(word_ids, tag_ids, 0,
                                                      builders, False)
    pred_complete = insert_tagged_tokens(tokens, pred_tags, pred_template)
    dy.save("model_new", [
        pEmbedding, pOutput, builders[0], builders[1], pHiddenTemplate,
        pOutputTemplate
    ])
    print(pred_tags)
    print("SQL query generated: ", pred_complete)
Example #19
    def save(self, filename, initial_weights=None, save_with_embeddings=True):
        # model payload
        if save_with_embeddings:
            np.save(filename + '-embs.npy', self.embeddings.as_array())
            # self.model.save(filename + '.dyn') # saves all embeddings - move next row to else
        dy.save(filename + '.dyn', [self.ergm_weights] +
                [self.word_assoc_weights[r] for r in self.relation_names])  # order matters for loading

        # feature ordering
        pickle.dump(self.feature_set, open(filename + '.feats', 'wb'))

        # nice-to-read score summary
        if initial_weights is not None:
            self.save_weights(filename, initial_weights)
Example #21
    def save(self, file_name):
        members_to_save = []
        members_to_save.append(self.char_lookup)
        members_to_save.append(self.char_fwd_lstm)
        members_to_save.append(self.char_bwd_lstm)
        members_to_save.append(self.lstm_to_rep_params)
        members_to_save.append(self.lstm_to_rep_bias)
        members_to_save.append(self.mlp_out)
        members_to_save.append(self.mlp_out_bias)
        # self._model.save(file_name, members_to_save)
        dy.save(file_name, members_to_save)

        # character mapping saved separately
        pickle.dump(self.c2i, open(file_name[:-4] + '.c2i', 'wb'))  # binary mode for pickle under Python 3
Example #22
def model_save_helper(mode, prefix, savable, options):
    if mode == "dynet":
        # noinspection PyArgumentList
        dn.save(prefix, [savable])
        with open(prefix + ".options", "wb") as f:
            pickle.dump(options, f)
    elif mode == "pickle":
        picklable = savable.get_picklable_obj()
        with open(prefix, "wb") as f:
            pickle.dump((options, picklable), f)
    elif mode == "pickle-gzip":
        picklable = savable.get_picklable_obj()
        with gzip.open(prefix, "wb") as f:
            pickle.dump((options, picklable), f)
    else:
        raise TypeError("Invalid model format.")
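A hedged usage sketch for the helper above; the paths and options dict are illustrative, and parser is assumed to be a DyNet-saveable object.

options = {"lstm_dims": 128, "epochs": 30}  # illustrative options
# writes out/model.data, out/model.meta, and out/model.options
model_save_helper("dynet", "out/model", parser, options)
# single gzipped pickle holding (options, picklable object)
model_save_helper("pickle-gzip", "out/model.pkl.gz", parser, options)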
Example #23
    def check_dev():
        nonlocal best_dev_fscore
        nonlocal best_dev_model_path

        dev_start_time = time.time()

        dev_predicted = []
        for tree in dev_treebank:
            dy.renew_cg()
            sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
            predicted, _, _ = parser.parse(sentence)
            if args.parser_type == "bottom-up":
                dev_predicted.append(predicted)
            else:
                dev_predicted.append(predicted.convert())

        dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank,
                                    dev_predicted, args.parser_type)

        print("dev-fscore {} "
              "dev-elapsed {} "
              "total-elapsed {}".format(
                  dev_fscore,
                  format_elapsed(dev_start_time),
                  format_elapsed(start_time),
              ))

        if dev_fscore.fscore > best_dev_fscore:
            if best_dev_model_path is not None:
                for ext in [".data", ".meta"]:
                    path = best_dev_model_path + ext
                    if os.path.exists(path):
                        print(
                            "Removing previous model file {}...".format(path))
                        os.remove(path)

            best_dev_fscore = dev_fscore.fscore
            best_dev_model_path = "{}_dev={:.2f}".format(
                args.model_path_base, dev_fscore.fscore)
            print("Saving new best model to {}...".format(best_dev_model_path))
            dy.save(best_dev_model_path, [parser])
Example #24
    def check_dev():
        nonlocal best_dev_fscore
        nonlocal best_dev_model_path

        dev_start_time = time.time()

        dev_predicted = []
        #dev_gold = []

        #dev_gold = latent_tree.build_latent_trees(dev_chunk_insts)
        dev_gold = []
        for inst in dev_chunk_insts:
            chunks = util.inst2chunks(inst)
            dev_gold.append(chunks)

        for x, chunks in dev_chunk_insts:
            dy.renew_cg()
            #sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
            sentence = [(parse.XX, ch) for ch in x]
            predicted, _ = parser.parse(sentence)
            dev_predicted.append(predicted.convert().to_chunks())

        #dev_fscore = evaluate.evalb(args.evalb_dir, dev_gold, dev_predicted, args.expname + '.dev.') #evalb
        dev_fscore = evaluate.eval_chunks2(args.evalb_dir,
                                           dev_gold,
                                           dev_predicted,
                                           output_filename=args.expname +
                                           '.dev.txt')  # evalb

        print("dev-fscore {} "
              "dev-elapsed {} "
              "total-elapsed {}".format(
                  dev_fscore,
                  format_elapsed(dev_start_time),
                  format_elapsed(start_time),
              ))

        if dev_fscore.fscore > best_dev_fscore:
            if best_dev_model_path is not None:
                for ext in [".data", ".meta"]:
                    path = best_dev_model_path + ext
                    if os.path.exists(path):
                        print(
                            "Removing previous model file {}...".format(path))
                        os.remove(path)

            best_dev_fscore = dev_fscore.fscore
            best_dev_model_path = "{}_dev={:.2f}".format(
                args.model_path_base + "_" + args.expname, dev_fscore.fscore)
            print("Saving new best model to {}...".format(best_dev_model_path))
            dy.save(best_dev_model_path, [parser])

            test_start_time = time.time()
            test_predicted = []
            #test_gold = latent_tree.build_latent_trees(test_chunk_insts)
            test_gold = []
            for inst in test_chunk_insts:
                chunks = util.inst2chunks(inst)
                test_gold.append(chunks)

            ftreelog = open(args.expname + '.test.predtree.txt',
                            'w',
                            encoding='utf-8')

            for x, chunks in test_chunk_insts:
                dy.renew_cg()
                #sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
                sentence = [(parse.XX, ch) for ch in x]
                predicted, _ = parser.parse(sentence)
                pred_tree = predicted.convert()
                ftreelog.write(pred_tree.linearize() + '\n')
                test_predicted.append(pred_tree.to_chunks())

            ftreelog.close()

            #test_fscore = evaluate.evalb(args.evalb_dir, test_chunk_insts, test_predicted, args.expname + '.test.')
            test_fscore = evaluate.eval_chunks2(args.evalb_dir,
                                                test_gold,
                                                test_predicted,
                                                output_filename=args.expname +
                                                '.test.txt')  # evalb

            print("epoch {:,} "
                  "test-fscore {} "
                  "test-elapsed {} "
                  "total-elapsed {}".format(
                      epoch,
                      test_fscore,
                      format_elapsed(test_start_time),
                      format_elapsed(start_time),
                  ))
Example #25
def tagger(repre,
           mission,
           char_lstm,
           lin_dim,
           bi_lstm1,
           bi_lstm2,
           embed_size,
           epochs,
           lr,
           batch_size,
           train_file,
           dev_file,
           per_log,
           dev_per_log,
           saved_model_path,
           test_model_after_train=False):

    # read train and dev data sets
    train, vocab, labels = get_train_dataset(train_file, mission, repre)
    dev = read_data(dev_file, mission)

    # define vocabulary and help structures
    word2int = {w: i for i, w in enumerate(vocab)}
    label2int = {l: i for i, l in enumerate(labels)}
    vocab_size = len(vocab)
    num_labels = len(labels)

    # create a transducer classifier
    m = dy.ParameterCollection()
    trainer = dy.AdamTrainer(m, lr)  # define trainer # lr
    if repre == "a":
        transducer = biLSTMTaggerA(vocab_size, bi_lstm1, bi_lstm2, embed_size,
                                   num_labels, m)  # create classifier
        dev_accuracy = train_model(mission, train, dev, vocab, epochs,
                                   batch_size, trainer, transducer, word2int,
                                   label2int, per_log, dev_per_log)
        dy.save(saved_model_path + "_" + dev_accuracy, [
            transducer.params["lookup"], transducer.fw_builder1,
            transducer.bw_builder1, transducer.fw_builder2,
            transducer.bw_builder2, transducer.params["W"],
            transducer.params["b"]
        ])

    elif repre == "b":
        transducer = biLSTMTaggerB(vocab_size, char_lstm, bi_lstm1, bi_lstm2,
                                   embed_size, num_labels,
                                   m)  # create classifier
        dev_accuracy = train_model(mission, train, dev, vocab, epochs,
                                   batch_size, trainer, transducer, word2int,
                                   label2int, per_log, dev_per_log)
        dy.save(saved_model_path + "_" + dev_accuracy, [
            transducer.params["lookup"], transducer.char_builder,
            transducer.fw_builder1, transducer.bw_builder1,
            transducer.fw_builder2, transducer.bw_builder2,
            transducer.params["W"], transducer.params["b"]
        ])

    elif repre == "c":
        transducer = biLSTMTaggerC(vocab_size, bi_lstm1, bi_lstm2, embed_size,
                                   num_labels, m)  # create classifier
        dev_accuracy = train_model(mission, train, dev, vocab, epochs,
                                   batch_size, trainer, transducer, word2int,
                                   label2int, per_log, dev_per_log)
        dy.save(saved_model_path + "_" + dev_accuracy, [
            transducer.params["lookup"], transducer.fw_builder1,
            transducer.bw_builder1, transducer.fw_builder2,
            transducer.bw_builder2, transducer.params["W"],
            transducer.params["b"]
        ])

    else:
        transducer = biLSTMTaggerD(vocab_size, char_lstm, lin_dim, bi_lstm1,
                                   bi_lstm2, embed_size, num_labels,
                                   m)  # create classifier
        dev_accuracy = train_model(mission, train, dev, vocab, epochs,
                                   batch_size, trainer, transducer, word2int,
                                   label2int, per_log, dev_per_log)
        dy.save(saved_model_path + "_" + dev_accuracy, [
            transducer.params["lookup"], transducer.char_builder,
            transducer.params["W_con"], transducer.params["b_con"],
            transducer.fw_builder1, transducer.bw_builder1,
            transducer.fw_builder2, transducer.bw_builder2,
            transducer.params["W"], transducer.params["b"]
        ])

    if test_model_after_train:
        if mission == "NER":
            test_model_on_blind_set(
                "./ner/test", saved_model_path + "_" + dev_accuracy + ".ner",
                transducer, word2int, label2int, vocab)
        else:
            test_model_on_blind_set(
                "./pos/test", saved_model_path + "_" + dev_accuracy + ".pos",
                transducer, word2int, label2int, vocab)
Example #26
 def test_save_load(self):
     dy.save(self.file, [self.b])
     [b] = dy.load(self.file, self.m2)
Example #27
 def test_save_load_generator(self):
     dy.save(self.file, (x for x in [self.b]))
     [b] = list(dy.load_generator(self.file, self.m2))
Example #28
 def save(self, prefix):
     with open(prefix + ".options", "wb") as f:
         pickle.dump(self.options, f)
     # noinspection PyArgumentList
     dn.save(prefix, [self.network])
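The matching load counterpart is not shown here; below is a minimal sketch under the same prefix convention (dn is dynet, and the network is assumed to have been saved exactly as in the method above).

import pickle
import dynet as dn

def load(prefix, pc):
    # Read back the pickled options, then restore the single saved
    # network object into the given ParameterCollection.
    with open(prefix + ".options", "rb") as f:
        options = pickle.load(f)
    [network] = dn.load(prefix, pc)
    return options, network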
Example #29
 def save_to_disk(self, filename):
     dy.save(filename, [self.builder, self.lookup, self.R, self.bias])
Example #30
 def save(self, prefix):
     with open(prefix + ".options", "wb") as f:
         pickle.dump((self.options, self.statistics), f)
     # noinspection PyArgumentList
     dn.save(prefix, [self.container])
Example #32
def main():
    # Configuration file processing
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='../configs/debug.cfg')
    argparser.add_argument('--continue_training',
                           action='store_true',
                           help='Load model Continue Training')
    argparser.add_argument('--name',
                           default='experiment',
                           help='The name of the experiment.')
    argparser.add_argument('--model',
                           default='s2s',
                           help='s2s: seq2seq-head-selection-model; '
                                's2tBFS: seq2tree-BFS-decoder-model; '
                                's2tDFS: seq2tree-DFS-decoder-model')
    argparser.add_argument('--gpu', default='0', help='GPU ID (-1 to cpu)')
    args, extra_args = argparser.parse_known_args()
    cfg = IniConfigurator(args.config_file, extra_args)

    # Logger setting
    logger = dual_channel_logger(
        __name__,
        file_path=cfg.LOG_FILE,
        file_model='w',
        formatter='%(asctime)s - %(levelname)s - %(message)s',
        time_formatter='%m-%d %H:%M')
    from eval.script_evaluator import ScriptEvaluator

    # DyNet setting
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    import dynet_config
    dynet_config.set(mem=cfg.DYNET_MEM, random_seed=cfg.DYNET_SEED)
    dynet_config.set_gpu()
    import dynet as dy
    from models.token_representation import TokenRepresentation
    from antu.nn.dynet.seq2seq_encoders import DeepBiRNNBuilder, orthonormal_VanillaLSTMBuilder
    from models.graph_nn_decoder import GraphNNDecoder

    # Build the dataset of the training process
    # Build data reader
    data_reader = PTBReader(
        field_list=['word', 'tag', 'head', 'rel'],
        root='0\t**root**\t_\t**rcpos**\t**rpos**\t_\t0\t**rrel**\t_\t_',
        spacer=r'[\t]',
    )
    # Build vocabulary with pretrained glove
    vocabulary = Vocabulary()
    g_word, _ = glove_reader(cfg.GLOVE)
    pretrained_vocabs = {'glove': g_word}
    vocabulary.extend_from_pretrained_vocab(pretrained_vocabs)
    # Setup datasets
    datasets_settings = {
        'train': DatasetSetting(cfg.TRAIN, True),
        'dev': DatasetSetting(cfg.DEV, False),
        'test': DatasetSetting(cfg.TEST, False),
    }
    datasets = PTBDataset(vocabulary, datasets_settings, data_reader)
    counters = {'word': Counter(), 'tag': Counter(), 'rel': Counter()}
    datasets.build_dataset(counters,
                           no_pad_namespace={'rel'},
                           no_unk_namespace={'rel'})

    # Build model
    # Parameter
    pc = dy.ParameterCollection()
    trainer = dy.AdamTrainer(pc,
                             alpha=cfg.LR,
                             beta_1=cfg.ADAM_BETA1,
                             beta_2=cfg.ADAM_BETA2,
                             eps=cfg.EPS)

    # Token Representation Layer
    token_repre = TokenRepresentation(pc, cfg, datasets.vocabulary)
    # BiLSTM Encoder Layer
    encoder = DeepBiRNNBuilder(pc, cfg.ENC_LAYERS, token_repre.token_dim,
                               cfg.ENC_H_DIM, orthonormal_VanillaLSTMBuilder)
    # GNN Decoder Layer
    decoder = GraphNNDecoder(pc, cfg, datasets.vocabulary)
    # PTB Evaluator
    my_eval = ScriptEvaluator(['Valid', 'Test'], datasets.vocabulary)

    # Build Training Batch
    def cmp(ins):
        return len(ins['word'])

    train_batch = datasets.get_batches('train', cfg.TRAIN_BATCH_SIZE, True,
                                       cmp, True)
    valid_batch = list(
        datasets.get_batches('dev', cfg.TEST_BATCH_SIZE, False, cmp, False))
    test_batch = list(
        datasets.get_batches('test', cfg.TEST_BATCH_SIZE, False, cmp, False))

    # Train model
    BEST_DEV_LAS = BEST_DEV_UAS = BEST_ITER = cnt_iter = 0
    valid_loss = [[] for i in range(cfg.GRAPH_LAYERS + 3)]
    logger.info("Experiment name: %s" % args.name)
    SHA = os.popen('git log -1 | head -n 1 | cut -c 8-13').readline().rstrip()
    logger.info('Git SHA: %s' % SHA)
    while cnt_iter < cfg.MAX_ITER:
        dy.renew_cg()
        cnt_iter += 1
        indexes, masks, truth = train_batch.__next__()
        vectors = token_repre(indexes, True)
        vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                          np.array(masks['1D']).T, True)
        loss, part_loss = decoder(vectors, masks, truth, True, True)
        for i, l in enumerate([loss] + part_loss):
            valid_loss[i].append(l.value())
        loss.backward()
        trainer.learning_rate = cfg.LR * cfg.LR_DECAY**(cnt_iter /
                                                        cfg.LR_ANNEAL)
        trainer.update()

        if cnt_iter % cfg.VALID_ITER:
            continue

        # Validation
        for i in range(len(valid_loss)):
            valid_loss[i] = str(round(np.mean(valid_loss[i]), 2))
        avg_loss = ', '.join(valid_loss)
        logger.info("")
        logger.info("Iter: %d-%d, Avg_loss: %s, LR (%f), Best (%d)" %
                    (cnt_iter / cfg.VALID_ITER, cnt_iter, avg_loss,
                     trainer.learning_rate, BEST_ITER))

        valid_loss = [[] for i in range(cfg.GRAPH_LAYERS + 3)]
        my_eval.clear('Valid')
        for indexes, masks, truth in valid_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)
            vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                              np.array(masks['1D']).T, False)
            pred = decoder(vectors, masks, None, False, True)
            my_eval.add_truth('Valid', truth)
            my_eval.add_pred('Valid', pred)
        dy.save(cfg.LAST_FILE, [token_repre, encoder, decoder])
        if my_eval.evaluation('Valid', cfg.PRED_DEV, cfg.DEV):
            BEST_ITER = cnt_iter / cfg.VALID_ITER
            os.system('cp %s.data %s.data' % (cfg.LAST_FILE, cfg.BEST_FILE))
            os.system('cp %s.meta %s.meta' % (cfg.LAST_FILE, cfg.BEST_FILE))

        # Just record test result
        my_eval.clear('Test')
        for indexes, masks, truth in test_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)
            vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                              np.array(masks['1D']).T, False)
            pred = decoder(vectors, masks, None, False, True)
            my_eval.add_truth('Test', truth)
            my_eval.add_pred('Test', pred)
        my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
    my_eval.print_best_result('Valid')

    test_pc = dy.ParameterCollection()
    token_repre, encoder, decoder = dy.load(cfg.BEST_FILE, test_pc)

    my_eval.clear('Test')
    test_batch = datasets.get_batches('test', cfg.TEST_BATCH_SIZE, False, cmp,
                                      False)
    for indexes, masks, truth in test_batch:
        dy.renew_cg()
        vectors = token_repre(indexes, False)
        vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                          np.array(masks['1D']).T, False)
        pred = decoder(vectors, masks, None, False, True)
        my_eval.add_truth('Test', truth)
        my_eval.add_pred('Test', pred)
    my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
Example #35
def main():
    # Configuration file processing
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--config_file', default='../configs/debug.cfg')
    argparser.add_argument('--continue_training', action='store_true',
                           help='Load model Continue Training')
    argparser.add_argument('--name', default='experiment',
                           help='The name of the experiment.')
    argparser.add_argument('--model', default='s2s',
                           help='s2s: seq2seq-head-selection-model; '
                                's2tDFS: seq2tree-DFS-decoder-model')
    argparser.add_argument('--gpu', default='0', help='GPU ID (-1 to cpu)')
    args, extra_args = argparser.parse_known_args()
    cfg = IniConfigurator(args.config_file, extra_args)

    # Logger setting
    logger = dual_channel_logger(
        __name__,
        file_path=cfg.LOG_FILE,
        file_model='w',
        formatter='%(asctime)s - %(levelname)s - %(message)s',
        time_formatter='%m-%d %H:%M')
    from eval.script_evaluator import ScriptEvaluator

    # DyNet setting
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    import dynet_config
    dynet_config.set(mem=cfg.DYNET_MEM, random_seed=cfg.DYNET_SEED)
    dynet_config.set_gpu()
    import dynet as dy
    from models.token_representation import TokenRepresentation
    from antu.nn.dynet.seq2seq_encoders import DeepBiRNNBuilder, orthonormal_VanillaLSTMBuilder
    from models.graph_nn_decoder import GraphNNDecoder
    from models.jackknife_decoder import JackKnifeGraphNNDecoder

    

    # Build the dataset of the training process
    # Build data reader
    data_reader = PTBReader(
        field_list=['word', 'tag', 'head', 'rel'],
        root='0\t**root**\t_\t**rcpos**\t**rpos**\t_\t0\t**rrel**\t_\t_',
        spacer=r'[\t]',)
    # Build vocabulary with pretrained glove
    vocabulary = Vocabulary()
    g_word, _ = glove_reader(cfg.GLOVE)
    pretrained_vocabs = {'glove': g_word}
    vocabulary.extend_from_pretrained_vocab(pretrained_vocabs)
    # Setup datasets
    datasets_settings = {'train': DatasetSetting(cfg.TRAIN, True),
                         'dev': DatasetSetting(cfg.DEV, False),
                         'test': DatasetSetting(cfg.TEST, False), }
    datasets = PTBDataset(vocabulary, datasets_settings, data_reader)
    counters = {'word': Counter(), 'tag': Counter(), 'rel': Counter()}
    datasets.build_dataset(counters, no_pad_namespace={'rel'}, no_unk_namespace={'rel'})

    # Build model
    # Parameter
    pc = dy.ParameterCollection()
    LR = 0.0005
    trainer = dy.AdamTrainer(pc, LR, cfg.ADAM_BETA1, cfg.ADAM_BETA2, cfg.EPS)

    # Token Representation Layer
    token_repre = TokenRepresentation(pc, cfg, datasets.vocabulary, include_pos=True)
    # BiLSTM Encoder Layer
    #encoder = BiaffineAttention()
    #encoder = MultiHeadedAttention(pc, 10, token_repre.token_dim)
    #encoder = MultiLayerMultiHeadAttention(pc, 10, token_repre.token_dim, num_layers=1)
    #encoder = MyMultiHeadAttention(None, 6, token_repre.token_dim, 32, 32, model=pc)
    
    #encoder = LabelAttention(None, token_repre.token_dim, 128, 128, 112, 128, use_resdrop=True, q_as_matrix=False, residual_dropout=0.1, attention_dropout=0.1, d_positional=None, model=pc)
    # encoder = Encoder(None, token_repre.token_dim,
    #                 num_layers=1, num_heads=2, d_kv = 32, d_ff=1024, d_l=112,
    #                 d_positional=None,
    #                 num_layers_position_only=0,
    #                 relu_dropout=0.1, residual_dropout=0.1, attention_dropout=0.1,
    #                 use_lal=True,
    #                 lal_d_kv=128,
    #                 lal_d_proj=128,
    #                 lal_resdrop=True,
    #                 lal_pwff=True,
    #                 lal_q_as_matrix=False,
    #                 lal_partitioned=True,
    #                 model=pc)
    #encoder = ScaledDotProductAttention(pc, 10)
    encoder = DeepBiRNNBuilder(pc, cfg.ENC_LAYERS, token_repre.token_dim, cfg.ENC_H_DIM, orthonormal_VanillaLSTMBuilder)
    # GNN Decoder Layer
    decoder = GraphNNDecoder(pc, cfg, datasets.vocabulary)

    #decoder = JackKnifeGraphNNDecoder(pc, cfg, datasets.vocabulary)
    # PTB Evaluator
    my_eval = ScriptEvaluator(['Valid', 'Test'], datasets.vocabulary)

    #dy.save(cfg.LAST_FILE, [token_repre, encoder, decoder])
    #exit(0)

    # Build Training Batch
    def cmp(ins):
        return len(ins['word'])
    train_batch = datasets.get_batches('train', cfg.TRAIN_BATCH_SIZE, True, cmp, True)
    valid_batch = list(datasets.get_batches('dev', cfg.TEST_BATCH_SIZE, False, cmp, False))
    test_batch = list(datasets.get_batches('test', cfg.TEST_BATCH_SIZE, False, cmp, False))


    # Train model
    BEST_DEV_LAS = BEST_DEV_UAS = BEST_ITER = 0
    cnt_iter = -cfg.WARM * cfg.GRAPH_LAYERS
    valid_loss = [[] for i in range(cfg.GRAPH_LAYERS+3)]
    logger.info("Experiment name: %s" % args.name)
    SHA = os.popen('git log -1 | head -n 1 | cut -c 8-13').readline().rstrip()
    logger.info('Git SHA: %s' % SHA)
    while cnt_iter < cfg.MAX_ITER:
        print(cnt_iter, cfg.MAX_ITER)
        #dy.renew_cg()
        dy.renew_cg(immediate_compute=True, check_validity=True)
        cnt_iter += 1
        indexes, masks, truth = train_batch.__next__()
        vectors = token_repre(indexes, True)
        
        

        #vectors = encoder(vectors, np.array(masks['1D']).T)
        
        #print(vectors.npvalue)
        #vectors= encoder(vectors, vectors, vectors, np.array(masks['1D']).T)
        #vectors= encoder(vectors, vectors, vectors, np.array(masks['1D']).T, cfg.RNN_DROP)

        vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP, np.array(masks['1D']).T, False, True)
       
        loss, part_loss = decoder(vectors, masks, truth, cnt_iter, True, True)
        for i, l in enumerate([loss]+part_loss):
            valid_loss[i].append(l.value())
        loss.backward()
        trainer.learning_rate = LR*cfg.LR_DECAY**(max(cnt_iter, 0)/cfg.LR_ANNEAL)
        #trainer.learning_rate = cfg.LR*cfg.LR_DECAY**(max(cnt_iter, 0)/cfg.LR_ANNEAL)
        trainer.update()

        if cnt_iter % cfg.VALID_ITER: continue
        # Validation
        for i in range(len(valid_loss)):
            valid_loss[i] = str(round(np.mean(valid_loss[i]), 2))
        avg_loss = ', '.join(valid_loss)
        logger.info("")
        logger.info("Iter: %d-%d, Avg_loss: %s, LR (%f), Best (%d)" %
                    (cnt_iter/cfg.VALID_ITER, cnt_iter, avg_loss,
                     trainer.learning_rate, BEST_ITER))

        valid_loss = [[] for i in range(cfg.GRAPH_LAYERS+3)]
        my_eval.clear('Valid')
        for indexes, masks, truth in valid_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)

            # use the active DeepBiRNNBuilder's signature here (cf. the training call above)
            vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                              np.array(masks['1D']).T, False, False)

            pred = decoder(vectors, masks, None, cnt_iter, False, True)
            my_eval.add_truth('Valid', truth)
            my_eval.add_pred('Valid', pred)
        dy.save(cfg.LAST_FILE, [token_repre, encoder, decoder])
        if my_eval.evaluation('Valid', cfg.PRED_DEV, cfg.DEV):
            BEST_ITER = cnt_iter/cfg.VALID_ITER
            os.system('cp %s.data %s.data' % (cfg.LAST_FILE, cfg.BEST_FILE))
            os.system('cp %s.meta %s.meta' % (cfg.LAST_FILE, cfg.BEST_FILE))

        # Just record test result
        my_eval.clear('Test')
        for indexes, masks, truth in test_batch:
            dy.renew_cg()
            vectors = token_repre(indexes, False)

            # use the active DeepBiRNNBuilder's signature here (cf. the training call above)
            vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                              np.array(masks['1D']).T, False, False)

            pred = decoder(vectors, masks, None, cnt_iter, False, True)
            my_eval.add_truth('Test', truth)
            my_eval.add_pred('Test', pred)
        my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
    my_eval.print_best_result('Valid')

    # Final Test
    test_pc = dy.ParameterCollection()
    token_repre, encoder, decoder = dy.load(cfg.BEST_FILE, test_pc)
    my_eval.clear('Test')
    for indexes, masks, truth in test_batch:
        dy.renew_cg()
        vectors = token_repre(indexes, False)

        # use the active DeepBiRNNBuilder's signature here (cf. the training call above)
        vectors = encoder(vectors, None, cfg.RNN_DROP, cfg.RNN_DROP,
                          np.array(masks['1D']).T, False, False)

        pred = decoder(vectors, masks, None, 0, False, True)
        my_eval.add_truth('Test', truth)
        my_eval.add_pred('Test', pred)
    my_eval.evaluation('Test', cfg.PRED_TEST, cfg.TEST)
Example #36
        total_loss = sum(batch_loss_ls) / len(batch_loss_ls)
        time2 = time.time()
        # print info
        print("total loss: {}".format(total_loss))
        print("time consumed for training: {}s".format(time2 - time1))
        # showing evaluation on dev
        if not config.use_crf:
            acc, f1 = evaluate(config.trim_dev_path,
                               char_acceptor, word_acceptor,
                               char_embed, word_embed)
        else:
            acc, f1 = evaluate(config.trim_dev_path,
                               char_acceptor, word_acceptor,
                               char_embed, word_embed,
                               crf_acceptor)
        time3 = time.time()
        # print info
        print("acc: {}%\nf1 score: {}%".format(100 * acc, 100 * f1))
        print("time consumed for evaluating: {}s".format(time3 - time2))
        print("epoch done.")
        
    # save model parameters, lookup parameters and builder objects to disk
    obs = [word_embed, char_embed]
    basename = config.model_basename
    dy.save(basename, obs)
    char_acceptor.save(basename + ".charBilstm")
    word_acceptor.save(basename + ".wordBilstm")
    if config.use_crf:
        crf_acceptor.save(basename + ".crf")