Example #1
def main():
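    # Overall flow: load a dataframe and a pickled tag list, read the tagged
    # corpus, build an embedding layer, map the corpus to id sequences, pick a
    # model class based on args.layer, build cross-validation or dev/test
    # batches, and train a multi-label tag classifier.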
    print 'Starting at: {}\n'.format(datetime.now())
    s_time = time.time()
    df = read_df(args.df_path)
    df = df.fillna(u'')

    label_tags = pickle.load(open(args.tags_file, 'rb'))
    print '\nloaded {} tags'.format(len(label_tags))

    raw_corpus = myio.read_corpus(args.corpus_w_tags, with_tags=True)

    embedding_layer = create_embedding_layer(
        n_d=200,
        embs=load_embedding_iterator(args.embeddings),
        only_words=False if args.use_embeddings else True,
        # only_words=True takes the vocabulary from the embedding file but initializes the embeddings randomly
        trainable=args.trainable
    )

    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, label_tags, max_len=args.max_seq_len)

    print("vocab size={}, corpus size={}\n".format(embedding_layer.n_V, len(raw_corpus)))

    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus_w_tags, embedding_layer, with_tags=True)

    if args.layer.lower() == "lstm":
        from models import LstmMultiTagsClassifier as Model
    elif args.layer.lower() in ["bilstm", "bigru"]:
        from models import BiRNNMultiTagsClassifier as Model
    elif args.layer.lower() == "cnn":
        from models import CnnMultiTagsClassifier as Model
    elif args.layer.lower() == "gru":
        from models import GruMultiTagsClassifier as Model
    else:
        raise Exception("no correct layer given")

    if args.cross_val:
        train, dev, test = myio.create_cross_val_batches(df, ids_corpus, args.batch_size, padding_id)
    else:
        dev = list(myio.create_batches(
            df, ids_corpus, 'dev', args.batch_size, padding_id, N_neg=args.n_neg, samples_file=args.samples_file))
        test = list(myio.create_batches(
            df, ids_corpus, 'test', args.batch_size, padding_id, N_neg=args.n_neg, samples_file=args.samples_file))
    # baselines_eval(train, dev, test)

    model = Model(args, embedding_layer, len(label_tags), weights=weights if args.reweight else None)
    model.ready()

    print 'total (non) trainable params: ', model.num_parameters()

    if args.load_pre_trained_part:
        # need to remove the old assigns to embeddings
        model.init_assign_ops = model.load_pre_trained_part(args.load_pre_trained_part)
    print '\nmodel init_assign_ops: {}\n'.format(model.init_assign_ops)

    model.train_model(df, ids_corpus, dev=dev, test=test)
    print '\nEnded at: {}'.format(datetime.now())
Example #2
def main(args):
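    # Overall flow: read the corpus, build an embedding layer, map the corpus
    # to ids, look up the padding/<s>/</s> ids, optionally build dev/test
    # evaluation batches and a held-out auto-encoding set, then create the
    # training batches (auto_encode=True) and train the model.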
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = myio.create_embedding_layer(
                raw_corpus,
                n_d = args.hidden_dim,
                cut_off = args.cut_off,
                embs = load_embedding_iterator(args.embeddings) if args.embeddings else None
            )
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer)
    say("vocab size={}, corpus size={}\n".format(
            embedding_layer.n_V,
            len(raw_corpus)
        ))
    padding_id = embedding_layer.vocab_map["<padding>"]
    bos_id = embedding_layer.vocab_map["<s>"]
    eos_id = embedding_layer.vocab_map["</s>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        dev = myio.read_annotations(args.dev, K_neg=20, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus, dev, padding_id)
    if args.test:
        test = myio.read_annotations(args.test, K_neg=20, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus, test, padding_id)

    if args.heldout:
        with open(args.heldout) as fin:
            heldout_ids = fin.read().split()
        heldout_corpus = dict((id, ids_corpus[id]) for id in heldout_ids if id in ids_corpus)
        train_corpus = dict((id, ids_corpus[id]) for id in ids_corpus
                                                if id not in heldout_corpus)
        heldout = myio.create_batches(heldout_corpus, [ ], args.batch_size,
                    padding_id, bos_id, eos_id, auto_encode=True)
        heldout = [ myio.create_one_batch(b1, t2, padding_id) for t1, b1, t2 in heldout ]
        say("heldout examples={}\n".format(len(heldout_corpus)))

    if args.train:
        model = Model(args, embedding_layer,
                      weights=weights if args.reweight else None)

        start_time = time.time()
        train = myio.read_annotations(args.train)
        if not args.use_anno: train = [ ]
        train_batches = myio.create_batches(ids_corpus, train, args.batch_size,
                    model.padding_id, model.bos_id, model.eos_id, auto_encode=True)
        say("{} to create batches\n".format(time.time()-start_time))

        model.ready()
        model.train(
                ids_corpus if not args.heldout else train_corpus,
                train,
                dev if args.dev else None,
                test if args.test else None,
                heldout if args.heldout else None
            )
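
The main(args) functions above all expect an already-parsed argument namespace; none of the examples include the parser itself. As a rough orientation, here is a minimal sketch of what such a parser might look like for Example #2. The flag names mirror the args.* attributes read in that snippet, but the types and defaults are assumptions; the real scripts may define more options and different values.

import argparse

# Hypothetical parser for Example #2 (names inferred from the snippet,
# types and defaults are guesses).
parser = argparse.ArgumentParser()
parser.add_argument("--corpus", type=str, required=True)     # question corpus
parser.add_argument("--embeddings", type=str, default="")    # optional pre-trained word vectors
parser.add_argument("--hidden_dim", type=int, default=200)   # embedding dimension (n_d)
parser.add_argument("--cut_off", type=int, default=1)        # vocabulary frequency cut-off
parser.add_argument("--reweight", action="store_true")       # use IDF re-weighting
parser.add_argument("--dev", type=str, default="")           # dev annotation file
parser.add_argument("--test", type=str, default="")          # test annotation file
parser.add_argument("--train", type=str, default="")         # training annotation file
parser.add_argument("--heldout", type=str, default="")       # file listing held-out question ids
parser.add_argument("--use_anno", type=int, default=1)       # use the annotated training pairs
parser.add_argument("--batch_size", type=int, default=40)

args = parser.parse_args()
main(args)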
Example #3
def main(args):
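    # Overall flow: read the corpus, build an embedding layer, map the corpus
    # to ids, optionally build dev/test evaluation batches, create training
    # batches once just to report their statistics, then build the model,
    # optionally load pre-trained encoder parameters, and train.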
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = myio.create_embedding_layer(
        raw_corpus,
        n_d=args.hidden_dim,
        embs=load_embedding_iterator(args.embeddings)
        if args.embeddings else None)
    ids_corpus = myio.map_corpus(raw_corpus,
                                 embedding_layer,
                                 max_len=args.max_seq_len)
    say("vocab size={}, corpus size={}\n".format(embedding_layer.n_V,
                                                 len(raw_corpus)))
    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        dev_raw = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus,
                                       dev_raw,
                                       padding_id,
                                       pad_left=not args.average,
                                       merge=args.merge)
    if args.test:
        test_raw = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus,
                                        test_raw,
                                        padding_id,
                                        pad_left=not args.average,
                                        merge=args.merge)

    if args.train:
        start_time = time.time()
        train = myio.read_annotations(args.train)
        train_batches = myio.create_batches(ids_corpus,
                                            train,
                                            args.batch_size,
                                            padding_id,
                                            pad_left=not args.average,
                                            merge=args.merge)
        say("{} to create batches\n".format(time.time() - start_time))
        say("{} batches, {} tokens in total, {} triples in total\n".format(
            len(train_batches), sum(len(x[0].ravel()) for x in train_batches),
            sum(len(x[1].ravel()) for x in train_batches)))
        train_batches = None

        model = Model(args,
                      embedding_layer,
                      weights=weights if args.reweight else None)
        model.ready()

        # set parameters using pre-trained network
        if args.load_pretrain:
            model.encoder.load_pretrained_parameters(args)

        model.train(ids_corpus, train, (dev, dev_raw) if args.dev else None,
                    (test, test_raw) if args.test else None)
Example #4
def main(args):
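    # Overall flow: read the corpus, build an embedding layer without
    # pre-trained vectors, map the corpus to ids, create training batches,
    # report their statistics, and train; the dev/test evaluation path is
    # commented out in this variant.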
    raw_corpus = myio.read_corpus(args.corpus)
    print("raw corpus:", args.corpus, "len:", len(raw_corpus))
    embedding_layer = myio.create_embedding_layer(
                raw_corpus,
                n_d = args.hidden_dim,
                cut_off = args.cut_off,
                embs = None # embs = load_embedding_iterator(args.embeddings) if args.embeddings else None
            )
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, max_len=args.max_seq_len)
    myio.say("vocab size={}, corpus size={}\n".format(
            embedding_layer.n_V,
            len(raw_corpus)
        ))
    padding_id = embedding_layer.vocab_map["<padding>"]
 
    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

# 
#     if args.dev:
#         dev = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
#         dev = myio.create_eval_batches(ids_corpus, dev, padding_id, pad_left = not args.average)
#     if args.test:
#         test = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
#         test = myio.create_eval_batches(ids_corpus, test, padding_id, pad_left = not args.average)
 
    if args.train:
        start_time = time.time()
        train = myio.read_annotations(args.train)
        print("training data:", args.train, "len:", len(train))
        train_batches = myio.create_batches(ids_corpus, train, args.batch_size,
                                padding_id, pad_left = not args.average)
        myio.say("{:.2f} secs to create {} batches of size {}\n".format( (time.time()-start_time), len(train_batches), args.batch_size))
        myio.say("{} batches, {} tokens in total, {} triples in total\n".format(
                len(train_batches),
                sum(len(x[0].ravel())+len(x[1].ravel()) for x in train_batches),
                sum(len(x[2].ravel()) for x in train_batches)
            ))
#         train_batches = None
 
        model = Model(args, embedding_layer,
                      weights=weights if args.reweight else None)
        model.ready()
 
#         # set parameters using pre-trained network
#         if args.load_pretrain:
#             model.load_pretrained_parameters(args)
# 
        model.train(
                ids_corpus,
                train,
                dev = None, # dev if args.dev else None,
                test = None # test if args.test else None
            )
Example #5
def main(args):
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = myio.create_embedding_layer(
                raw_corpus,
                n_d = args.hidden_dim,
                embs = load_embedding_iterator(args.embeddings) if args.embeddings else None
            )
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, max_len=args.max_seq_len)
    say("vocab size={}, corpus size={}\n".format(
            embedding_layer.n_V,
            len(raw_corpus)
        ))
    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        dev_raw = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus, dev_raw, padding_id,
                    pad_left=not args.average, merge=args.merge)
    if args.test:
        test_raw = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus, test_raw, padding_id,
                    pad_left=not args.average, merge=args.merge)

    if args.train:
        start_time = time.time()
        train = myio.read_annotations(args.train)
        train_batches = myio.create_batches(ids_corpus, train, args.batch_size,
                                padding_id, pad_left = not args.average, merge=args.merge)
        say("{} to create batches\n".format(time.time()-start_time))
        say("{} batches, {} tokens in total, {} triples in total\n".format(
                len(train_batches),
                sum(len(x[0].ravel()) for x in train_batches),
                sum(len(x[1].ravel()) for x in train_batches)
            ))
        train_batches = None

        model = Model(args, embedding_layer,
                      weights=weights if args.reweight else None)
        model.ready()

        # set parameters using pre-trained network
        if args.load_pretrain:
            model.encoder.load_pretrained_parameters(args)

        model.train(
                ids_corpus,
                train,
                (dev, dev_raw) if args.dev else None,
                (test, test_raw) if args.test else None
            )
Example #6
def main(args):
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = myio.create_embedding_layer(
        raw_corpus,
        n_d=args.hidden_dim,
        cut_off=args.cut_off,
        embs=load_embedding_iterator(args.embeddings)
        if args.embeddings else None)
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer)
    say("vocab size={}, corpus size={}\n".format(embedding_layer.n_V,
                                                 len(raw_corpus)))
    padding_id = embedding_layer.vocab_map["<padding>"]
    bos_id = embedding_layer.vocab_map["<s>"]
    eos_id = embedding_layer.vocab_map["</s>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        dev = myio.read_annotations(args.dev, K_neg=20, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus, dev, padding_id)
    if args.test:
        test = myio.read_annotations(args.test, K_neg=20, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus, test, padding_id)

    if args.heldout:
        with open(args.heldout) as fin:
            heldout_ids = fin.read().split()
        heldout_corpus = dict(
            (id, ids_corpus[id]) for id in heldout_ids if id in ids_corpus)
        train_corpus = dict((id, ids_corpus[id]) for id in ids_corpus
                            if id not in heldout_corpus)
        heldout = myio.create_batches(heldout_corpus, [],
                                      args.batch_size,
                                      padding_id,
                                      bos_id,
                                      eos_id,
                                      auto_encode=True)
        heldout = [
            myio.create_one_batch(b1, t2, padding_id) for t1, b1, t2 in heldout
        ]
        say("heldout examples={}\n".format(len(heldout_corpus)))

    if args.train:
        model = Model(args,
                      embedding_layer,
                      weights=weights if args.reweight else None)

        start_time = time.time()
        train = myio.read_annotations(args.train)
        if not args.use_anno: train = []
        train_batches = myio.create_batches(ids_corpus,
                                            train,
                                            args.batch_size,
                                            model.padding_id,
                                            model.bos_id,
                                            model.eos_id,
                                            auto_encode=True)
        say("{} to create batches\n".format(time.time() - start_time))
        model.ready()

        model.train(ids_corpus if not args.heldout else train_corpus, train,
                    dev if args.dev else None, test if args.test else None,
                    heldout if args.heldout else None)
Example #7
    def train(self, ids_corpus, train, dev=None, test=None, heldout=None):
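        # Training loop for the auto-encoding model: compile Theano functions
        # for training, scoring, and per-token NLL, then each epoch rebuild
        # the batches, train on title and/or body inputs, and at the last
        # batch evaluate dev/test MAP/MRR/P@1/P@5 and held-out perplexity,
        # saving the model whenever dev MRR improves.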
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id
        bos_id = self.bos_id
        eos_id = self.eos_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id, args.loss)

        updates, lr, gnorm = create_optimization_updates(
            cost=self.cost,
            params=self.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

        train_func = theano.function(inputs=[self.idxs, self.idys],
                                     outputs=[self.cost, self.loss, gnorm],
                                     updates=updates)

        eval_func = theano.function(
            inputs=[self.idxs],
            #outputs = self.scores2
            outputs=self.scores)

        nll_func = theano.function(inputs=[self.idxs, self.idys],
                                   outputs=[self.nll, self.mask])

        say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        result_table = PrettyTable(
            ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
            ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        heldout_PPL = -1

        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 8: break

            start_time = time.time()

            train_batches = myio.create_batches(ids_corpus,
                                                train,
                                                batch_size,
                                                padding_id,
                                                bos_id,
                                                eos_id,
                                                auto_encode=True)
            N = len(train_batches)

            train_cost = 0.0
            train_loss = 0.0
            train_loss2 = 0.0
            for i in xrange(N):
                # get current batch
                t1, b1, t2 = train_batches[i]

                if args.use_title:
                    idxs, idys = myio.create_one_batch(t1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if args.use_body:
                    idxs, idys = myio.create_one_batch(b1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if i % 10 == 0:
                    say("\r{}/{}".format(i, N))

                if i == N - 1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(
                            dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(
                            test, eval_func)
                    if heldout is not None:
                        heldout_PPL = self.evaluate_perplexity(
                            heldout, nll_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row([epoch] + [
                            "%.2f" % x
                            for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                            [test_MAP, test_MRR, test_P1, test_P5]
                        ])
                        if args.model:
                            self.save_model(args.model + ".pkl.gz")

                    dropout_p = np.float64(args.dropout).astype(
                        theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f} {:.3f}\t" \
                        +"\tMRR={:.2f},{:.2f}\tPPL={:.1f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            train_loss2 / (i+1),
                            dev_MRR,
                            best_dev,
                            heldout_PPL,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #8
    def train(self, ids_corpus, train, dev=None, test=None):
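        # Training loop for the question-retrieval model: compile Theano
        # training and scoring functions, then each epoch re-read the
        # annotations, rebuild the (idts, idbs, idps) batches, accumulate
        # cost/loss, and at the last batch evaluate dev/test and save the
        # model whenever dev MRR improves.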
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        updates, lr, gnorm = create_optimization_updates(
                cost = self.cost,
                params = self.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_func = theano.function(
                inputs = [ self.idts, self.idbs, self.idps ],
                outputs = [ self.cost, self.loss, gnorm ],
                updates = updates
            )

        eval_func = theano.function(
                inputs = [ self.idts, self.idbs ],
                outputs = self.scores,
                on_unused_input='ignore'
            )

        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat()
            ))

        result_table = PrettyTable(["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
                                    ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 15: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                    padding_id, pad_left = not args.average)
            N = len(train_batches)

            train_loss = 0.0
            train_cost = 0.0

            for i in xrange(N):
                # get current batch
                idts, idbs, idps = train_batches[i]

                cur_cost, cur_loss, grad_norm = train_func(idts, idbs, idps)
                train_loss += cur_loss
                train_cost += cur_cost

                if i % 10 == 0:
                    say("\r{}/{}".format(i,N))

                if i == N-1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(test, eval_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row(
                            [ epoch ] +
                            [ "%.2f" % x for x in [ dev_MAP, dev_MRR, dev_P1, dev_P5 ] +
                                        [ test_MAP, test_MRR, test_P1, test_P5 ] ]
                        )
                        if args.save_model:
                            self.save_model(args.save_model)

                    dropout_p = np.float64(args.dropout).astype(
                                theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f}" \
                        +"\tMRR={:.2f},{:.2f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            dev_MRR,
                            best_dev,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(
                            self.get_pnorm_stat()
                        ))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #9
    def train_model(self, ids_corpus, train, dev=None, test=None):
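        # TensorFlow training loop: set up an Adam optimizer, optionally load
        # pre-trained variable values via init_assign_ops, create TensorBoard
        # summary writers for train/dev losses, evaluation metrics, and
        # parameter norms, then each epoch rebuild the batches, train,
        # periodically evaluate on dev/test, and checkpoint whenever the
        # selected dev metric (MRR or MAP) improves.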
        with tf.Session() as sess:

            result_table = PrettyTable([
                "Epoch", "Step", "dev MAP", "dev MRR", "dev P@1", "dev P@5",
                "tst MAP", "tst MRR", "tst P@1", "tst P@5"
            ])
            dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
            test_MAP = test_MRR = test_P1 = test_P5 = 0
            best_dev = -1

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(self.args.learning_rate)
            train_op = optimizer.minimize(self.cost, global_step=global_step)

            print '\n\ntrainable params: ', tf.trainable_variables(), '\n\n'

            sess.run(tf.global_variables_initializer())
            emb = sess.run(self.embeddings)
            print '\nemb {}\n'.format(emb[10][0:10])

            if self.init_assign_ops != {}:
                print 'assigning trained values ...\n'
                sess.run(self.init_assign_ops)
                emb = sess.run(self.embeddings)
                print '\nemb {}\n'.format(emb[10][0:10])
                self.init_assign_ops = {}

            if self.args.save_dir != "":
                print("Writing to {}\n".format(self.args.save_dir))

            # TRAIN LOSS
            train_loss_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "train",
                             "loss"), )
            train_cost_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "train", "cost"),
                sess.graph)

            # VARIABLE NORM
            p_norm_summaries = {}
            p_norm_placeholders = {}
            for param_name, param_norm in self.get_pnorm_stat(
                    sess).iteritems():
                p_norm_placeholders[param_name] = tf.placeholder(tf.float32)
                p_norm_summaries[param_name] = tf.summary.scalar(
                    param_name, p_norm_placeholders[param_name])
            p_norm_summary_op = tf.summary.merge(p_norm_summaries.values())
            p_norm_summary_dir = os.path.join(self.args.save_dir, "summaries",
                                              "p_norm")
            p_norm_summary_writer = tf.summary.FileWriter(p_norm_summary_dir, )

            # DEV LOSS & EVAL
            dev_loss0_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev",
                             "loss0"), )
            dev_loss1_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev",
                             "loss1"), )
            dev_loss2_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev",
                             "loss2"), )
            dev_eval_writer1 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "MAP"), )
            dev_eval_writer2 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "MRR"), )
            dev_eval_writer3 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "Pat1"), )
            dev_eval_writer4 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "Pat5"), )

            loss = tf.placeholder(tf.float32)
            loss_summary = tf.summary.scalar("loss", loss)
            dev_eval = tf.placeholder(tf.float32)
            dev_summary = tf.summary.scalar("QR_evaluation", dev_eval)
            cost = tf.placeholder(tf.float32)
            cost_summary = tf.summary.scalar("cost", cost)
            # train_eval = tf.placeholder(tf.float32)
            # train_summary = tf.summary.scalar("QR_train", train_eval)

            if self.args.save_dir != "":
                checkpoint_dir = os.path.join(self.args.save_dir,
                                              "checkpoints")
                checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)

            patience = 8 if 'patience' not in self.args else self.args.patience
            unchanged = 0
            max_epoch = self.args.max_epoch
            for epoch in xrange(max_epoch):
                unchanged += 1
                if unchanged > patience:
                    break

                train_batches = myio.create_batches(ids_corpus,
                                                    train,
                                                    self.args.batch_size,
                                                    self.padding_id,
                                                    pad_left=False)

                N = len(train_batches)

                train_loss = 0.0
                train_cost = 0.0

                for i in xrange(N):
                    idts, idbs, idps, qpp = train_batches[i]
                    cur_step, cur_loss, cur_cost = self.train_batch(
                        idts, idbs, idps, qpp, train_op, global_step, sess)
                    summary = sess.run(loss_summary, {loss: cur_loss})
                    train_loss_writer.add_summary(summary, cur_step)
                    train_loss_writer.flush()
                    summary = sess.run(cost_summary, {cost: cur_cost})
                    train_cost_writer.add_summary(summary, cur_step)
                    train_cost_writer.flush()

                    train_loss += cur_loss
                    train_cost += cur_cost

                    if i % 10 == 0:
                        say("\r{}/{}".format(i, N))

                    if i == N - 1 or (i % 10 == 0 and 'testing' in self.args
                                      and self.args.testing):  # EVAL
                        if dev:
                            dev_MAP, dev_MRR, dev_P1, dev_P5, dloss0, dloss1, dloss2 = self.evaluate(
                                dev, sess)

                            summary = sess.run(loss_summary, {loss: dloss0})
                            dev_loss0_writer.add_summary(summary, cur_step)
                            dev_loss0_writer.flush()
                            summary = sess.run(loss_summary, {loss: dloss1})
                            dev_loss1_writer.add_summary(summary, cur_step)
                            dev_loss1_writer.flush()
                            summary = sess.run(loss_summary, {loss: dloss2})
                            dev_loss2_writer.add_summary(summary, cur_step)
                            dev_loss2_writer.flush()

                            summary = sess.run(dev_summary,
                                               {dev_eval: dev_MAP})
                            dev_eval_writer1.add_summary(summary, cur_step)
                            dev_eval_writer1.flush()
                            summary = sess.run(dev_summary,
                                               {dev_eval: dev_MRR})
                            dev_eval_writer2.add_summary(summary, cur_step)
                            dev_eval_writer2.flush()
                            summary = sess.run(dev_summary, {dev_eval: dev_P1})
                            dev_eval_writer3.add_summary(summary, cur_step)
                            dev_eval_writer3.flush()
                            summary = sess.run(dev_summary, {dev_eval: dev_P5})
                            dev_eval_writer4.add_summary(summary, cur_step)
                            dev_eval_writer4.flush()

                            feed_dict = {}
                            for param_name, param_norm in self.get_pnorm_stat(
                                    sess).iteritems():
                                feed_dict[p_norm_placeholders[
                                    param_name]] = param_norm
                            _p_norm_sum = sess.run(p_norm_summary_op,
                                                   feed_dict)
                            p_norm_summary_writer.add_summary(
                                _p_norm_sum, cur_step)

                        if test:
                            test_MAP, test_MRR, test_P1, test_P5, tloss0, tloss1, tloss2 = self.evaluate(
                                test, sess)

                        if self.args.performance == "MRR" and dev_MRR > best_dev:
                            unchanged = 0
                            best_dev = dev_MRR
                            result_table.add_row([
                                epoch, cur_step, dev_MAP, dev_MRR, dev_P1,
                                dev_P5, test_MAP, test_MRR, test_P1, test_P5
                            ])
                            if self.args.save_dir != "":
                                self.save(sess, checkpoint_prefix, cur_step)
                        elif self.args.performance == "MAP" and dev_MAP > best_dev:
                            unchanged = 0
                            best_dev = dev_MAP
                            result_table.add_row([
                                epoch, cur_step, dev_MAP, dev_MRR, dev_P1,
                                dev_P5, test_MAP, test_MRR, test_P1, test_P5
                            ])
                            if self.args.save_dir != "":
                                self.save(sess, checkpoint_prefix, cur_step)

                        say("\r\n\nEpoch {}\tcost={:.3f}\tloss={:.3f}\tMRR={:.2f},MAP={:.2f}\n"
                            .format(
                                epoch,
                                train_cost /
                                (i + 1),  # i.e. divided by N training batches
                                train_loss /
                                (i + 1),  # i.e. divided by N training batches
                                dev_MRR,
                                dev_MAP))
                        say("\n{}\n".format(result_table))
                        myio.say("\tp_norm: {}\n".format(
                            self.get_pnorm_stat(sess)))
Example #10
    def train(self, ids_corpus, train, dev=None, test=None, heldout=None):
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id
        bos_id = self.bos_id
        eos_id = self.eos_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id, args.loss)

        updates, lr, gnorm = create_optimization_updates(
                cost = self.cost,
                params = self.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_func = theano.function(
                inputs = [ self.idxs, self.idys ],
                outputs = [ self.cost, self.loss, gnorm ],
                updates = updates
            )

        eval_func = theano.function(
                inputs = [ self.idxs ],
                #outputs = self.scores2
                outputs = self.scores
            )

        nll_func = theano.function(
                inputs = [ self.idxs, self.idys ],
                outputs = [ self.nll, self.mask ]
            )

        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat()
            ))

        result_table = PrettyTable(["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
                                    ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        heldout_PPL = -1

        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 8: break

            start_time = time.time()

            train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                    padding_id, bos_id, eos_id, auto_encode=True)
            N = len(train_batches)

            train_cost = 0.0
            train_loss = 0.0
            train_loss2 = 0.0
            for i in xrange(N):
                # get current batch
                t1, b1, t2 = train_batches[i]

                if args.use_title:
                    idxs, idys = myio.create_one_batch(t1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if args.use_body:
                    idxs, idys = myio.create_one_batch(b1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if i % 10 == 0:
                    say("\r{}/{}".format(i,N))

                if i == N-1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(test, eval_func)
                    if heldout is not None:
                        heldout_PPL = self.evaluate_perplexity(heldout, nll_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row(
                            [ epoch ] +
                            [ "%.2f" % x for x in [ dev_MAP, dev_MRR, dev_P1, dev_P5 ] +
                                        [ test_MAP, test_MRR, test_P1, test_P5 ] ]
                        )
                        if args.model:
                            self.save_model(args.model+".pkl.gz")

                    dropout_p = np.float64(args.dropout).astype(
                                theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f} {:.3f}\t" \
                        +"\tMRR={:.2f},{:.2f}\tPPL={:.1f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            train_loss2 / (i+1),
                            dev_MRR,
                            best_dev,
                            heldout_PPL,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(
                            self.get_pnorm_stat()
                        ))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #11
def main():
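    # Overall flow: read the corpus, build an embedding layer, map the corpus
    # to ids, pick a model class based on args.layer, build dev/test
    # evaluation batches, optionally load pre-trained weights into part of
    # the model, then create training batches and train.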
    print 'Starting at: {}\n'.format(datetime.now())
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = create_embedding_layer(
        n_d=200,
        embs=load_embedding_iterator(args.embeddings),
        only_words=False if args.use_embeddings else True,
        trainable=args.trainable
    )
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, max_len=args.max_seq_len)
    print("vocab size={}, corpus size={}\n".format(
            embedding_layer.n_V,
            len(raw_corpus)
        ))
    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.layer.lower() == "lstm":
        from models import LstmQR as Model
    elif args.layer.lower() in ["bilstm", "bigru"]:
        from models import BiRNNQR as Model
    elif args.layer.lower() == "cnn":
        from models import CnnQR as Model
    elif args.layer.lower() == "gru":
        from models import GruQR as Model
    else:
        raise Exception("no correct layer given")

    if args.dev:
        dev = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus, dev, padding_id, pad_left=False)
    if args.test:
        test = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus, test, padding_id, pad_left=False)

    model = Model(args, embedding_layer, weights=weights if args.reweight else None)
    model.ready()

    print 'total (non) trainable params: ', model.num_parameters()

    if args.load_pre_trained_part:
        # need to remove the old assigns to embeddings
        model.init_assign_ops = model.load_pre_trained_part(args.load_pre_trained_part)
    print '\nmodel init_assign_ops: {}\n'.format(model.init_assign_ops)

    if args.train:
        start_time = time.time()
        train = myio.read_annotations(args.train)
        train_batches = myio.create_batches(
            ids_corpus, train, args.batch_size, padding_id, pad_left=False
        )

        print("{} to create batches\n".format(time.time()-start_time))
        print("{} batches, {} tokens in total, {} triples in total\n".format(
                len(train_batches),
                sum(len(x[0].ravel())+len(x[1].ravel()) for x in train_batches),
                sum(len(x[2].ravel()) for x in train_batches)
            ))

        model.train_model(
            ids_corpus,
            train,
            dev=dev if args.dev else None,
            test=test if args.test else None
        )
    print '\nEnded at: {}'.format(datetime.now())
Example #12
def main():
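    # Overall flow: build an embedding layer from the required pre-trained
    # embeddings, read train/dev annotations (optionally truncated via
    # args.debug) and optional rationale data, then either train the
    # rationale model, or, if only a saved model is given, load it, compile
    # the evaluation functions, evaluate the rationales, and append the
    # results to a graph-data file.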
    print args
    set_default_rng_seed(args.seed)
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(args.embedding)

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        if args.debug:
            len_ = len(train_x) * args.debug
            len_ = int(len_)
            train_x = train_x[:len_]
            train_y = train_y[:len_]
        print 'train size: ', len(train_x)  #, train_x[0], len(train_x[0])
        #exit()
        train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        if args.debug:
            len_ = len(dev_x) * args.debug
            len_ = int(len_)
            dev_x = dev_x[:len_]
            dev_y = dev_y[:len_]
        print 'dev size: ', len(dev_x)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])
    else:
        rationale_data = None

    #print 'in main: ', args.seed
    if args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      nclasses=len(train_y[0]))
        if args.load_model:
            model.load_model(args.load_model,
                             seed=args.seed,
                             select_all=args.select_all)
            say("model loaded successfully.\n")
        else:
            model.ready()
        #say(" ready time nedded {} \n".format(time.time()-start_ready_time))

        #debug_func2 = theano.function(
        #        inputs = [ model.x, model.z ],
        #        outputs = model.generator.logpz
        #    )
        #theano.printing.debugprint(debug_func2)
        #return

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  #(test_x, test_y),
            rationale_data if args.load_rationale else None,
            trained_max_epochs=args.trained_max_epochs)

    if args.load_model and not args.dev and not args.train:
        model = Model(args=args, embedding_layer=embedding_layer, nclasses=-1)
        model.load_model(args.load_model,
                         seed=args.seed,
                         select_all=args.select_all)
        say("model loaded successfully.\n")

        sample_generator = theano.function(
            inputs=[model.x],
            outputs=model.z,
            #updates = model.generator.sample_updates
        )
        sample_encoder = theano.function(
            inputs=[model.x, model.y, model.z],
            outputs=[
                model.encoder.obj, model.encoder.loss, model.encoder.pred_diff
            ],
            #updates = model.generator.sample_updates
        )
        # compile an evaluation function
        eval_func = theano.function(
            inputs=[model.x, model.y],
            outputs=[
                model.z, model.encoder.obj, model.encoder.loss,
                model.encoder.pred_diff
            ],
            #updates = model.generator.sample_updates
        )
        debug_func_enc = theano.function(
            inputs=[model.x, model.y],
            outputs=[
                model.z, model.encoder.obj, model.encoder.loss,
                model.encoder.pred_diff
            ],
            #updates = model.generator.sample_updates
        )
        debug_func_gen = theano.function(
            inputs=[model.x, model.y],
            outputs=[
                model.z, model.encoder.obj, model.encoder.loss,
                model.encoder.pred_diff
            ],
            #updates = model.generator.sample_updates
        )

        # compile a predictor function
        pred_func = theano.function(
            inputs=[model.x],
            outputs=[model.z, model.encoder.preds],
            #updates = model.generator.sample_updates
        )

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                [u["xids"] for u in rationale_data],
                [u["y"] for u in rationale_data],
                args.batch,
                padding_id,
                sort=False)

        # disable dropout
        model.dropout.set_value(0.0)
        if rationale_data is not None:
            #model.dropout.set_value(0.0)
            start_rational_time = time.time()
            r_mse, r_p1, r_prec1, r_prec2, gen_time, enc_time, prec_cal_time = model.evaluate_rationale(
                rationale_data, valid_batches_x, valid_batches_y,
                sample_generator, sample_encoder, eval_func)
            #valid_batches_y, eval_func)

            #model.dropout.set_value(dropout_prob)
            #say(("\ttest rationale mser={:.4f}  p[1]r={:.2f}  prec1={:.4f}" +
            #            "  prec2={:.4f} generator time={:.4f} encoder time={:.4f} total test time={:.4f}\n").format(
            #        r_mse,
            #        r_p1,
            #        r_prec1,
            #        r_prec2,
            #        gen_time,
            #        enc_time,
            #        time.time() - start_rational_time
            #))

            data = "\t".join([
                '%.5f' % r_mse,
                '%4.2f' % r_p1,
                '%4.4f' % r_prec1,
                '%4.4f' % r_prec2,
                '%4.2f' % gen_time,
                '%4.2f' % enc_time,
                '%4.2f' % prec_cal_time,
                '%4.2f' % (time.time() - start_rational_time),
                str(args.sparsity),
                str(args.coherent),
                str(args.max_epochs),
                str(args.cur_epoch),
            ])

            with open(args.graph_data_path, 'a') as g_f:
                print 'writing to file: ', data
                g_f.write(data + "\n")
Example #13
    def train(self, train, dev, test, rationale_data):
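        # Joint training loop for the generator/encoder rationale model:
        # build dev/test/rationale batches, compile Theano functions for
        # sampling, loss/prediction, evaluation, and the combined
        # generator+encoder update, then each epoch train over the batches,
        # tracking sparsity cost and the fraction of selected words p[1],
        # and periodically evaluate on dev and on the annotated rationales,
        # saving the model when the dev objective improves.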
        args = self.args
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y = myio.create_batches(
                dev[0], dev[1], args.batch, padding_id)
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                test[0], test[1], args.batch, padding_id)
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                [u["xids"] for u in rationale_data],
                [u["y"] for u in rationale_data],
                args.batch,
                padding_id,
                sort=False)

        start_time = time.time()
        train_batches_x, train_batches_y = myio.create_batches(
            train[0], train[1], args.batch, padding_id)
        say("{:.2f}s to create training batches\n\n".format(time.time() -
                                                            start_time))

        updates_e, lr_e, gnorm_e = create_optimization_updates(
            cost=self.generator.cost_e,
            params=self.encoder.params,
            method=args.learning,
            lr=args.learning_rate)[:3]

        updates_g, lr_g, gnorm_g = create_optimization_updates(
            cost=self.generator.cost,
            params=self.generator.params,
            method=args.learning,
            lr=args.learning_rate)[:3]

        sample_generator = theano.function(
            inputs=[self.x],
            outputs=self.z_pred,
            #updates = self.generator.sample_updates
            #allow_input_downcast = True
        )

        get_loss_and_pred = theano.function(
            inputs=[self.x, self.z, self.y],
            outputs=[self.generator.loss_vec, self.encoder.preds])

        eval_generator = theano.function(
            inputs=[self.x, self.y],
            outputs=[
                self.z, self.generator.obj, self.generator.loss,
                self.encoder.pred_diff
            ],
            givens={self.z: self.generator.z_pred},
            #updates = self.generator.sample_updates,
            #no_default_updates = True
        )

        train_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.generator.obj, self.generator.loss, \
                                self.generator.sparsity_cost, self.z, gnorm_g, gnorm_e ],
                givens = {
                    self.z : self.generator.z_pred
                },
                #updates = updates_g,
                updates = updates_g.items() | updates_e.items() #+ self.generator.sample_updates,
                #no_default_updates = True
            )

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        for epoch in range(args.max_epochs):
            unchanged += 1
            if unchanged > 10: return

            train_batches_x, train_batches_y = myio.create_batches(
                train[0], train[1], args.batch, padding_id)

            processed = 0
            train_cost = 0.0
            train_loss = 0.0
            train_sparsity_cost = 0.0
            p1 = 0.0
            start_time = time.time()

            N = len(train_batches_x)
            for i in range(N):
                if (i + 1) % 100 == 0:
                    say("\r{}/{}     ".format(i + 1, N))

                bx, by = train_batches_x[i], train_batches_y[i]
                mask = bx != padding_id

                cost, loss, sparsity_cost, bz, gl2_g, gl2_e = train_generator(
                    bx, by)

                k = len(by)
                processed += k
                train_cost += cost
                train_loss += loss
                train_sparsity_cost += sparsity_cost
                p1 += np.sum(bz * mask) / (np.sum(mask) + 1e-8)

                if (i == N - 1) or (eval_period > 0
                                    and processed / eval_period >
                                    (processed - k) / eval_period):
                    say("\n")
                    say((
                        "Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  "
                        +
                        "p[1]={:.2f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n"
                    ).format(epoch + (i + 1.0) / N, train_cost / (i + 1),
                             train_sparsity_cost / (i + 1),
                             train_loss / (i + 1), p1 / (i + 1), float(gl2_g),
                             float(gl2_e), (time.time() - start_time) / 60.0,
                             (time.time() - start_time) / 60.0 / (i + 1) * N))
                    say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                    for x in self.encoder.params ])+"\n")
                    say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                    for x in self.generator.params ])+"\n")

                    if dev:
                        self.dropout.set_value(0.0)
                        dev_obj, dev_loss, dev_diff, dev_p1 = self.evaluate_data(
                            dev_batches_x,
                            dev_batches_y,
                            eval_generator,
                            sampling=True)

                        if dev_obj < best_dev:
                            best_dev = dev_obj
                            unchanged = 0
                            if args.dump and rationale_data:
                                self.dump_rationales(args.dump,
                                                     valid_batches_x,
                                                     valid_batches_y,
                                                     get_loss_and_pred,
                                                     sample_generator)

                            if args.save_model:
                                self.save_model(args.save_model, args)

                        say((
                            "\tsampling devg={:.4f}  mseg={:.4f}  avg_diffg={:.4f}"
                            + "  p[1]g={:.2f}  best_dev={:.4f}\n").format(
                                dev_obj, dev_loss, dev_diff, dev_p1, best_dev))

                        if rationale_data is not None:
                            r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale(
                                rationale_data, valid_batches_x,
                                valid_batches_y, eval_generator)
                            say((
                                "\trationale mser={:.4f}  p[1]r={:.2f}  prec1={:.4f}"
                                + "  prec2={:.4f}\n").format(
                                    r_mse, r_p1, r_prec1, r_prec2))

                        self.dropout.set_value(dropout_prob)
Example #14
    def train(self, train, dev, test):
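        # Variant of the joint generator/encoder training loop with optional
        # learning-rate decay: each epoch trains over all batches, evaluates
        # on dev, and compares the average train (and dev) cost against the
        # previous epoch; if the cost grew beyond the tolerance, the flag
        # more stays True and the epoch's inner loop is repeated (parameters
        # were backed up in param_bak beforehand).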
        args = self.args
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y = myio.create_batches(
                            dev[0], dev[1], args.batch, padding_id
                        )
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                            test[0], test[1], args.batch, padding_id
                        )

        start_time = time.time()
        train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )
        say("{:.2f}s to create training batches\n\n".format(
                time.time()-start_time
            ))

        updates_e, lr_e, gnorm_e = create_optimization_updates(
                               cost = self.encoder.cost_e,
                               params = self.encoder.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]


        updates_g, lr_g, gnorm_g = create_optimization_updates(
                               cost = self.encoder.cost_g,
                               params = self.generator.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]

        sample_generator = theano.function(
                inputs = [ self.x ],
                outputs = self.z
            )

        get_loss_and_pred = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.encoder.loss_vec, self.encoder.preds, self.z ]
            )

        train_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                                self.encoder.sparsity_cost, self.z, gnorm_e, gnorm_g ],
                updates = updates_e.items() + updates_g.items(),
            )

        eval_func = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.z, self.encoder.obj, self.true_pos, self.tot_pos, self.tot_true ]
            )

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        last_train_avg_cost = None
        last_dev_avg_cost = None
        tolerance = 0.10 + 1e-3
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 50: return

            train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )

            more = True
            if args.decay_lr:
                param_bak = [ p.get_value(borrow=False) for p in self.params ]

            while more:
                processed = 0
                train_cost = 0.0
                train_loss = 0.0
                train_sparsity_cost = 0.0
                p1 = 0.0
                start_time = time.time()

                N = len(train_batches_x)
                for i in xrange(N):
                    if (i+1) % 100 == 0:
                        say("\r{}/{} {:.2f}       ".format(i+1,N,p1/(i+1)))

                    bx, by = train_batches_x[i], train_batches_y[i]
                    mask = bx != padding_id

                    cost, loss, sparsity_cost, bz, gl2_e, gl2_g = train_generator(bx, by)

                    k = len(by)
                    processed += k
                    train_cost += cost
                    train_loss += loss
                    train_sparsity_cost += sparsity_cost
                    p1 += np.sum(bz*mask) / (np.sum(mask)+1e-8)

                cur_train_avg_cost = train_cost / N

                if dev:
                    self.dropout.set_value(0.0)
                    dev_obj, dev_prec, dev_recall, dev_f1, dev_p1 = self.evaluate_data(
                            dev_batches_x, dev_batches_y, eval_func)
                    self.dropout.set_value(dropout_prob)
                    cur_dev_avg_cost = dev_obj

                more = False
                if args.decay_lr and last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost*(1+tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                                last_train_avg_cost, cur_train_avg_cost
                            ))
                    if dev and cur_dev_avg_cost > last_dev_avg_cost*(1+tolerance):
                        more = True
                        say("\nDev cost {} --> {}\n".format(
                                last_dev_avg_cost, cur_dev_avg_cost
                            ))

                if more:
                    lr_val = lr_g.get_value()*0.5
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    lr_e.set_value(lr_val)
                    say("Decrease learning rate to {}\n".format(float(lr_val)))
                    for p, v in zip(self.params, param_bak):
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost
                if dev: last_dev_avg_cost = cur_dev_avg_cost

                say("\n")
                say(("Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  " +
                    "p[1]={:.3f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n").format(
                        epoch+(i+1.0)/N,
                        train_cost / N,
                        train_sparsity_cost / N,
                        train_loss / N,
                        p1 / N,
                        float(gl2_e),
                        float(gl2_g),
                        (time.time()-start_time)/60.0,
                        (time.time()-start_time)/60.0/(i+1)*N
                    ))
                say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.encoder.params ])+"\n")
                say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.generator.params ])+"\n")

                if dev:
                    if dev_obj < best_dev:
                        best_dev = dev_obj
                        unchanged = 0
                        if args.dump and test:
                            self.dump_rationales(args.dump, test_batches_x, test_batches_y,
                                        get_loss_and_pred, sample_generator)

                    say(("\tdevg={:.4f}  f1g={:.4f}  preg={:.4f}  recg={:.4f}" +
                                "  p[1]g={:.3f}  best_dev={:.4f}\n").format(
                        dev_obj,
                        dev_f1,
                        dev_prec,
                        dev_recall,
                        dev_p1,
                        best_dev
                    ))

                    if test is not None:
                        self.dropout.set_value(0.0)
                        test_obj, test_prec, test_recall, test_f1, test_p1 = self.evaluate_data(
                            test_batches_x, test_batches_y, eval_func)
                        self.dropout.set_value(dropout_prob)
                        say(("\ttestt={:.4f}  f1t={:.4f}  pret={:.4f}  rect={:.4f}" +
                                    "  p[1]t={:.3f}\n").format(
                            test_obj,
                            test_f1,
                            test_prec,
                            test_recall,
                            test_p1
                        ))
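
Several of the training loops on this page (this one and those below) share the same learning-rate decay scheme: back up the parameters at the start of the epoch and, whenever the average cost grows by more than tolerance, halve the learning rate, restore the backup and rerun the epoch. A minimal standalone sketch of that pattern; run_epoch and the toy parameters are illustrative stand-ins, not part of the repository code:

import numpy as np

def train_with_decay(run_epoch, params, lr=0.1, tolerance=0.10 + 1e-3, max_epochs=10):
    # run_epoch(params, lr) is assumed to return the average cost of one pass
    # over the training data and to update `params` (numpy arrays) in place.
    last_cost = None
    for epoch in range(max_epochs):
        param_bak = [p.copy() for p in params]        # back up before the epoch
        while True:
            cost = run_epoch(params, lr)
            if last_cost is not None and cost > last_cost * (1 + tolerance):
                lr *= 0.5                             # halve the learning rate,
                if lr < 1e-5:                         # give up, as Beispiel #15 does
                    return params, lr
                for p, v in zip(params, param_bak):   # restore the backup,
                    p[...] = v                        # and redo the epoch
                continue
            last_cost = cost
            break
    return params, lr
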
Beispiel #15
0
    def train(self, ids_corpus, train, dev=None, test=None):
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        if dev is not None:
            dev, dev_raw = dev
        if test is not None:
            test, test_raw = test

        if args.joint:
            updates_e, lr_e, gnorm_e = create_optimization_updates(
                cost=self.encoder.cost_e,  #self.encoder.cost,
                params=self.encoder.params,
                lr=args.learning_rate * 0.1,
                method=args.learning)[:3]
        else:
            updates_e = {}

        updates_g, lr_g, gnorm_g = create_optimization_updates(
            cost=self.encoder.cost_g,
            params=self.generator.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

        train_func = theano.function(
                inputs = [ self.x, self.triples, self.pairs ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                        self.encoder.sparsity_cost, self.generator.p1, gnorm_g ],
                # updates = updates_g.items() + updates_e.items() + self.generator.sample_updates,
                updates = collections.OrderedDict(list(updates_g.items()) + list(updates_e.items()) + list(self.generator.sample_updates.items())),
                #no_default_updates = True,
                on_unused_input= "ignore"
            )

        eval_func = theano.function(inputs=[self.x],
                                    outputs=self.encoder.scores)

        eval_func2 = theano.function(
            inputs=[self.x],
            outputs=[self.encoder.scores_z, self.generator.p1, self.z],
            updates=self.generator.sample_updates,
            #no_default_updates = True
        )

        say("\tp_norm: {}\n".format(self.get_pnorm_stat(self.encoder.params)))
        say("\tp_norm: {}\n".format(self.get_pnorm_stat(
            self.generator.params)))

        result_table = PrettyTable(
            ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
            ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])
        last_train_avg_cost = None
        tolerance = 0.5 + 1e-3
        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = dev_PZ1 = dev_PT = 0
        test_MAP = test_MRR = test_P1 = test_P5 = test_PZ1 = test_PT = 0
        full_MAP = full_MRR = full_P1 = full_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in range(max_epoch):
            unchanged += 1
            if unchanged > 20: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus,
                                                train,
                                                batch_size,
                                                padding_id,
                                                pad_left=not args.average,
                                                merge=args.merge)
            N = len(train_batches)

            more = True
            param_bak = [p.get_value(borrow=False) for p in self.params]

            while more:

                train_loss = 0.0
                train_cost = 0.0
                train_scost = 0.0
                train_p1 = 0.0

                for i in range(N):
                    # get current batch
                    idts, triples, pairs = train_batches[i]

                    cur_cost, cur_loss, cur_scost, cur_p1, gnormg = train_func(
                        idts, triples, pairs)
                    train_loss += cur_loss
                    train_cost += cur_cost
                    train_scost += cur_scost
                    train_p1 += cur_p1

                    if i % 10 == 0:
                        say("\r{}/{} {:.3f}".format(i, N, train_p1 / (i + 1)))

                cur_train_avg_cost = train_cost / N
                more = False
                if last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost * (1 +
                                                                   tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                            last_train_avg_cost, cur_train_avg_cost))

                if more:
                    lr_val = lr_g.get_value() * 0.5
                    if lr_val < 1e-5: return
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    if args.joint:
                        lr_e.set_value(lr_val)  # lr_e only exists when the encoder is trained jointly
                    say("Decrease learning rate to {}\n".format(float(lr_val)))
                    for p, v in zip(self.params, param_bak):
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost

                say("\r\n\n")
                say( ( "Epoch {}  cost={:.3f}  loss={:.3f}  scost={:.3f}" \
                    +"  P[1]={:.3f}  |g|={:.3f}\t[{:.3f}m]\n" ).format(
                        epoch,
                        train_cost / N,
                        train_loss / N,
                        train_scost / N,
                        train_p1 / N,
                        float(gnormg),
                        (time.time()-start_time)/60.0
                ))
                say("\tp_norm: {}\n".format(
                    self.get_pnorm_stat(self.encoder.params)))
                say("\tp_norm: {}\n".format(
                    self.get_pnorm_stat(self.generator.params)))

                self.dropout.set_value(0.0)

                if dev is not None:
                    full_MAP, full_MRR, full_P1, full_P5 = self.evaluate(
                        dev, eval_func)
                    dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT = self.evaluate_z(
                        dev, dev_raw, ids_corpus, eval_func2)

                if test is not None:
                    test_MAP, test_MRR, test_P1, test_P5, test_PZ1, test_PT = \
                            self.evaluate_z(test, test_raw, ids_corpus, eval_func2)

                if dev_MAP > best_dev:
                    best_dev = dev_MAP
                    unchanged = 0

                say("\n")
                say("  fMAP={:.2f} fMRR={:.2f} fP1={:.2f} fP5={:.2f}\n".format(
                    full_MAP, full_MRR, full_P1, full_P5))

                say("\n")
                say(("  dMAP={:.2f} dMRR={:.2f} dP1={:.2f} dP5={:.2f}" +
                     " dP[1]={:.3f} d%T={:.3f} best_dev={:.2f}\n").format(
                         dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT,
                         best_dev))

                result_table.add_row([epoch] + [
                    "%.2f" % x for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                    [test_MAP, test_MRR, test_P1, test_P5]
                ])

                if unchanged == 0:
                    say("\n")
                    say(("  tMAP={:.2f} tMRR={:.2f} tP1={:.2f} tP5={:.2f}" +
                         " tP[1]={:.3f} t%T={:.3f}\n").format(
                             test_MAP, test_MRR, test_P1, test_P5, test_PZ1,
                             test_PT))
                    if args.dump_rationale:
                        self.evaluate_z(dev + test, dev_raw + test_raw,
                                        ids_corpus, eval_func2,
                                        args.dump_rationale)

                    #if args.save_model:
                    #    self.save_model(args.save_model)

                dropout_p = np.float64(args.dropout).astype(
                    theano.config.floatX)
                self.dropout.set_value(dropout_p)

                say("\n")
                say("{}".format(result_table))
                say("\n")

            if train_p1 / N <= 1e-4 or train_p1 / N + 1e-4 >= 1.0:
                break
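
Beispiel #15 accumulates the per-epoch dev/test metrics in a PrettyTable and reprints the whole table after every epoch. A tiny standalone sketch of that reporting pattern; the numbers below are made up:

from prettytable import PrettyTable

result_table = PrettyTable(
    ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5",
     "tst MAP", "tst MRR", "tst P@1", "tst P@5"])

# in the example above these values come from self.evaluate() / self.evaluate_z()
result_table.add_row([0] + ["%.2f" % x for x in
                            [56.1, 67.3, 51.0, 42.8, 54.9, 66.0, 50.2, 41.5]])
print(result_table)
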
Beispiel #16
0
def main():
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(args.embedding)
    embedding_layer_y = myio.create_embedding_layer(args.embedding)

    max_len_x = args.sentence_length * args.max_sentences
    max_len_y = args.sentence_length_hl * args.max_sentences_hl

    if args.train:
        train_x, train_y = myio.read_docs(args.train)
        train_x = [embedding_layer.map_to_ids(x)[:max_len_x] for x in train_x]
        train_y = [
            embedding_layer_y.map_to_ids(y)[:max_len_y] for y in train_y
        ]

    if args.dev:
        dev_x, dev_y = myio.read_docs(args.dev)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len_x] for x in dev_x]
        dev_y = [embedding_layer_y.map_to_ids(y)[:max_len_y] for y in dev_y]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      embedding_layer_y=embedding_layer_y,
                      nclasses=len(train_y[0]))
        model.ready()

        # debug_func2 = theano.function(
        #        inputs = [ model.x, model.z ],
        #        outputs = model.generator.logpz
        #    )
        # theano.printing.debugprint(debug_func2)
        # return

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  # (test_x, test_y),
            rationale_data if args.load_rationale else None)

    if args.load_model and args.dev and not args.train:
        model = Model(args=None, embedding_layer=embedding_layer, nclasses=-1)
        model.load_model(args.load_model)
        say("model loaded successfully.\n")

        # compile an evaluation function
        eval_func = theano.function(inputs=[model.x, model.y],
                                    outputs=[
                                        model.z, model.encoder.obj,
                                        model.encoder.loss,
                                        model.encoder.pred_diff
                                    ],
                                    updates=model.generator.sample_updates)

        # compile a predictor function
        pred_func = theano.function(inputs=[model.x],
                                    outputs=[model.z, model.encoder.preds],
                                    updates=model.generator.sample_updates)

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        dev_batches_x, dev_batches_y = myio.create_batches(
            dev_x, dev_y, args.batch, padding_id)

        # disable dropout
        model.dropout.set_value(0.0)
        dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data(
            dev_batches_x, dev_batches_y, eval_func, sampling=True)
        say("{} {} {} {}\n".format(dev_obj, dev_loss, dev_diff, dev_p1))
Beispiel #17
0
    def train(self, train, dev, test, rationale_data):
        args = self.args
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y, dev_batches_bv = myio.create_batches(
                dev[0], dev[1], args.batch, padding_id)
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                test[0], test[1], args.batch, padding_id)
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                [u["xids"] for u in rationale_data],
                [u["y"] for u in rationale_data],
                args.batch,
                padding_id,
                sort=False)

        # start_time = time.time()
        # train_batches_x, train_batches_y = myio.create_batches(
        #     train[0], train[1], args.batch, padding_id
        # )
        # say("{:.2f}s to create training batches\n\n".format(
        #     time.time() - start_time
        # ))

        updates_e, lr_e, gnorm_e = create_optimization_updates(
            cost=self.encoder.cost_e,
            params=self.encoder.params,
            method=args.learning,
            beta1=args.beta1,
            beta2=args.beta2,
            lr=args.learning_rate)[:3]

        updates_g, lr_g, gnorm_g = create_optimization_updates(
            cost=self.encoder.cost_g,
            params=self.generator.params,
            method=args.learning,
            beta1=args.beta1,
            beta2=args.beta2,
            lr=args.learning_rate)[:3]

        sample_generator = theano.function(
            inputs=[self.x],
            outputs=self.z,
            updates=self.generator.sample_updates)

        # get_loss_and_pred = theano.function(
        #     inputs=[self.x, self.y],
        #     outputs=[self.encoder.loss_vec, self.z],
        #     updates=self.generator.sample_updates + self.generator.sample_updates_sent
        # )
        #
        eval_generator = theano.function(
            inputs=[self.x, self.y, self.bv],
            outputs=[self.z, self.encoder.obj, self.encoder.loss],
            updates=self.generator.sample_updates)

        train_generator = theano.function(
            inputs=[self.x, self.y, self.bv],
            outputs=[
                self.encoder.obj, self.encoder.loss,
                self.encoder.sparsity_cost, self.z, gnorm_e, gnorm_g
            ],
            updates=updates_e.items() + updates_g.items() +
            self.generator.sample_updates)

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        last_train_avg_cost = None
        last_dev_avg_cost = None
        tolerance = 0.10 + 1e-3
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        metric_output = open(
            args.train_output_readable + '_METRICS' + '_sparcity_' +
            str(args.sparsity) + '.out', 'w+')

        if args.dev_baseline:
            ofp1 = open(
                args.train_output_readable + '_METRICS' + '_sparcity_' +
                str(args.sparsity) + '_baseline.out', 'w+')
            ofp2 = open(
                args.train_output_readable + '_sparcity_' +
                str(args.sparsity) + '_baseline.out', 'w+')

            dz = myio.convert_bv_to_z(dev_batches_bv)

            myio.write_train_results(dz[0], dev_batches_x[0], dev_batches_y[0],
                                     self.embedding_layer, ofp2, padding_id)
            myio.write_summ_for_rouge(args, dz, dev_batches_x, dev_batches_y,
                                      self.embedding_layer)
            myio.write_metrics(-1, -1, ofp1, -1, args)

            ofp1.close()
            ofp2.close()

        for epoch in xrange(args.max_epochs):
            read_output = open(
                args.train_output_readable + '_e_' + str(epoch) +
                '_sparcity_' + str(args.sparsity) + '.out', 'w+')
            total_words_per_epoch = 0
            total_summaries_per_epoch = 0
            unchanged += 1
            if unchanged > 20:
                metric_output.write("PROBLEM TRAINING, NO DEV IMPROVEMENT")
                metric_output.close()
                break

            train_batches_x, train_batches_y, train_batches_bv = myio.create_batches(
                train[0], train[1], args.batch, padding_id)

            more = True
            if args.decay_lr:
                param_bak = [p.get_value(borrow=False) for p in self.params]

            while more:
                processed = 0
                train_cost = 0.0
                train_loss = 0.0
                train_sparsity_cost = 0.0
                p1 = 0.0
                start_time = time.time()

                N = len(train_batches_x)
                for i in xrange(N):
                    if (i + 1) % 32 == 0:
                        say("\r{}/{} {:.2f}       ".format(
                            i + 1, N, p1 / (i + 1)))

                    bx, by, bv = train_batches_x[i], train_batches_y[
                        i], train_batches_bv[i]
                    mask = bx != padding_id

                    cost, loss, sparsity_cost, bz, gl2_e, gl2_g = train_generator(
                        bx, by, bv)

                    if i % 64 == 0:
                        self.evaluate_rnn_weights(args, epoch, i)

                    if i % 8 == 0:
                        myio.write_train_results(bz, bx, by,
                                                 self.embedding_layer,
                                                 read_output, padding_id)

                    k = len(by)
                    processed += k
                    train_cost += cost
                    train_loss += loss
                    train_sparsity_cost += sparsity_cost
                    p1 += np.sum(bz * mask) / (np.sum(mask) + 1e-8)

                    total_summaries_per_epoch += args.batch
                    total_words_per_epoch += myio.total_words(bz)

                cur_train_avg_cost = train_cost / N

                if dev:
                    self.dropout.set_value(0.0)
                    dev_obj, dev_loss, dev_p1, dev_v, dev_x, dev_y = self.evaluate_data(
                        dev_batches_x,
                        dev_batches_y,
                        dev_batches_bv,
                        eval_generator,
                        sampling=True)

                    self.dropout.set_value(dropout_prob)
                    cur_dev_avg_cost = dev_obj

                    myio.write_train_results(dev_v[0], dev_x[0], dev_y[0],
                                             self.embedding_layer, read_output,
                                             padding_id)
                    myio.write_summ_for_rouge(args, dev_v, dev_x, dev_y,
                                              self.embedding_layer)
                    myio.write_metrics(total_summaries_per_epoch,
                                       total_words_per_epoch, metric_output,
                                       epoch, args)

                    metric_output.flush()

                more = False
                if args.decay_lr and last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost * (1 +
                                                                   tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                            last_train_avg_cost, cur_train_avg_cost))
                    if dev and cur_dev_avg_cost > last_dev_avg_cost * (
                            1 + tolerance):
                        more = True
                        say("\nDev cost {} --> {}\n".format(
                            last_dev_avg_cost, cur_dev_avg_cost))

                if more:
                    lr_val = lr_g.get_value() * 0.5
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    lr_e.set_value(lr_val)
                    say("Decrease learning rate to {}\n".format(float(lr_val)))
                    for p, v in zip(self.params, param_bak):
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost
                if dev: last_dev_avg_cost = cur_dev_avg_cost

                say("\n")
                say((
                    "Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  "
                    + "p[1]={:.2f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n"
                ).format(epoch + (i + 1.0) / N, train_cost / N,
                         train_sparsity_cost / N, train_loss / N, p1 / N,
                         float(gl2_e), float(gl2_g),
                         (time.time() - start_time) / 60.0,
                         (time.time() - start_time) / 60.0 / (i + 1) * N))
                say("\t" + str(["{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.encoder.params]) + "\n")
                say("\t" + str(["{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.generator.params]) + "\n")

                if dev:
                    if dev_obj < best_dev:
                        best_dev = dev_obj
                        unchanged = 0
                        # if args.dump and rationale_data:
                        #     self.dump_rationales(args.dump, valid_batches_x, valid_batches_y,
                        #                          get_loss_and_pred, sample_generator)
                        #
                        # if args.save_model:
                        #     self.save_model(args.save_model, args)

                    say(("\tsampling devg={:.4f}  mseg={:.4f}" +
                         "  p[1]g={:.2f}  best_dev={:.4f}\n").format(
                             dev_obj, dev_loss, dev_p1, best_dev))

                    # if rationale_data is not None:
                    #     self.dropout.set_value(0.0)
                    #     r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale(
                    #         rationale_data, valid_batches_x,
                    #         valid_batches_y, eval_generator)
                    #     self.dropout.set_value(dropout_prob)
                    #     say(("\trationale mser={:.4f}  p[1]r={:.2f}  prec1={:.4f}" +
                    #          "  prec2={:.4f}\n").format(
                    #         r_mse,
                    #         r_p1,
                    #         r_prec1,
                    #         r_prec2
                    #     ))

            read_output.close()

        metric_output.close()
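
The training loops above track p1, the fraction of non-padding tokens the generator selects as rationale, via np.sum(bz*mask)/(np.sum(mask)+1e-8). A small worked example of that statistic:

import numpy as np

padding_id = 0
bx = np.array([[7, 3],        # token ids, shape (seq_len, batch)
               [5, 0],
               [2, 0]])
bz = np.array([[1., 1.],      # generator's 0/1 selections, same shape
               [0., 0.],
               [1., 0.]])

mask = bx != padding_id                        # ignore padding positions
p1 = np.sum(bz * mask) / (np.sum(mask) + 1e-8)
print(p1)                                      # 3 of the 4 real tokens selected -> 0.75
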
Beispiel #18
0
    def train(self, train, dev, test, rationale_data):
        args = self.args
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y = myio.create_batches(
                            dev[0], dev[1], args.batch, padding_id
                        )
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                            test[0], test[1], args.batch, padding_id
                        )
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                    [ u["xids"] for u in rationale_data ],
                    [ u["y"] for u in rationale_data ],
                    args.batch,
                    padding_id,
                    sort = False
                )

        start_time = time.time()
        train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )
        say("{:.2f}s to create training batches\n\n".format(
                time.time()-start_time
            ))

        updates_e, lr_e, gnorm_e = create_optimization_updates(
                               cost = self.generator.cost_e,
                               params = self.encoder.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]


        updates_g, lr_g, gnorm_g = create_optimization_updates(
                               cost = self.generator.cost,
                               params = self.generator.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]

        sample_generator = theano.function(
                inputs = [ self.x ],
                outputs = self.z_pred,
                #updates = self.generator.sample_updates
                #allow_input_downcast = True
            )

        get_loss_and_pred = theano.function(
                inputs = [ self.x, self.z, self.y ],
                outputs = [ self.generator.loss_vec, self.encoder.preds ]
            )

        eval_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.z, self.generator.obj, self.generator.loss,
                                self.encoder.pred_diff ],
                givens = {
                    self.z : self.generator.z_pred
                },
                #updates = self.generator.sample_updates,
                #no_default_updates = True
            )

        train_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.generator.obj, self.generator.loss, \
                                self.generator.sparsity_cost, self.z, gnorm_g, gnorm_e ],
                givens = {
                    self.z : self.generator.z_pred
                },
                #updates = updates_g,
                updates = updates_g.items() + updates_e.items() #+ self.generator.sample_updates,
                #no_default_updates = True
            )

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 10: return

            train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )

            processed = 0
            train_cost = 0.0
            train_loss = 0.0
            train_sparsity_cost = 0.0
            p1 = 0.0
            start_time = time.time()

            N = len(train_batches_x)
            for i in xrange(N):
                if (i+1) % 100 == 0:
                    say("\r{}/{}     ".format(i+1,N))

                bx, by = train_batches_x[i], train_batches_y[i]
                mask = bx != padding_id

                cost, loss, sparsity_cost, bz, gl2_g, gl2_e = train_generator(bx, by)

                k = len(by)
                processed += k
                train_cost += cost
                train_loss += loss
                train_sparsity_cost += sparsity_cost
                p1 += np.sum(bz*mask) / (np.sum(mask)+1e-8)

                if (i == N-1) or (eval_period > 0 and processed/eval_period >
                                    (processed-k)/eval_period):
                    say("\n")
                    say(("Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  " +
                        "p[1]={:.2f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n").format(
                            epoch+(i+1.0)/N,
                            train_cost / (i+1),
                            train_sparsity_cost / (i+1),
                            train_loss / (i+1),
                            p1 / (i+1),
                            float(gl2_g),
                            float(gl2_e),
                            (time.time()-start_time)/60.0,
                            (time.time()-start_time)/60.0/(i+1)*N
                        ))
                    say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                    for x in self.encoder.params ])+"\n")
                    say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                    for x in self.generator.params ])+"\n")

                    if dev:
                        self.dropout.set_value(0.0)
                        dev_obj, dev_loss, dev_diff, dev_p1 = self.evaluate_data(
                                dev_batches_x, dev_batches_y, eval_generator, sampling=True)

                        if dev_obj < best_dev:
                            best_dev = dev_obj
                            unchanged = 0
                            if args.dump and rationale_data:
                                self.dump_rationales(args.dump, valid_batches_x, valid_batches_y,
                                            get_loss_and_pred, sample_generator)

                            if args.save_model:
                                self.save_model(args.save_model, args)

                        say(("\tsampling devg={:.4f}  mseg={:.4f}  avg_diffg={:.4f}" +
                                    "  p[1]g={:.2f}  best_dev={:.4f}\n").format(
                            dev_obj,
                            dev_loss,
                            dev_diff,
                            dev_p1,
                            best_dev
                        ))

                        if rationale_data is not None:
                            r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale(
                                    rationale_data, valid_batches_x,
                                    valid_batches_y, eval_generator)
                            say(("\trationale mser={:.4f}  p[1]r={:.2f}  prec1={:.4f}" +
                                        "  prec2={:.4f}\n").format(
                                    r_mse,
                                    r_p1,
                                    r_prec1,
                                    r_prec2
                            ))

                        self.dropout.set_value(dropout_prob)
Beispiel #19
0
def run(
        in_train_file_embedded, aspect_idx, max_train_examples, batch_size, learning_rate,
        in_validate_file_embedded, max_validate_examples, validate_every,
        sparsity, coherence, use_cuda, debug_print_training_examples,
        num_printed_rationales):
    train_d = embeddings_helper.load_embedded_data(
        in_filename=in_train_file_embedded,
        max_examples=max_train_examples,
        aspect_idx=aspect_idx)
    validate_d = embeddings_helper.load_embedded_data(
        in_filename=in_validate_file_embedded,
        max_examples=max_validate_examples,
        aspect_idx=aspect_idx)
    combined = embeddings_helper.combine_embeddings(
        embedding_list=[train_d['embedding'], validate_d['embedding']],
        idx_by_word_list=[train_d['idx_by_word'], validate_d['idx_by_word']],
        words_lists=[train_d['words'], validate_d['words']],
        x_idxes_list=[train_d['x_idxes'], validate_d['x_idxes']])
    embedding = combined['embedding']
    num_hidden = combined['num_hidden']
    idx_by_word = combined['idx_by_word']
    x_idxes_list = combined['x_idxes_list']
    train_d['x_idxes'] = x_idxes_list[0]
    validate_d['x_idxes'] = x_idxes_list[1]
    words = combined['words']

    # these numbers, ie -0.05 to 0.05 come from
    # https://github.com/taolei87/rcnn/blob/master/code/nn/initialization.py#L79
    unk_idx = idx_by_word['<unk>']
    pad_idx = idx_by_word['<pad>']
    torch.manual_seed(123)
    embedding[unk_idx] = rand_uniform((num_hidden,), -0.05, 0.05)

    # draw validate batches now, since they should be fixed
    torch.manual_seed(124)
    validate_batches_x, validate_batches_y = myio.create_batches(
        x=validate_d['x_idxes'], y=validate_d['y_aspect'], batch_size=batch_size, padding_id=pad_idx)
    validate_num_batches = len(validate_batches_x)
    sample_idxes = np.random.choice(
        validate_num_batches * batch_size, num_printed_rationales, replace=False)
    sample_idxes_by_batch = defaultdict(list)
    for i in range(num_printed_rationales):
        sample_idx = sample_idxes[i]
        b = sample_idx // batch_size
        b_idx = sample_idx % batch_size
        sample_idxes_by_batch[b].append(b_idx)

    enc = Encoder(embeddings=embedding, num_layers=2)
    gen = Generator(embeddings=embedding, num_layers=2, pad_id=pad_idx)
    if use_cuda:
        enc.cuda()
        gen.cuda()
        embedding = embedding.cuda()

    params = filter(lambda p: p.requires_grad, set(enc.parameters()) | set(gen.parameters()))
    opt = optim.Adam(params=params, lr=learning_rate)
    epoch = 0
    while True:
        batches_x, batches_y = myio.create_batches(
            x=train_d['x_idxes'], y=train_d['y_aspect'], batch_size=batch_size, padding_id=pad_idx)
        num_batches = len(batches_x)
        epoch_loss = 0
        print('    t', end='', flush=True)
        epoch_start = time.time()
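        # NOTE: max_len is not defined in this function; it is presumably a
        # module-level constant giving the maximum padded sequence length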
        bx_cuda_buf = torch.LongTensor(max_len, batch_size)
        by_cuda_buf = torch.FloatTensor(batch_size)
        if use_cuda:
            bx_cuda_buf = bx_cuda_buf.cuda()
            by_cuda_buf = by_cuda_buf.cuda()
            # by_cuda = autograd.Variable(by_cuda.cuda())
        for b in range(num_batches):
            # print('b %s' % b)
            print('.', end='', flush=True)
            if b != 0 and b % 70 == 0:
                print('%s/%s' % (b, num_batches))
                print('    t', end='', flush=True)
            gen.zero_grad()
            enc.zero_grad()
            bx = batches_x[b]
            by = batches_y[b]
            # this_seq_len = bx.size()[0]
            seq_len = bx.size()[0]
            batch_size = bx.size()[1]

            if debug_print_training_examples:
                print(rationale_helper.rationale_to_string(words, bx[0]))

            # print('bx.size()', bx.size())
            bx_cuda = autograd.Variable(bx_cuda_buf[:seq_len, :batch_size])
            by_cuda = autograd.Variable(by_cuda_buf[:batch_size])
            # print('bx_cuda.size()', bx_cuda.size())
            bx_cuda.data.copy_(bx)
            by_cuda.data.copy_(by)
            # if use_cuda:
            #     if bx_cuda is None:
            #         bx_cuda = autograd.Variable(bx.cuda())
            #         by_cuda = autograd.Variable(by.cuda())
            #     else:
            #         bx_cuda.data.copy_(bx)
            #         by_cuda.data.copy_(by)

            # print('bx.shape', bx.data.shape)
            rationale_selected_node, rationale_selected, rationales, rationale_lengths = gen.forward(bx_cuda)
            # print('rationales.shape', rationales.shape)
            out = enc.forward(rationales)
            loss_mse = ((by_cuda - out) * (by_cuda - out)).sum().sqrt()
            loss_z1 = rationale_lengths.sum().float()
            loss_transitions = (rationale_selected[1:] - rationale_selected[:-1]).abs().sum().float()
            loss = loss_mse + sparsity * loss_z1 + coherence * loss_transitions
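            # .reinforce() is the legacy (pre-0.4) PyTorch API for stochastic nodes;
            # it feeds the negative loss back as the REINFORCE reward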
            rationale_selected_node.reinforce(-loss.data[0])
            loss.backward(rationale_selected_node)
            opt.step()
            # epoch_loss += loss.data[0]
            epoch_loss += loss_mse.data[0]
        print('%s/%s' % (num_batches, num_batches))
        epoch_train_time = time.time() - epoch_start

        def run_validation():
            # num_batches = len(batches_x)
            epoch_loss = 0
            print('    v', end='', flush=True)
            # bx_cuda = None
            # by_cuda = None
            for b in range(validate_num_batches):
                # print('b %s' % b)
                print('.', end='', flush=True)
                if b != 0 and b % 70 == 0:
                    print('%s/%s' % (b, validate_num_batches))
                    print('    v', end='', flush=True)
                bx = validate_batches_x[b]
                by = validate_batches_y[b]

                seq_len = bx.size()[0]
                batch_size = bx.size()[1]

                bx_cuda = autograd.Variable(bx_cuda_buf[:seq_len, :batch_size])
                by_cuda = autograd.Variable(by_cuda_buf[:batch_size])
                bx_cuda.data.copy_(bx)
                by_cuda.data.copy_(by)

                # if use_cuda:
                #     bx = bx.cuda()
                #     by = by.cuda()
                # if use_cuda:
                # if bx_cuda is None:
                #     bx_cuda = autograd.Variable(bx.cuda())
                #     by_cuda = autograd.Variable(by.cuda())
                # else:
                #     bx_cuda.data.copy_(bx)
                #     by_cuda.data.copy_(by)
                rationale_selected_node, rationale_selected, rationales, rationale_lengths = gen.forward(bx_cuda)
                out = enc.forward(rationales)
                loss = ((by_cuda - out) * (by_cuda - out)).sum().sqrt()
                # print some sample rationales...
                for idx in sample_idxes_by_batch[b]:
                    # print('rationales.shape', rationales.size(), 'idx', idx)
                    rationale = rationales[:, idx]
                    # print('rationale.shape', rationale.size())
                    rationale_str = rationale_helper.rationale_to_string(words=words, rationale=rationale)
                    print('    [%s]' % rationale_str)
                epoch_loss += loss.data[0]
            print('%s/%s' % (validate_num_batches, validate_num_batches))
            return epoch_loss / validate_num_batches

        if (epoch + 1) % validate_every == 0:
            validation_loss = run_validation()
            print('epoch %s train loss %.3f traintime %s validate loss %.3f' % (
                epoch, epoch_loss / num_batches, int(epoch_train_time), validation_loss))
            # print('    validate loss %.3f' % (epoch_loss / num_batches))
        else:
            print('epoch %s train loss %.3f traintime %s' % (epoch, epoch_loss / num_batches, int(epoch_train_time)))
        gc.collect()
        gc.collect()
        epoch += 1
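
The PyTorch example above combines three loss terms: the prediction error (loss_mse), a sparsity term counting the selected words (loss_z1) and a coherence term counting on/off transitions along the sequence (loss_transitions). A minimal sketch of the two regularizers on a binary selection mask, detached from the surrounding model code; the weights are illustrative:

import torch

# z: binary rationale mask, shape (seq_len, batch); 1 = word kept in the rationale
z = torch.tensor([[1., 0.],
                  [1., 1.],
                  [0., 1.],
                  [1., 0.]])

loss_z1 = z.sum()                                  # sparsity: number of selected words
loss_transitions = (z[1:] - z[:-1]).abs().sum()    # coherence: penalize fragmented selections

sparsity, coherence = 3e-4, 2.0
regularizer = sparsity * loss_z1 + coherence * loss_transitions
print(loss_z1.item(), loss_transitions.item(), regularizer.item())
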
Beispiel #20
0
def main():
    print(args)
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(args.embedding)

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      nclasses=len(train_y[0]))
        model.ready()

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  #(test_x, test_y),
            rationale_data if args.load_rationale else None)

    if args.load_model and args.dev and not args.train:
        model = Model(args=None, embedding_layer=embedding_layer, nclasses=-1)
        model.load_model(args.load_model)
        say("model loaded successfully.\n")

        # compile an evaluation function
        eval_func = theano.function(
            inputs=[model.x, model.y],
            outputs=[
                model.z, model.generator.obj, model.generator.loss,
                model.encoder.pred_diff
            ],
            givens={model.z: model.generator.z_pred},
        )

        # compile a predictor function
        pred_func = theano.function(
            inputs=[model.x],
            outputs=[model.z, model.encoder.preds],
            givens={model.z: model.generator.z_pred},
        )

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        dev_batches_x, dev_batches_y = myio.create_batches(
            dev_x, dev_y, args.batch, padding_id)

        # disable dropout
        model.dropout.set_value(0.0)
        dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data(
            dev_batches_x, dev_batches_y, eval_func, sampling=True)
        say("{} {} {} {}\n".format(dev_obj, dev_loss, dev_diff, dev_p1))
Beispiel #21
0
def main():
    print args
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(
                        args.embedding
                    )

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        train_x = [ embedding_layer.map_to_ids(x)[:max_len] for x in train_x ]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        dev_x = [ embedding_layer.map_to_ids(x)[:max_len] for x in dev_x ]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(
                    args = args,
                    embedding_layer = embedding_layer,
                    nclasses = len(train_y[0])
                )
        model.ready()

        #debug_func2 = theano.function(
        #        inputs = [ model.x, model.z ],
        #        outputs = model.generator.logpz
        #    )
        #theano.printing.debugprint(debug_func2)
        #return

        model.train(
                (train_x, train_y),
                (dev_x, dev_y) if args.dev else None,
                None, #(test_x, test_y),
                rationale_data if args.load_rationale else None
            )

    if args.load_model and args.dev and not args.train:
        model = Model(
                    args = None,
                    embedding_layer = embedding_layer,
                    nclasses = -1
                )
        model.load_model(args.load_model)
        say("model loaded successfully.\n")

        # compile an evaluation function
        eval_func = theano.function(
                inputs = [ model.x, model.y ],
                outputs = [ model.z, model.encoder.obj, model.encoder.loss,
                                model.encoder.pred_diff ],
                updates = model.generator.sample_updates
            )

        # compile a predictor function
        pred_func = theano.function(
                inputs = [ model.x ],
                outputs = [ model.z, model.encoder.preds ],
                updates = model.generator.sample_updates
            )

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        dev_batches_x, dev_batches_y = myio.create_batches(
                        dev_x, dev_y, args.batch, padding_id
                    )

        # disable dropout
        model.dropout.set_value(0.0)
        dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data(
                dev_batches_x, dev_batches_y, eval_func, sampling=True)
        say("{} {} {} {}\n".format(dev_obj, dev_loss, dev_diff, dev_p1))
Beispiel #22
0
    def train(self, train, dev, test, rationale_data, trained_max_epochs=None):
        args = self.args
        args.trained_max_epochs = self.trained_max_epochs = trained_max_epochs
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y = myio.create_batches(
                dev[0], dev[1], args.batch, padding_id)
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                test[0], test[1], args.batch, padding_id)
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                [u["xids"] for u in rationale_data],
                [u["y"] for u in rationale_data],
                args.batch,
                padding_id,
                sort=False)

        start_time = time.time()
        train_batches_x, train_batches_y = myio.create_batches(
            train[0], train[1], args.batch, padding_id)
        say("{:.2f}s to create training batches\n\n".format(time.time() -
                                                            start_time))
        updates_e, lr_e, gnorm_e = create_optimization_updates(
            cost=self.encoder.cost_e,
            params=self.encoder.params,
            method=args.learning,
            beta1=args.beta1,
            beta2=args.beta2,
            lr=args.learning_rate)[:3]

        updates_g, lr_g, gnorm_g = create_optimization_updates(
            cost=self.encoder.cost_g,
            params=self.generator.params,
            method=args.learning,
            beta1=args.beta1,
            beta2=args.beta2,
            lr=args.learning_rate)[:3]

        sample_generator = theano.function(
            inputs=[self.x],
            outputs=self.z,
            #updates = self.generator.sample_updates
        )

        get_loss_and_pred = theano.function(
            inputs=[self.x, self.y],
            outputs=[self.encoder.loss_vec, self.encoder.preds, self.z],
            #updates = self.generator.sample_updates
        )

        eval_generator = theano.function(
            inputs=[self.x, self.y],
            outputs=[
                self.z, self.encoder.obj, self.encoder.loss,
                self.encoder.pred_diff
            ],
            #updates = self.generator.sample_updates
        )
        sample_generator = theano.function(
            inputs=[self.x],
            outputs=self.z,
            #updates = self.generator.sample_updates
        )
        sample_encoder = theano.function(
            inputs=[self.x, self.y, self.z],
            outputs=[
                self.encoder.obj, self.encoder.loss, self.encoder.pred_diff
            ],
            #updates = self.generator.sample_updates
        )

        train_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                                self.encoder.sparsity_cost, self.z, self.word_embs, gnorm_e, gnorm_g ],
                updates = updates_e.items() + updates_g.items() #+ self.generator.sample_updates,
            )

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        last_train_avg_cost = None
        last_dev_avg_cost = None
        tolerance = 0.10 + 1e-3
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        for epoch_ in xrange(args.max_epochs -
                             50):  # e.g. run 50 epochs when max_epochs = 100 is given
            #print(" max epochs in train func: ", args.max_epochs)
            epoch = args.trained_max_epochs + epoch_
            unchanged += 1
            if unchanged > 25:
                print 'dev cost has not improved for more than 25 epochs since the best dev result'
                #return

            train_batches_x, train_batches_y = myio.create_batches(
                train[0], train[1], args.batch, padding_id)

            more = True
            if args.decay_lr:
                param_bak = [p.get_value(borrow=False) for p in self.params]

            start_train_generate = time.time()
            more_counter = 0
            while more:
                processed = 0
                train_cost = 0.0
                train_loss = 0.0
                train_sparsity_cost = 0.0
                p1 = 0.0
                start_time = time.time()

                N = len(train_batches_x)
                #print(" begining : ", train_cost )
                for i in xrange(N):
                    if (i + 1) % 100 == 0:
                        say("\r{}/{} {:.2f}       ".format(
                            i + 1, N, p1 / (i + 1)))

                    bx, by = train_batches_x[i], train_batches_y[i]
                    mask = bx != padding_id
                    start_train_time = time.time()
                    cost, loss, sparsity_cost, bz, emb, gl2_e, gl2_g = train_generator(
                        bx, by)
                    #print('gl2_g: ' , gl2_g)

                    k = len(by)
                    processed += k
                    train_cost += cost
                    train_loss += loss
                    train_sparsity_cost += sparsity_cost
                    p1 += np.sum(bz * mask) / (np.sum(mask) + 1e-8)

                cur_train_avg_cost = train_cost / N
                #print(" end : ", cur_train_avg_cost )
                say("train generate  time: {} \n".format(time.time() -
                                                         start_train_generate))
                if dev:
                    self.dropout.set_value(0.0)
                    start_dev_time = time.time()
                    dev_obj, dev_loss, dev_diff, dev_p1 = self.evaluate_data(
                        dev_batches_x,
                        dev_batches_y,
                        eval_generator,
                        sampling=True)
                    self.dropout.set_value(dropout_prob)
                    say("dev evaluate data time: {} \n".format(time.time() -
                                                               start_dev_time))
                    cur_dev_avg_cost = dev_obj

                more = False
                if args.decay_lr and last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost * (1 +
                                                                   tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                            last_train_avg_cost, cur_train_avg_cost))
                    if dev and cur_dev_avg_cost > last_dev_avg_cost * (
                            1 + tolerance):
                        more = True
                        say("\nDev cost {} --> {}\n".format(
                            last_dev_avg_cost, cur_dev_avg_cost))
                if more:
                    more_counter += 1
                    if more_counter < 20: more = False
                if more:
                    more_counter = 0
                    lr_val = lr_g.get_value() * 0.5
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    lr_e.set_value(lr_val)
                    say("Decrease learning rate to {} at epoch {}\n".format(
                        float(lr_val), epoch_ + 1))
                    for p, v in zip(self.params, param_bak):
                        #print ('param restoreing: ', p, v)
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost
                if dev: last_dev_avg_cost = cur_dev_avg_cost

                say("\n")
                say((
                    "Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  "
                    + "p[1]={:.2f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n"
                ).format(epoch + (i + 1.0) / N, train_cost / N,
                         train_sparsity_cost / N, train_loss / N, p1 / N,
                         float(gl2_e), float(gl2_g),
                         (time.time() - start_time) / 60.0,
                         (time.time() - start_time) / 60.0 / (i + 1) * N))
                say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.encoder.params ])+"\n")
                say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.generator.params ])+"\n")
                say("total encode time = {} total geneartor time = {} \n".
                    format(total_encode_time, total_generate_time))

                if epoch_ % args.save_every == 0:  #and epoch_>0:
                    print 'saving model after epoch {}, file name: {}'.format(
                        epoch_ + 1, args.save_model + str(epoch_))
                    self.save_model(args.save_model + str(epoch_), args)

                if dev:
                    if dev_obj < best_dev:
                        best_dev = dev_obj
                        unchanged = 0
                        if args.dump and rationale_data:
                            self.dump_rationales(args.dump, valid_batches_x,
                                                 valid_batches_y,
                                                 get_loss_and_pred,
                                                 sample_generator)

                        if args.save_model:
                            print 'saving best model after epoch {}, file name: {}'.format(
                                epoch_ + 1, args.save_model)
                            self.save_model(args.save_model, args)

                    say((
                        "\tsampling devg={:.4f}  mseg={:.4f}  avg_diffg={:.4f}"
                        + "  p[1]g={:.2f}  best_dev={:.4f}\n").format(
                            dev_obj, dev_loss, dev_diff, dev_p1, best_dev))

                    if rationale_data is not None:
                        self.dropout.set_value(0.0)

                        start_rational_time = time.time()
                        #r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale(
                        #        rationale_data, valid_batches_x,
                        #        valid_batches_y, eval_generator)

                        r_mse, r_p1, r_prec1, r_prec2, gen_time, enc_time, prec_cal_time = self.evaluate_rationale(
                            rationale_data, valid_batches_x, valid_batches_y,
                            sample_generator, sample_encoder, eval_generator)

                        self.dropout.set_value(dropout_prob)
                        say((
                            "\trationale mser={:.4f}  p[1]r={:.2f}  prec1={:.4f}"
                            + "  prec2={:.4f}  time needed for rationale={}\n"
                        ).format(r_mse, r_p1, r_prec1, r_prec2,
                                 time.time() - start_rational_time))
def main(args):
    raw_corpus = myio.read_corpus(args.corpus, args.translations or None,
                                  args.translatable_ids or None,
                                  args.generated_questions_train or None)

    generated_questions_eval = myio.read_generated_questions(
        args.generated_questions)

    embedding_layer = None
    if args.trainable_embeddings == 1:
        embedding_layer = myio.create_embedding_layer(
            raw_corpus,
            n_d=args.hidden_dim,
            cut_off=args.cut_off,
            embs=load_embedding_iterator(args.embeddings)
            if args.embeddings else None,
            fix_init_embs=False)
    else:
        embedding_layer = myio.create_embedding_layer(
            raw_corpus,
            n_d=args.hidden_dim,
            cut_off=args.cut_off,
            embs=load_embedding_iterator(args.embeddings)
            if args.embeddings else None)
    ids_corpus = myio.map_corpus(raw_corpus,
                                 embedding_layer,
                                 max_len=args.max_seq_len,
                                 generated_questions=generated_questions_eval)
    say("vocab size={}, corpus size={}\n".format(embedding_layer.n_V,
                                                 len(raw_corpus)))
    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        # dev = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
        dev = myio.read_annotations(args.dev,
                                    K_neg=args.dev_pool_size,
                                    prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus,
                                       dev,
                                       padding_id,
                                       pad_left=not args.average)
    if args.test:
        test = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus,
                                        test,
                                        padding_id,
                                        pad_left=not args.average)

    if args.train:
        start_time = time.time()
        train = myio.read_annotations(
            args.train, training_data_percent=args.training_data_percent)
        train_batches = myio.create_batches(ids_corpus,
                                            train,
                                            args.batch_size,
                                            padding_id,
                                            pad_left=not args.average,
                                            include_generated_questions=True)
        say("{} to create batches\n".format(time.time() - start_time))
        say("{} batches, {} tokens in total, {} triples in total\n".format(
            len(train_batches),
            sum(len(x[0].ravel()) + len(x[1].ravel()) for x in train_batches),
            sum(len(x[2].ravel()) for x in train_batches)))
        train_batches = None  # discard; these batches were built only to report the statistics above

        model = Model(args,
                      embedding_layer,
                      weights=weights if args.reweight else None)
        # print('args.average: '+args.average)
        model.ready()

        # set parameters using pre-trained network
        if args.do_train == 1:
            if args.load_pretrain:
                model.load_pretrained_parameters(args)

            model.train(ids_corpus, train, dev if args.dev else None,
                        test if args.test else None)

        # evaluate with the pre-trained parameters; increase the range in the loop
        # below to average predictions over multiple runs
        if args.do_evaluate == 1:
            model.load_pretrained_parameters(args)
            # model.set_model(model.load_model(args.load_pretrain))
            for i in range(1):
                r = model.just_eval(dev if args.dev else None,
                                    test if args.test else None)

        # ANALYZE the results
        if len(args.analyze_file.strip()) > 0:
            model.load_pretrained_parameters(args)
            file_name = args.analyze_file.strip()  # e.g. 'AskUbuntu.Rcnn_analysis3.gt(es)-gt.txt'
            model.analyze(file_name, embedding_layer, dev)
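
The batching calls above pad either on the left (pad_left=not args.average) or on the right, presumably so that averaging pools from the first token while last-state pooling ends on a real token. The sketch below is only an illustration of such a padding step; pad_batch and its layout are assumptions, not the actual myio implementation.

import numpy as np

def pad_batch(sequences, padding_id, pad_left=True):
    # sequences: list of 1-D integer arrays of token ids, one per question
    # returns a (max_len, batch_size) matrix filled with padding_id
    max_len = max(len(seq) for seq in sequences)
    batch = np.full((max_len, len(sequences)), padding_id, dtype='int32')
    for j, seq in enumerate(sequences):
        if pad_left:
            batch[max_len - len(seq):, j] = seq   # real tokens end at the last row
        else:
            batch[:len(seq), j] = seq             # real tokens start at the first row
    return batch

# example: two token-id sequences padded on the left with padding_id=0
# pad_batch([np.array([5, 7, 9]), np.array([4, 2])], padding_id=0)
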
Beispiel #24
0
    def train(self, ids_corpus, train, dev=None, test=None):
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

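        # build the optimizer updates for all parameters; the call also returns the
        # shared learning rate and the gradient norm used for logging below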
        updates, lr, gnorm = create_optimization_updates(
            cost=self.cost,
            params=self.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

        train_func = theano.function(inputs=[self.idts, self.idbs, self.idps],
                                     outputs=[self.cost, self.loss, gnorm],
                                     updates=updates)

        eval_func = theano.function(inputs=[self.idts, self.idbs],
                                    outputs=self.scores,
                                    on_unused_input='ignore')

        say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        result_table = PrettyTable(
            ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
            ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 15: break

            start_time = time.time()

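            # re-read the annotations and rebuild the training batches at the start
            # of every epoch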
            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus,
                                                train,
                                                batch_size,
                                                padding_id,
                                                pad_left=not args.average)
            N = len(train_batches)

            train_loss = 0.0
            train_cost = 0.0

            for i in xrange(N):
                # get current batch
                idts, idbs, idps = train_batches[i]

                cur_cost, cur_loss, grad_norm = train_func(idts, idbs, idps)
                train_loss += cur_loss
                train_cost += cur_cost

                if i % 10 == 0:
                    say("\r{}/{}".format(i, N))

                if i == N - 1:
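                    # last batch of the epoch: switch dropout off, evaluate on dev/test,
                    # track the best dev MRR, then restore the dropout rate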
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(
                            dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(
                            test, eval_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row([epoch] + [
                            "%.2f" % x
                            for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                            [test_MAP, test_MRR, test_P1, test_P5]
                        ])
                        if args.save_model:
                            self.save_model(args.save_model)

                    dropout_p = np.float64(args.dropout).astype(
                        theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f}" \
                        +"\tMRR={:.2f},{:.2f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            dev_MRR,
                            best_dev,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Beispiel #25
0
    def train(self, ids_corpus, train, dev=None, test=None):
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        if dev is not None:
            dev, dev_raw = dev
        if test is not None:
            test, test_raw = test

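        # separate optimizers: the generator is always trained on the encoder's cost_g
        # objective; the encoder itself is only updated (at a 10x smaller learning rate)
        # when joint training is enabled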
        if args.joint:
            updates_e, lr_e, gnorm_e = create_optimization_updates(
                    cost = self.encoder.cost_e, #self.encoder.cost,
                    params = self.encoder.params,
                    lr = args.learning_rate*0.1,
                    method = args.learning
                )[:3]
        else:
            updates_e = {}

        updates_g, lr_g, gnorm_g = create_optimization_updates(
                cost = self.encoder.cost_g,
                params = self.generator.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_func = theano.function(
                inputs = [ self.x, self.triples, self.pairs ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                        self.encoder.sparsity_cost, self.generator.p1, gnorm_g ],
                updates = updates_g.items() + updates_e.items() + self.generator.sample_updates,
                #no_default_updates = True,
                on_unused_input= "ignore"
            )

        eval_func = theano.function(
                inputs = [ self.x ],
                outputs = self.encoder.scores
            )

        eval_func2 = theano.function(
                inputs = [ self.x ],
                outputs = [ self.encoder.scores_z, self.generator.p1, self.z ],
                updates = self.generator.sample_updates,
                #no_default_updates = True
            )


        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat(self.encoder.params)
            ))
        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat(self.generator.params)
            ))

        result_table = PrettyTable(["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
                                    ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])
        last_train_avg_cost = None
        tolerance = 0.5 + 1e-3
        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 20: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                    padding_id, pad_left=not args.average, merge=args.merge)
            N = len(train_batches)

            more = True
            param_bak = [ p.get_value(borrow=False) for p in self.params ]

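            # repeat the epoch until it completes without a cost regression
            # (see the learning-rate halving and parameter restore below)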
            while more:

                train_loss = 0.0
                train_cost = 0.0
                train_scost = 0.0
                train_p1 = 0.0

                for i in xrange(N):
                    # get current batch
                    idts, triples, pairs = train_batches[i]

                    cur_cost, cur_loss, cur_scost, cur_p1, gnormg = train_func(idts,
                                                                                triples, pairs)
                    train_loss += cur_loss
                    train_cost += cur_cost
                    train_scost += cur_scost
                    train_p1 += cur_p1

                    if i % 10 == 0:
                        say("\r{}/{} {:.3f}".format(i,N,train_p1/(i+1)))

                cur_train_avg_cost = train_cost / N
                more = False
                if last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost*(1+tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                                last_train_avg_cost, cur_train_avg_cost
                            ))

                if more:
                    lr_val = lr_g.get_value()*0.5
                    if lr_val < 1e-5: return
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    lr_e.set_value(lr_val)
                    say("Decrease learning rate to {}\n".format(float(lr_val)))
                    for p, v in zip(self.params, param_bak):
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost

                say("\r\n\n")
                say( ( "Epoch {}  cost={:.3f}  loss={:.3f}  scost={:.3f}" \
                    +"  P[1]={:.3f}  |g|={:.3f}\t[{:.3f}m]\n" ).format(
                        epoch,
                        train_cost / N,
                        train_loss / N,
                        train_scost / N,
                        train_p1 / N,
                        float(gnormg),
                        (time.time()-start_time)/60.0
                ))
                say("\tp_norm: {}\n".format(
                        self.get_pnorm_stat(self.encoder.params)
                    ))
                say("\tp_norm: {}\n".format(
                        self.get_pnorm_stat(self.generator.params)
                    ))

                self.dropout.set_value(0.0)

                if dev is not None:
                    full_MAP, full_MRR, full_P1, full_P5 = self.evaluate(dev, eval_func)
                    dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT = self.evaluate_z(dev,
                            dev_raw, ids_corpus, eval_func2)

                if test is not None:
                    test_MAP, test_MRR, test_P1, test_P5, test_PZ1, test_PT = \
                            self.evaluate_z(test, test_raw, ids_corpus, eval_func2)

                if dev_MAP > best_dev:
                    best_dev = dev_MAP
                    unchanged = 0

                say("\n")
                say("  fMAP={:.2f} fMRR={:.2f} fP1={:.2f} fP5={:.2f}\n".format(
                        full_MAP, full_MRR,
                        full_P1, full_P5
                    ))

                say("\n")
                say(("  dMAP={:.2f} dMRR={:.2f} dP1={:.2f} dP5={:.2f}" +
                     " dP[1]={:.3f} d%T={:.3f} best_dev={:.2f}\n").format(
                        dev_MAP, dev_MRR,
                        dev_P1, dev_P5,
                        dev_PZ1, dev_PT, best_dev
                    ))

                result_table.add_row(
                        [ epoch ] +
                        [ "%.2f" % x for x in [ dev_MAP, dev_MRR, dev_P1, dev_P5 ] +
                                    [ test_MAP, test_MRR, test_P1, test_P5 ] ]
                    )

                if unchanged == 0:
                    say("\n")
                    say(("  tMAP={:.2f} tMRR={:.2f} tP1={:.2f} tP5={:.2f}" +
                        " tP[1]={:.3f} t%T={:.3f}\n").format(
                        test_MAP, test_MRR,
                        test_P1, test_P5,
                        test_PZ1, test_PT
                    ))
                    if args.dump_rationale:
                        self.evaluate_z(dev+test, dev_raw+test_raw, ids_corpus,
                                eval_func2, args.dump_rationale)

                    #if args.save_model:
                    #    self.save_model(args.save_model)

                dropout_p = np.float64(args.dropout).astype(
                            theano.config.floatX)
                self.dropout.set_value(dropout_p)

                say("\n")
                say("{}".format(result_table))
                say("\n")

            if train_p1/N <= 1e-4 or train_p1/N+1e-4 >= 1.0:
                break
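
Both rationale trainers above share the same recovery scheme: parameters are backed up before each epoch, and when the average cost regresses by more than the tolerance the learning rate is halved, the backup is restored and the epoch is repeated. Below is a standalone sketch of that pattern, with illustrative names and plain numpy arrays instead of Theano shared variables.

import numpy as np

def train_with_backtracking(params, run_epoch, lr=0.001,
                            tolerance=0.5 + 1e-3, max_epochs=50, min_lr=1e-5):
    # params: list of numpy arrays that run_epoch(params, lr) updates in place,
    # returning the average training cost of the pass
    last_cost = None
    for epoch in range(max_epochs):
        backup = [p.copy() for p in params]
        cost = run_epoch(params, lr)
        while last_cost is not None and cost > last_cost * (1 + tolerance):
            lr *= 0.5                        # cost regressed: halve the learning rate
            if lr < min_lr:
                return                       # give up once the rate gets too small
            for p, b in zip(params, backup):
                p[...] = b                   # restore the pre-epoch parameters
            cost = run_epoch(params, lr)     # and redo the epoch at the smaller rate
        last_cost = cost
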