Example #1
 def save_model(self, path):
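     # Gather the parameters of every even-indexed layer (one per depth
     # level) and pickle them, together with the hyper-parameters needed
     # to rebuild the model, into a gzip-compressed file.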
     args = self.args
     lst_params = [ ]
     for i in range(args.depth):
         lst_params.append(self.layers[i*2].params)
     with gzip.open(path,"w") as fout:
         pickle.dump(
                 { "d": args.hidden_dim,
                   "layer_type": args.layer,
                   "args": args,
                   "params": lst_params },
                 fout,
                 protocol = pickle.HIGHEST_PROTOCOL
             )
     say(" \tmodel saved.\n")
Example #2
 def save_model(self, path):
     args = self.args
     lst_params = []
     for i in range(args.depth):
         lst_params.append(self.layers[i * 2].params)
     with gzip.open(path, "w") as fout:
         pickle.dump(
             {
                 "d": args.hidden_dim,
                 "layer_type": args.layer,
                 "args": args,
                 "params": lst_params
             },
             fout,
             protocol=pickle.HIGHEST_PROTOCOL)
     say(" \tmodel saved.\n")
Example #3
    def __init__(self, model_path, corpus_path, emb_path):
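        # Rebuild the embedding layer from the corpus, restore the trained
        # model parameters, switch dropout off for inference, and compile a
        # Theano function that scores candidate questions against a query.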
        raw_corpus = myio.read_corpus(corpus_path)
        embedding_layer = myio.create_embedding_layer(
            raw_corpus,
            n_d=10,
            cut_off=1,
            embs=load_embedding_iterator(emb_path))
        weights = myio.create_idf_weights(corpus_path, embedding_layer)
        say("vocab size={}, corpus size={}\n".format(embedding_layer.n_V,
                                                     len(raw_corpus)))

        model = Model(args=None,
                      embedding_layer=embedding_layer,
                      weights=weights)

        model_data = model.load_model(model_path)
        model.set_model(model_data)
        model.dropout.set_value(0.0)
        say("model initialized\n")

        score_func = theano.function(inputs=[model.idts, model.idbs],
                                     outputs=model.scores,
                                     on_unused_input='ignore')
        self.model = model
        self.score_func = score_func
        say("scoring function compiled\n")
Example #4
    def __init__(self, model_path, corpus_path, emb_path):
        raw_corpus = myio.read_corpus(corpus_path)
        embedding_layer = myio.create_embedding_layer(
                    raw_corpus,
                    n_d = 10,
                    cut_off = 1,
                    embs = load_embedding_iterator(emb_path)
                )
        weights = myio.create_idf_weights(corpus_path, embedding_layer)
        say("vocab size={}, corpus size={}\n".format(
                embedding_layer.n_V,
                len(raw_corpus)
            ))

        model = Model(args=None, embedding_layer=embedding_layer,
                    weights=weights)

        model_data = model.load_model(model_path)
        model.set_model(model_data)
        model.dropout.set_value(0.0)
        say("model initialized\n")

        score_func = theano.function(
                inputs = [ model.idts, model.idbs ],
                outputs = model.scores,
                on_unused_input='ignore'
            )
        self.model = model
        self.score_func = score_func
        say("scoring function compiled\n")
Example #5
def main(args):
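    # End-to-end training driver: build the vocabulary and embedding layer,
    # map the corpus to word ids, prepare dev/test evaluation batches, then
    # construct and train the model.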
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = myio.create_embedding_layer(
        raw_corpus,
        n_d=args.hidden_dim,
        embs=(load_embedding_iterator(args.embeddings)
              if args.embeddings else None))
    ids_corpus = myio.map_corpus(raw_corpus,
                                 embedding_layer,
                                 max_len=args.max_seq_len)
    say("vocab size={}, corpus size={}\n".format(embedding_layer.n_V,
                                                 len(raw_corpus)))
    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        dev_raw = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus,
                                       dev_raw,
                                       padding_id,
                                       pad_left=not args.average,
                                       merge=args.merge)
    if args.test:
        test_raw = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus,
                                        test_raw,
                                        padding_id,
                                        pad_left=not args.average,
                                        merge=args.merge)

    if args.train:
        start_time = time.time()
        train = myio.read_annotations(args.train)
        train_batches = myio.create_batches(ids_corpus,
                                            train,
                                            args.batch_size,
                                            padding_id,
                                            pad_left=not args.average,
                                            merge=args.merge)
        say("{} to create batches\n".format(time.time() - start_time))
        say("{} batches, {} tokens in total, {} triples in total\n".format(
            len(train_batches), sum(len(x[0].ravel()) for x in train_batches),
            sum(len(x[1].ravel()) for x in train_batches)))
        train_batches = None

        model = Model(args,
                      embedding_layer,
                      weights=weights if args.reweight else None)
        model.ready()

        # set parameters using pre-trained network
        if args.load_pretrain:
            model.encoder.load_pretrained_parameters(args)

        model.train(ids_corpus, train, (dev, dev_raw) if args.dev else None,
                    (test, test_raw) if args.test else None)
Example #6
def main(args):
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = myio.create_embedding_layer(
                raw_corpus,
                n_d = args.hidden_dim,
                cut_off = args.cut_off,
                embs = load_embedding_iterator(args.embeddings) if args.embeddings else None
            )
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer)
    say("vocab size={}, corpus size={}\n".format(
            embedding_layer.n_V,
            len(raw_corpus)
        ))
    padding_id = embedding_layer.vocab_map["<padding>"]
    bos_id = embedding_layer.vocab_map["<s>"]
    eos_id = embedding_layer.vocab_map["</s>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        dev = myio.read_annotations(args.dev, K_neg=20, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus, dev, padding_id)
    if args.test:
        test = myio.read_annotations(args.test, K_neg=20, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus, test, padding_id)

    if args.heldout:
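        # Split the corpus by id into held-out and training portions, and
        # build auto-encoding batches from the held-out part for perplexity
        # evaluation.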
        with open(args.heldout) as fin:
            heldout_ids = fin.read().split()
        heldout_corpus = dict((id, ids_corpus[id]) for id in heldout_ids if id in ids_corpus)
        train_corpus = dict((id, ids_corpus[id]) for id in ids_corpus
                                                if id not in heldout_corpus)
        heldout = myio.create_batches(heldout_corpus, [ ], args.batch_size,
                    padding_id, bos_id, eos_id, auto_encode=True)
        heldout = [ myio.create_one_batch(b1, t2, padding_id) for t1, b1, t2 in heldout ]
        say("heldout examples={}\n".format(len(heldout_corpus)))

    if args.train:
        model = Model(args, embedding_layer,
                      weights=weights if args.reweight else None)

        start_time = time.time()
        train = myio.read_annotations(args.train)
        if not args.use_anno: train = [ ]
        train_batches = myio.create_batches(ids_corpus, train, args.batch_size,
                    model.padding_id, model.bos_id, model.eos_id, auto_encode=True)
        say("{} to create batches\n".format(time.time()-start_time))

        model.ready()
        model.train(
                ids_corpus if not args.heldout else train_corpus,
                train,
                dev if args.dev else None,
                test if args.test else None,
                heldout if args.heldout else None
            )
Example #7
def main(args):
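    # Variant of the training driver with dev/test evaluation and
    # pre-trained parameter loading commented out; it trains on the
    # annotations alone.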
    raw_corpus = myio.read_corpus(args.corpus)
    print("raw corpus:", args.corpus, "len:", len(raw_corpus))
    embedding_layer = myio.create_embedding_layer(
                raw_corpus,
                n_d = args.hidden_dim,
                cut_off = args.cut_off,
                embs = None # embs = load_embedding_iterator(args.embeddings) if args.embeddings else None
            )
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, max_len=args.max_seq_len)
    myio.say("vocab size={}, corpus size={}\n".format(
            embedding_layer.n_V,
            len(raw_corpus)
        ))
    padding_id = embedding_layer.vocab_map["<padding>"]
 
    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

# 
#     if args.dev:
#         dev = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
#         dev = myio.create_eval_batches(ids_corpus, dev, padding_id, pad_left = not args.average)
#     if args.test:
#         test = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
#         test = myio.create_eval_batches(ids_corpus, test, padding_id, pad_left = not args.average)
 
    if args.train:
        start_time = time.time()
        train = myio.read_annotations(args.train)
        print("training data:", args.train, "len:", len(train))
        train_batches = myio.create_batches(ids_corpus, train, args.batch_size,
                                padding_id, pad_left = not args.average)
        myio.say("{:.2f} secs to create {} batches of size {}\n".format( (time.time()-start_time), len(train_batches), args.batch_size))
        myio.say("{} batches, {} tokens in total, {} triples in total\n".format(
                len(train_batches),
                sum(len(x[0].ravel())+len(x[1].ravel()) for x in train_batches),
                sum(len(x[2].ravel()) for x in train_batches)
            ))
#         train_batches = None
 
        model = Model(args, embedding_layer,
                      weights=weights if args.reweight else None)
        model.ready()
 
#         # set parameters using pre-trained network
#         if args.load_pretrain:
#             model.load_pretrained_parameters(args)
# 
        model.train(
                ids_corpus,
                train,
                dev = None, # dev if args.dev else None,
                test = None # test if args.test else None
            )
Example #8
def main(args):
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = myio.create_embedding_layer(
                raw_corpus,
                n_d = args.hidden_dim,
                embs = load_embedding_iterator(args.embeddings) if args.embeddings else None
            )
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, max_len=args.max_seq_len)
    say("vocab size={}, corpus size={}\n".format(
            embedding_layer.n_V,
            len(raw_corpus)
        ))
    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        dev_raw = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus, dev_raw, padding_id,
                    pad_left=not args.average, merge=args.merge)
    if args.test:
        test_raw = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus, test_raw, padding_id,
                    pad_left=not args.average, merge=args.merge)

    if args.train:
        start_time = time.time()
        train = myio.read_annotations(args.train)
        train_batches = myio.create_batches(ids_corpus, train, args.batch_size,
                                padding_id, pad_left = not args.average, merge=args.merge)
        say("{} to create batches\n".format(time.time()-start_time))
        say("{} batches, {} tokens in total, {} triples in total\n".format(
                len(train_batches),
                sum(len(x[0].ravel()) for x in train_batches),
                sum(len(x[1].ravel()) for x in train_batches)
            ))
        train_batches = None

        model = Model(args, embedding_layer,
                      weights=weights if args.reweight else None)
        model.ready()

        # set parameters using pre-trained network
        if args.load_pretrain:
            model.encoder.load_pretrained_parameters(args)

        model.train(
                ids_corpus,
                train,
                (dev, dev_raw) if args.dev else None,
                (test, test_raw) if args.test else None
            )
Example #9
    def __init__(self,
                 model_path,
                 corpus_path,
                 emb_path,
                 session,
                 layer='lstm'):
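        # TensorFlow version of the scorer: rebuild the embedding layer,
        # pick the model class matching `layer`, restore the trained weights
        # into the session, and wrap session.run in a scoring closure.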
        raw_corpus = myio.read_corpus(corpus_path)
        embedding_layer = create_embedding_layer(n_d=10,
                                                 embs=load_embedding_iterator(
                                                     emb_path),
                                                 only_words=False)
        # weights = myio.create_idf_weights(corpus_path, embedding_layer) # todo
        say("vocab size={}, corpus size={}\n".format(embedding_layer.n_V,
                                                     len(raw_corpus)))

        if layer.lower() == "lstm":
            from models import LstmQR as Model
        elif layer.lower() in ["bilstm", "bigru"]:
            from models import BiRNNQR as Model
        elif layer.lower() == "cnn":
            from models import CnnQR as Model
        elif layer.lower() == "gru":
            from models import GruQR as Model

        model = Model(args={"layer": args.layer},
                      embedding_layer=embedding_layer,
                      weights=None)

        model.load_n_set_model(model_path, session)
        say("model initialized\n")

        self.model = model

        def score_func(titles, bodies, cur_sess):
            feed_dict = {
                self.model.titles_words_ids_placeholder:
                titles.T,  # IT IS TRANSPOSE ;)
                self.model.bodies_words_ids_placeholder:
                bodies.T,  # IT IS TRANSPOSE ;)
                self.model.dropout_prob: 0.,
            }
            _scores = cur_sess.run(self.model.scores, feed_dict)
            return _scores

        self.score_func = score_func
        say("scoring function compiled\n")
Example #10
    def train(self, ids_corpus, train, dev=None, test=None):
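        # Training loop: compile the Theano update and scoring functions,
        # rebuild the training batches every epoch, and evaluate on dev/test
        # at the end of each epoch, saving the model whenever dev MRR
        # improves; stops early after 15 epochs without improvement.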
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        updates, lr, gnorm = create_optimization_updates(
            cost=self.cost,
            params=self.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

        train_func = theano.function(inputs=[self.idts, self.idbs, self.idps],
                                     outputs=[self.cost, self.loss, gnorm],
                                     updates=updates)

        eval_func = theano.function(inputs=[self.idts, self.idbs],
                                    outputs=self.scores,
                                    on_unused_input='ignore')

        say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        result_table = PrettyTable(
            ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
            ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 15: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus,
                                                train,
                                                batch_size,
                                                padding_id,
                                                pad_left=not args.average)
            N = len(train_batches)

            train_loss = 0.0
            train_cost = 0.0

            for i in xrange(N):
                # get current batch
                idts, idbs, idps = train_batches[i]

                cur_cost, cur_loss, grad_norm = train_func(idts, idbs, idps)
                train_loss += cur_loss
                train_cost += cur_cost

                if i % 10 == 0:
                    say("\r{}/{}".format(i, N))

                if i == N - 1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(
                            dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(
                            test, eval_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row([epoch] + [
                            "%.2f" % x
                            for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                            [test_MAP, test_MRR, test_P1, test_P5]
                        ])
                        if args.save_model:
                            self.save_model(args.save_model)

                    dropout_p = np.float64(args.dropout).astype(
                        theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f}" \
                        +"\tMRR={:.2f},{:.2f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            dev_MRR,
                            best_dev,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #11
    def ready(self):
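        # Build the encoder-decoder graph: per depth level one source layer
        # and one target layer (the target stream is initialized with the
        # source's final state), a softmax output layer over the vocabulary,
        # a padding-masked cross-entropy loss, and an L2 penalty.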
        args = self.args
        weights = self.weights

        # len(source) * batch
        idxs = self.idxs = T.imatrix()

        # len(target) * batch
        idys = self.idys = T.imatrix()
        idts = idys[:-1]
        idgs = idys[1:]

        dropout = self.dropout = theano.shared(
            np.float64(args.dropout).astype(theano.config.floatX))

        embedding_layer = self.embedding_layer

        activation = get_activation_by_name(args.activation)
        n_d = self.n_d = args.hidden_dim
        n_e = self.n_e = embedding_layer.n_d
        n_V = self.n_V = embedding_layer.n_V

        if args.layer.lower() == "rcnn":
            LayerType = RCNN
        elif args.layer.lower() == "lstm":
            LayerType = LSTM
        elif args.layer.lower() == "gru":
            LayerType = GRU

        depth = self.depth = args.depth
        layers = self.layers = []
        for i in range(depth * 2):
            if LayerType != RCNN:
                feature_layer = LayerType(n_in=n_e if i / 2 == 0 else n_d,
                                          n_out=n_d,
                                          activation=activation)
            else:
                feature_layer = LayerType(n_in=n_e if i / 2 == 0 else n_d,
                                          n_out=n_d,
                                          activation=activation,
                                          order=args.order,
                                          mode=args.mode,
                                          has_outgate=args.outgate)
            layers.append(feature_layer)

        self.output_layer = output_layer = Layer(
            n_in=n_d,
            n_out=n_V,
            activation=T.nnet.softmax,
        )

        # feature computation starts here

        # (len*batch)*n_e
        xs_flat = embedding_layer.forward(idxs.ravel())
        xs_flat = apply_dropout(xs_flat, dropout)
        if weights is not None:
            xs_w = weights[idxs.ravel()].dimshuffle((0, 'x'))
            xs_flat = xs_flat * xs_w
        # len*batch*n_e
        xs = xs_flat.reshape((idxs.shape[0], idxs.shape[1], n_e))

        # (len*batch)*n_e
        xt_flat = embedding_layer.forward(idts.ravel())
        xt_flat = apply_dropout(xt_flat, dropout)
        if weights is not None:
            xt_w = weights[idts.ravel()].dimshuffle((0, 'x'))
            xt_flat = xt_flat * xt_w
        # len*batch*n_e
        xt = xt_flat.reshape((idts.shape[0], idts.shape[1], n_e))

        prev_hs = xs
        prev_ht = xt
        for i in range(depth):
            # len*batch*n_d
            hs = layers[i * 2].forward_all(prev_hs, return_c=True)
            ht = layers[i * 2 + 1].forward_all(prev_ht, hs[-1])
            hs = hs[:, :, -n_d:]
            ht = ht[:, :, -n_d:]
            prev_hs = hs
            prev_ht = ht
            prev_hs = apply_dropout(hs, dropout)
            prev_ht = apply_dropout(ht, dropout)

        self.p_y_given_x = output_layer.forward(
            prev_ht.reshape((xt_flat.shape[0], n_d)))

        h_final = hs[-1]
        self.scores2 = -(h_final[1:] - h_final[0]).norm(2, axis=1)
        h_final = self.normalize_2d(h_final)
        self.scores = T.dot(h_final[1:], h_final[0])

        # (len*batch)
        nll = T.nnet.categorical_crossentropy(self.p_y_given_x, idgs.ravel())
        nll = nll.reshape(idgs.shape)
        self.nll = nll
        self.mask = mask = T.cast(T.neq(idgs, self.padding_id),
                                  theano.config.floatX)
        nll = T.sum(nll * mask, axis=0)

        #layers.append(embedding_layer)
        layers.append(output_layer)
        params = []
        for l in self.layers:
            params += l.params
        self.params = params
        say("num of parameters: {}\n".format(
            sum(len(x.get_value(borrow=True).ravel()) for x in params)))

        l2_reg = None
        for p in params:
            if l2_reg is None:
                l2_reg = p.norm(2)
            else:
                l2_reg = l2_reg + p.norm(2)
        l2_reg = l2_reg * args.l2_reg
        self.loss = T.mean(nll)
        self.cost = self.loss + l2_reg
Example #12
    def train(self, ids_corpus, train, dev=None, test=None, heldout=None):
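        # Training loop with an auto-encoding objective: titles and/or
        # bodies are fed through create_one_batch, dev/test are scored each
        # epoch, perplexity is tracked on the held-out set, and training
        # stops early after 8 epochs without improvement on dev MRR.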
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id
        bos_id = self.bos_id
        eos_id = self.eos_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id, args.loss)

        updates, lr, gnorm = create_optimization_updates(
            cost=self.cost,
            params=self.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

        train_func = theano.function(inputs=[self.idxs, self.idys],
                                     outputs=[self.cost, self.loss, gnorm],
                                     updates=updates)

        eval_func = theano.function(
            inputs=[self.idxs],
            #outputs = self.scores2
            outputs=self.scores)

        nll_func = theano.function(inputs=[self.idxs, self.idys],
                                   outputs=[self.nll, self.mask])

        say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        result_table = PrettyTable(
            ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
            ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        heldout_PPL = -1

        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 8: break

            start_time = time.time()

            train_batches = myio.create_batches(ids_corpus,
                                                train,
                                                batch_size,
                                                padding_id,
                                                bos_id,
                                                eos_id,
                                                auto_encode=True)
            N = len(train_batches)

            train_cost = 0.0
            train_loss = 0.0
            train_loss2 = 0.0
            for i in xrange(N):
                # get current batch
                t1, b1, t2 = train_batches[i]

                if args.use_title:
                    idxs, idys = myio.create_one_batch(t1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if args.use_body:
                    idxs, idys = myio.create_one_batch(b1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if i % 10 == 0:
                    say("\r{}/{}".format(i, N))

                if i == N - 1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(
                            dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(
                            test, eval_func)
                    if heldout is not None:
                        heldout_PPL = self.evaluate_perplexity(
                            heldout, nll_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row([epoch] + [
                            "%.2f" % x
                            for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                            [test_MAP, test_MRR, test_P1, test_P5]
                        ])
                        if args.model:
                            self.save_model(args.model + ".pkl.gz")

                    dropout_p = np.float64(args.dropout).astype(
                        theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f} {:.3f}\t" \
                        +"\tMRR={:.2f},{:.2f}\tPPL={:.1f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            train_loss2 / (i+1),
                            dev_MRR,
                            best_dev,
                            heldout_PPL,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #13
    def ready(self):
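        # Build the retrieval graph: encode titles and bodies with shared
        # layers, pool over time (mean or last state), average the two
        # encodings into h_final, score candidates by dot product, and train
        # with a max-margin hinge loss over positive/negative pairs.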
        args = self.args
        weights = self.weights

        # len(title) * batch
        idts = self.idts = T.imatrix()

        # len(body) * batch
        idbs = self.idbs = T.imatrix()

        # num pairs * 3, or num queries * candidate size
        idps = self.idps = T.imatrix()

        dropout = self.dropout = theano.shared(np.float64(args.dropout).astype(
                            theano.config.floatX))
        dropout_op = self.dropout_op = Dropout(self.dropout)

        embedding_layer = self.embedding_layer

        activation = get_activation_by_name(args.activation)
        n_d = self.n_d = args.hidden_dim
        n_e = self.n_e = embedding_layer.n_d

        if args.layer.lower() == "rcnn":
            LayerType = RCNN
        elif args.layer.lower() == "lstm":
            LayerType = LSTM
        elif args.layer.lower() == "gru":
            LayerType = GRU

        depth = self.depth = args.depth
        layers = self.layers = [ ]
        for i in range(depth):
            if LayerType != RCNN:
                feature_layer = LayerType(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation
                    )
            else:
                feature_layer = LayerType(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation,
                        order = args.order,
                        mode = args.mode,
                        has_outgate = args.outgate
                    )
            layers.append(feature_layer)

        # feature computation starts here

        # (len*batch)*n_e
        xt = embedding_layer.forward(idts.ravel())
        if weights is not None:
            xt_w = weights[idts.ravel()].dimshuffle((0,'x'))
            xt = xt * xt_w

        # len*batch*n_e
        xt = xt.reshape((idts.shape[0], idts.shape[1], n_e))
        xt = apply_dropout(xt, dropout)

        # (len*batch)*n_e
        xb = embedding_layer.forward(idbs.ravel())
        if weights is not None:
            xb_w = weights[idbs.ravel()].dimshuffle((0,'x'))
            xb = xb * xb_w

        # len*batch*n_e
        xb = xb.reshape((idbs.shape[0], idbs.shape[1], n_e))
        xb = apply_dropout(xb, dropout)

        prev_ht = self.xt = xt
        prev_hb = self.xb = xb
        for i in range(depth):
            # len*batch*n_d
            ht = layers[i].forward_all(prev_ht)
            hb = layers[i].forward_all(prev_hb)
            prev_ht = ht
            prev_hb = hb

        # normalize vectors
        if args.normalize:
            ht = self.normalize_3d(ht)
            hb = self.normalize_3d(hb)
            say("h_title dtype: {}\n".format(ht.dtype))

        self.ht = ht
        self.hb = hb

        # average over length, ignore paddings
        # batch * d
        if args.average:
            ht = self.average_without_padding(ht, idts)
            hb = self.average_without_padding(hb, idbs)
        else:
            ht = ht[-1]
            hb = hb[-1]
        say("h_avg_title dtype: {}\n".format(ht.dtype))

        # batch * d
        h_final = (ht+hb)*0.5
        h_final = apply_dropout(h_final, dropout)
        h_final = self.normalize_2d(h_final)
        self.h_final = h_final
        say("h_final dtype: {}\n".format(ht.dtype))

        # For testing:
        #   first one in batch is query, the rest are candidate questions
        self.scores = T.dot(h_final[1:], h_final[0])

        # For training:
        xp = h_final[idps.ravel()]
        xp = xp.reshape((idps.shape[0], idps.shape[1], n_d))
        # num query * n_d
        query_vecs = xp[:,0,:]
        # num query
        pos_scores = T.sum(query_vecs*xp[:,1,:], axis=1)
        # num query * candidate size
        neg_scores = T.sum(query_vecs.dimshuffle((0,'x',1))*xp[:,2:,:], axis=2)
        # num query
        neg_scores = T.max(neg_scores, axis=1)
        diff = neg_scores - pos_scores + 1.0
        loss = T.mean( (diff>0)*diff )
        self.loss = loss

        params = [ ]
        for l in self.layers:
            params += l.params
        self.params = params
        say("num of parameters: {}\n".format(
            sum(len(x.get_value(borrow=True).ravel()) for x in params)
        ))

        l2_reg = None
        for p in params:
            if l2_reg is None:
                l2_reg = p.norm(2)
            else:
                l2_reg = l2_reg + p.norm(2)
        l2_reg = l2_reg * args.l2_reg
        self.cost = self.loss + l2_reg
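
As a sanity check, the hinge loss constructed above can be reproduced with plain NumPy (an illustrative sketch only, assuming L2-normalized vectors as produced by normalize_2d):

import numpy as np

def max_margin_loss(query, pos, negs, margin=1.0):
    # query: (d,), pos: (d,), negs: (k, d). Mirrors the Theano graph:
    # score the positive and each negative against the query, take the
    # hardest negative, and apply a hinge with the given margin.
    pos_score = np.dot(query, pos)
    neg_score = np.max(negs.dot(query))
    diff = neg_score - pos_score + margin
    return max(diff, 0.0)
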
Example #14
    def train(self, ids_corpus, train, dev=None, test=None):
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        updates, lr, gnorm = create_optimization_updates(
                cost = self.cost,
                params = self.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_func = theano.function(
                inputs = [ self.idts, self.idbs, self.idps ],
                outputs = [ self.cost, self.loss, gnorm ],
                updates = updates
            )

        eval_func = theano.function(
                inputs = [ self.idts, self.idbs ],
                outputs = self.scores,
                on_unused_input='ignore'
            )

        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat()
            ))

        result_table = PrettyTable(["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
                                    ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 15: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                    padding_id, pad_left = not args.average)
            N = len(train_batches)

            train_loss = 0.0
            train_cost = 0.0

            for i in xrange(N):
                # get current batch
                idts, idbs, idps = train_batches[i]

                cur_cost, cur_loss, grad_norm = train_func(idts, idbs, idps)
                train_loss += cur_loss
                train_cost += cur_cost

                if i % 10 == 0:
                    say("\r{}/{}".format(i,N))

                if i == N-1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(test, eval_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row(
                            [ epoch ] +
                            [ "%.2f" % x for x in [ dev_MAP, dev_MRR, dev_P1, dev_P5 ] +
                                        [ test_MAP, test_MRR, test_P1, test_P5 ] ]
                        )
                        if args.save_model:
                            self.save_model(args.save_model)

                    dropout_p = np.float64(args.dropout).astype(
                                theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f}" \
                        +"\tMRR={:.2f},{:.2f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            dev_MRR,
                            best_dev,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(
                            self.get_pnorm_stat()
                        ))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #15
    def train(self, ids_corpus, train, dev=None, test=None):
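        # Rationale-model training: optimize the generator (and, with
        # args.joint, the encoder at a 10x smaller learning rate); if the
        # epoch's average cost rises past the tolerance, halve the learning
        # rate, restore the previous parameters, and redo the epoch.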
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        if dev is not None:
            dev, dev_raw = dev
        if test is not None:
            test, test_raw = test

        if args.joint:
            updates_e, lr_e, gnorm_e = create_optimization_updates(
                    cost = self.encoder.cost_e, #self.encoder.cost,
                    params = self.encoder.params,
                    lr = args.learning_rate*0.1,
                    method = args.learning
                )[:3]
        else:
            updates_e = {}

        updates_g, lr_g, gnorm_g = create_optimization_updates(
                cost = self.encoder.cost_g,
                params = self.generator.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_func = theano.function(
                inputs = [ self.x, self.triples, self.pairs ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                        self.encoder.sparsity_cost, self.generator.p1, gnorm_g ],
                updates = updates_g.items() + updates_e.items() + self.generator.sample_updates,
                #no_default_updates = True,
                on_unused_input= "ignore"
            )

        eval_func = theano.function(
                inputs = [ self.x ],
                outputs = self.encoder.scores
            )

        eval_func2 = theano.function(
                inputs = [ self.x ],
                outputs = [ self.encoder.scores_z, self.generator.p1, self.z ],
                updates = self.generator.sample_updates,
                #no_default_updates = True
            )


        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat(self.encoder.params)
            ))
        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat(self.generator.params)
            ))

        result_table = PrettyTable(["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
                                    ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])
        last_train_avg_cost = None
        tolerance = 0.5 + 1e-3
        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 20: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                    padding_id, pad_left=not args.average, merge=args.merge)
            N = len(train_batches)

            more = True
            param_bak = [ p.get_value(borrow=False) for p in self.params ]

            while more:

                train_loss = 0.0
                train_cost = 0.0
                train_scost = 0.0
                train_p1 = 0.0

                for i in xrange(N):
                    # get current batch
                    idts, triples, pairs = train_batches[i]

                    cur_cost, cur_loss, cur_scost, cur_p1, gnormg = train_func(idts,
                                                                                triples, pairs)
                    train_loss += cur_loss
                    train_cost += cur_cost
                    train_scost += cur_scost
                    train_p1 += cur_p1

                    if i % 10 == 0:
                        say("\r{}/{} {:.3f}".format(i,N,train_p1/(i+1)))

                cur_train_avg_cost = train_cost / N
                more = False
                if last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost*(1+tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                                last_train_avg_cost, cur_train_avg_cost
                            ))

                if more:
                    lr_val = lr_g.get_value()*0.5
                    if lr_val < 1e-5: return
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    if args.joint:
                        lr_e.set_value(lr_val)
                    say("Decrease learning rate to {}\n".format(float(lr_val)))
                    for p, v in zip(self.params, param_bak):
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost

                say("\r\n\n")
                say( ( "Epoch {}  cost={:.3f}  loss={:.3f}  scost={:.3f}" \
                    +"  P[1]={:.3f}  |g|={:.3f}\t[{:.3f}m]\n" ).format(
                        epoch,
                        train_cost / N,
                        train_loss / N,
                        train_scost / N,
                        train_p1 / N,
                        float(gnormg),
                        (time.time()-start_time)/60.0
                ))
                say("\tp_norm: {}\n".format(
                        self.get_pnorm_stat(self.encoder.params)
                    ))
                say("\tp_norm: {}\n".format(
                        self.get_pnorm_stat(self.generator.params)
                    ))

                self.dropout.set_value(0.0)

                if dev is not None:
                    full_MAP, full_MRR, full_P1, full_P5 = self.evaluate(dev, eval_func)
                    dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT = self.evaluate_z(dev,
                            dev_raw, ids_corpus, eval_func2)

                if test is not None:
                    test_MAP, test_MRR, test_P1, test_P5, test_PZ1, test_PT = \
                            self.evaluate_z(test, test_raw, ids_corpus, eval_func2)

                if dev_MAP > best_dev:
                    best_dev = dev_MAP
                    unchanged = 0

                say("\n")
                say("  fMAP={:.2f} fMRR={:.2f} fP1={:.2f} fP5={:.2f}\n".format(
                        full_MAP, full_MRR,
                        full_P1, full_P5
                    ))

                say("\n")
                say(("  dMAP={:.2f} dMRR={:.2f} dP1={:.2f} dP5={:.2f}" +
                     " dP[1]={:.3f} d%T={:.3f} best_dev={:.2f}\n").format(
                        dev_MAP, dev_MRR,
                        dev_P1, dev_P5,
                        dev_PZ1, dev_PT, best_dev
                    ))

                result_table.add_row(
                        [ epoch ] +
                        [ "%.2f" % x for x in [ dev_MAP, dev_MRR, dev_P1, dev_P5 ] +
                                    [ test_MAP, test_MRR, test_P1, test_P5 ] ]
                    )

                if unchanged == 0:
                    say("\n")
                    say(("  tMAP={:.2f} tMRR={:.2f} tP1={:.2f} tP5={:.2f}" +
                        " tP[1]={:.3f} t%T={:.3f}\n").format(
                        test_MAP, test_MRR,
                        test_P1, test_P5,
                        test_PZ1, test_PT
                    ))
                    if args.dump_rationale:
                        self.evaluate_z(dev+test, dev_raw+test_raw, ids_corpus,
                                eval_func2, args.dump_rationale)

                    #if args.save_model:
                    #    self.save_model(args.save_model)

                dropout_p = np.float64(args.dropout).astype(
                            theano.config.floatX)
                self.dropout.set_value(dropout_p)

                say("\n")
                say("{}".format(result_table))
                say("\n")

            if train_p1/N <= 1e-4 or train_p1/N+1e-4 >= 1.0:
                break
Example #16
    def ready(self):
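        # Generator graph: run one RNN over the sequence and another over
        # its reverse, concatenate their states, and sample a binary mask
        # z per token from a ZLayer; z is detached from the gradient path
        # since its gradient is estimated by sampling.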
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = self.padding_id
        weights = self.weights

        dropout = self.dropout = theano.shared(
                np.float64(args.dropout).astype(theano.config.floatX)
            )

        # len*batch
        x = self.x = T.imatrix()

        n_d = args.hidden_dim2
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = [ ]
        layer_type = args.layer.lower()
        for i in xrange(2):
            if layer_type == "rcnn":
                l = RCNN(
                        n_in = n_e,# if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation,
                        order = args.order
                    )
            elif layer_type == "lstm":
                l = LSTM(
                        n_in = n_e,# if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation
                    )
            layers.append(l)

        # len * batch
        masks = T.cast(T.neq(x, padding_id), "float32")

        #masks = masks.dimshuffle((0,1,"x"))

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        if weights is not None:
            embs_w = weights[x.ravel()].dimshuffle((0,'x'))
            embs = embs * embs_w

        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)
        self.word_embs = embs
        flipped_embs = embs[::-1]

        # len*batch*n_d
        h1 = layers[0].forward_all(embs)
        h2 = layers[1].forward_all(flipped_embs)
        h_final = T.concatenate([h1, h2[::-1]], axis=2)
        h_final = apply_dropout(h_final, dropout)
        size = n_d * 2

        output_layer = self.output_layer = ZLayer(
                n_in = size,
                n_hidden = n_d,
                activation = activation
            )

        # sample z given text (i.e. x)
        z_pred, sample_updates = output_layer.sample_all(h_final)

        # we are computing approximated gradient by sampling z;
        # so should mark sampled z not part of the gradient propagation path
        #
        z_pred = self.z_pred = theano.gradient.disconnected_grad(z_pred)
        self.sample_updates = sample_updates
        print "z_pred", z_pred.ndim

        self.p1 = T.sum(masks*z_pred) / (T.sum(masks) + 1e-8)

        # len*batch*1
        probs = output_layer.forward_all(h_final, z_pred)
        print "probs", probs.ndim

        logpz = - T.nnet.binary_crossentropy(probs, z_pred) * masks
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        z = z_pred
        self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        self.zdiff = T.sum(T.abs_(z[1:]-z[:-1]), axis=0, dtype=theano.config.floatX)

        params = self.params = [ ]
        for l in layers + [ output_layer ]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost
Example #17
    def ready(self):
        args = self.args
        weights = self.weights

        # len(source) * batch
        idxs = self.idxs = T.imatrix()

        # len(target) * batch
        idys = self.idys = T.imatrix()
        idts = idys[:-1]
        idgs = idys[1:]

        dropout = self.dropout = theano.shared(np.float64(args.dropout).astype(
                            theano.config.floatX))

        embedding_layer = self.embedding_layer

        activation = get_activation_by_name(args.activation)
        n_d = self.n_d = args.hidden_dim
        n_e = self.n_e = embedding_layer.n_d
        n_V = self.n_V = embedding_layer.n_V

        if args.layer.lower() == "rcnn":
            LayerType = RCNN
        elif args.layer.lower() == "lstm":
            LayerType = LSTM
        elif args.layer.lower() == "gru":
            LayerType = GRU

        depth = self.depth = args.depth
        layers = self.layers = [ ]
        for i in range(depth*2):
            if LayerType != RCNN:
                feature_layer = LayerType(
                        n_in = n_e if i/2 == 0 else n_d,
                        n_out = n_d,
                        activation = activation
                    )
            else:
                feature_layer = LayerType(
                        n_in = n_e if i/2 == 0 else n_d,
                        n_out = n_d,
                        activation = activation,
                        order = args.order,
                        mode = args.mode,
                        has_outgate = args.outgate
                    )
            layers.append(feature_layer)

        self.output_layer = output_layer = Layer(
                n_in = n_d,
                n_out = n_V,
                activation = T.nnet.softmax,
            )

        # feature computation starts here

        # (len*batch)*n_e
        xs_flat = embedding_layer.forward(idxs.ravel())
        xs_flat = apply_dropout(xs_flat, dropout)
        if weights is not None:
            xs_w = weights[idxs.ravel()].dimshuffle((0,'x'))
            xs_flat = xs_flat * xs_w
        # len*batch*n_e
        xs = xs_flat.reshape((idxs.shape[0], idxs.shape[1], n_e))

        # (len*batch)*n_e
        xt_flat = embedding_layer.forward(idts.ravel())
        xt_flat = apply_dropout(xt_flat, dropout)
        if weights is not None:
            xt_w = weights[idts.ravel()].dimshuffle((0,'x'))
            xt_flat = xt_flat * xt_w
        # len*batch*n_e
        xt = xt_flat.reshape((idts.shape[0], idts.shape[1], n_e))

        prev_hs = xs
        prev_ht = xt
        for i in range(depth):
            # len*batch*n_d
            hs = layers[i*2].forward_all(prev_hs, return_c=True)
            ht = layers[i*2+1].forward_all(prev_ht, hs[-1])
            hs = hs[:,:,-n_d:]
            ht = ht[:,:,-n_d:]
            prev_hs = hs
            prev_ht = ht
            prev_hs = apply_dropout(hs, dropout)
            prev_ht = apply_dropout(ht, dropout)

        self.p_y_given_x = output_layer.forward(prev_ht.reshape(
                                (xt_flat.shape[0], n_d)
                            ))

        h_final = hs[-1]
        self.scores2 = -(h_final[1:]-h_final[0]).norm(2,axis=1)
        h_final = self.normalize_2d(h_final)
        self.scores = T.dot(h_final[1:], h_final[0])

        # (len*batch)
        nll = T.nnet.categorical_crossentropy(
                        self.p_y_given_x,
                        idgs.ravel()
                    )
        nll = nll.reshape(idgs.shape)
        self.nll = nll
        self.mask = mask = T.cast(T.neq(idgs, self.padding_id), theano.config.floatX)
        nll = T.sum(nll*mask, axis=0)

        #layers.append(embedding_layer)
        layers.append(output_layer)
        params = [ ]
        for l in self.layers:
            params += l.params
        self.params = params
        say("num of parameters: {}\n".format(
            sum(len(x.get_value(borrow=True).ravel()) for x in params)
        ))

        l2_reg = None
        for p in params:
            if l2_reg is None:
                l2_reg = p.norm(2)
            else:
                l2_reg = l2_reg + p.norm(2)
        l2_reg = l2_reg * args.l2_reg
        self.loss = T.mean(nll)
        self.cost = self.loss + l2_reg
Example #18
    def train_model(self, ids_corpus, train, dev=None, test=None):
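        # TensorFlow counterpart of the Theano training loop: Adam updates
        # with a global step, per-step loss/cost summaries, parameter-norm
        # and dev-evaluation summaries for TensorBoard, checkpointing, and
        # early stopping controlled by a patience parameter.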
        with tf.Session() as sess:

            result_table = PrettyTable([
                "Epoch", "Step", "dev MAP", "dev MRR", "dev P@1", "dev P@5",
                "tst MAP", "tst MRR", "tst P@1", "tst P@5"
            ])
            dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
            test_MAP = test_MRR = test_P1 = test_P5 = 0
            best_dev = -1

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(self.args.learning_rate)
            train_op = optimizer.minimize(self.cost, global_step=global_step)

            print '\n\ntrainable params: ', tf.trainable_variables(), '\n\n'

            sess.run(tf.global_variables_initializer())
            emb = sess.run(self.embeddings)
            print '\nemb {}\n'.format(emb[10][0:10])

            if self.init_assign_ops != {}:
                print 'assigning trained values ...\n'
                sess.run(self.init_assign_ops)
                emb = sess.run(self.embeddings)
                print '\nemb {}\n'.format(emb[10][0:10])
                self.init_assign_ops = {}

            if self.args.save_dir != "":
                print("Writing to {}\n".format(self.args.save_dir))

            # TRAIN LOSS
            train_loss_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "train",
                             "loss"), )
            train_cost_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "train", "cost"),
                sess.graph)

            # VARIABLE NORM
            p_norm_summaries = {}
            p_norm_placeholders = {}
            for param_name, param_norm in self.get_pnorm_stat(
                    sess).iteritems():
                p_norm_placeholders[param_name] = tf.placeholder(tf.float32)
                p_norm_summaries[param_name] = tf.summary.scalar(
                    param_name, p_norm_placeholders[param_name])
            p_norm_summary_op = tf.summary.merge(p_norm_summaries.values())
            p_norm_summary_dir = os.path.join(self.args.save_dir, "summaries",
                                              "p_norm")
            p_norm_summary_writer = tf.summary.FileWriter(p_norm_summary_dir, )

            # DEV LOSS & EVAL
            dev_loss0_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev",
                             "loss0"), )
            dev_loss1_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev",
                             "loss1"), )
            dev_loss2_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev",
                             "loss2"), )
            dev_eval_writer1 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "MAP"), )
            dev_eval_writer2 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "MRR"), )
            dev_eval_writer3 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "Pat1"), )
            dev_eval_writer4 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "Pat5"), )

            loss = tf.placeholder(tf.float32)
            loss_summary = tf.summary.scalar("loss", loss)
            dev_eval = tf.placeholder(tf.float32)
            dev_summary = tf.summary.scalar("QR_evaluation", dev_eval)
            cost = tf.placeholder(tf.float32)
            cost_summary = tf.summary.scalar("cost", cost)
            # train_eval = tf.placeholder(tf.float32)
            # train_summary = tf.summary.scalar("QR_train", train_eval)

            if self.args.save_dir != "":
                checkpoint_dir = os.path.join(self.args.save_dir,
                                              "checkpoints")
                checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                if not os.path.exists(checkpoint_dir):
                    os.makedirs(checkpoint_dir)

            patience = 8 if 'patience' not in self.args else self.args.patience
            unchanged = 0
            max_epoch = self.args.max_epoch
            for epoch in xrange(max_epoch):
                unchanged += 1
                if unchanged > patience:
                    break

                train_batches = myio.create_batches(ids_corpus,
                                                    train,
                                                    self.args.batch_size,
                                                    self.padding_id,
                                                    pad_left=False)

                N = len(train_batches)

                train_loss = 0.0
                train_cost = 0.0

                for i in xrange(N):
                    idts, idbs, idps, qpp = train_batches[i]
                    cur_step, cur_loss, cur_cost = self.train_batch(
                        idts, idbs, idps, qpp, train_op, global_step, sess)
                    summary = sess.run(loss_summary, {loss: cur_loss})
                    train_loss_writer.add_summary(summary, cur_step)
                    train_loss_writer.flush()
                    summary = sess.run(cost_summary, {cost: cur_cost})
                    train_cost_writer.add_summary(summary, cur_step)
                    train_cost_writer.flush()

                    train_loss += cur_loss
                    train_cost += cur_cost

                    if i % 10 == 0:
                        say("\r{}/{}".format(i, N))

                    if i == N - 1 or (i % 10 == 0 and 'testing' in self.args
                                      and self.args.testing):  # EVAL
                        if dev:
                            dev_MAP, dev_MRR, dev_P1, dev_P5, dloss0, dloss1, dloss2 = self.evaluate(
                                dev, sess)

                            summary = sess.run(loss_summary, {loss: dloss0})
                            dev_loss0_writer.add_summary(summary, cur_step)
                            dev_loss0_writer.flush()
                            summary = sess.run(loss_summary, {loss: dloss1})
                            dev_loss1_writer.add_summary(summary, cur_step)
                            dev_loss1_writer.flush()
                            summary = sess.run(loss_summary, {loss: dloss2})
                            dev_loss2_writer.add_summary(summary, cur_step)
                            dev_loss2_writer.flush()

                            summary = sess.run(dev_summary,
                                               {dev_eval: dev_MAP})
                            dev_eval_writer1.add_summary(summary, cur_step)
                            dev_eval_writer1.flush()
                            summary = sess.run(dev_summary,
                                               {dev_eval: dev_MRR})
                            dev_eval_writer2.add_summary(summary, cur_step)
                            dev_eval_writer2.flush()
                            summary = sess.run(dev_summary, {dev_eval: dev_P1})
                            dev_eval_writer3.add_summary(summary, cur_step)
                            dev_eval_writer3.flush()
                            summary = sess.run(dev_summary, {dev_eval: dev_P5})
                            dev_eval_writer4.add_summary(summary, cur_step)
                            dev_eval_writer4.flush()

                            feed_dict = {}
                            for param_name, param_norm in self.get_pnorm_stat(
                                    sess).iteritems():
                                feed_dict[p_norm_placeholders[
                                    param_name]] = param_norm
                            _p_norm_sum = sess.run(p_norm_summary_op,
                                                   feed_dict)
                            p_norm_summary_writer.add_summary(
                                _p_norm_sum, cur_step)

                        if test:
                            test_MAP, test_MRR, test_P1, test_P5, tloss0, tloss1, tloss2 = self.evaluate(
                                test, sess)

                        if self.args.performance == "MRR" and dev_MRR > best_dev:
                            unchanged = 0
                            best_dev = dev_MRR
                            result_table.add_row([
                                epoch, cur_step, dev_MAP, dev_MRR, dev_P1,
                                dev_P5, test_MAP, test_MRR, test_P1, test_P5
                            ])
                            if self.args.save_dir != "":
                                self.save(sess, checkpoint_prefix, cur_step)
                        elif self.args.performance == "MAP" and dev_MAP > best_dev:
                            unchanged = 0
                            best_dev = dev_MAP
                            result_table.add_row([
                                epoch, cur_step, dev_MAP, dev_MRR, dev_P1,
                                dev_P5, test_MAP, test_MRR, test_P1, test_P5
                            ])
                            if self.args.save_dir != "":
                                self.save(sess, checkpoint_prefix, cur_step)

                        say("\r\n\nEpoch {}\tcost={:.3f}\tloss={:.3f}\tMRR={:.2f},MAP={:.2f}\n"
                            .format(
                                epoch,
                                train_cost /
                                (i + 1),  # i.e. divided by N training batches
                                train_loss /
                                (i + 1),  # i.e. divided by N training batches
                                dev_MRR,
                                dev_MAP))
                        say("\n{}\n".format(result_table))
                        myio.say("\tp_norm: {}\n".format(
                            self.get_pnorm_stat(sess)))
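
The loop above logs Python-side scalars (loss, cost, the dev metrics) by feeding each value through a placeholder attached to a tf.summary.scalar op. A minimal standalone sketch of that pattern, assuming the TF 1.x API used in this example (path and values are hypothetical):

import tensorflow as tf

# Feed a Python float into a scalar summary via a placeholder,
# then write the serialized summary with a FileWriter.
loss_ph = tf.placeholder(tf.float32)
loss_summary = tf.summary.scalar("loss", loss_ph)

with tf.Session() as sess:
    writer = tf.summary.FileWriter("/tmp/summaries/train/loss", sess.graph)
    for step, loss_value in enumerate([0.9, 0.7, 0.55]):  # stand-in losses
        summary = sess.run(loss_summary, {loss_ph: loss_value})
        writer.add_summary(summary, step)
    writer.flush()
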
Example #19
def main(args):
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = myio.create_embedding_layer(
        raw_corpus,
        n_d=args.hidden_dim,
        cut_off=args.cut_off,
        embs=load_embedding_iterator(args.embeddings)
        if args.embeddings else None)
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer)
    say("vocab size={}, corpus size={}\n".format(embedding_layer.n_V,
                                                 len(raw_corpus)))
    padding_id = embedding_layer.vocab_map["<padding>"]
    bos_id = embedding_layer.vocab_map["<s>"]
    eos_id = embedding_layer.vocab_map["</s>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        dev = myio.read_annotations(args.dev, K_neg=20, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus, dev, padding_id)
    if args.test:
        test = myio.read_annotations(args.test, K_neg=20, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus, test, padding_id)

    if args.heldout:
        with open(args.heldout) as fin:
            heldout_ids = fin.read().split()
        heldout_corpus = dict(
            (id, ids_corpus[id]) for id in heldout_ids if id in ids_corpus)
        train_corpus = dict((id, ids_corpus[id]) for id in ids_corpus
                            if id not in heldout_corpus)
        heldout = myio.create_batches(heldout_corpus, [],
                                      args.batch_size,
                                      padding_id,
                                      bos_id,
                                      eos_id,
                                      auto_encode=True)
        heldout = [
            myio.create_one_batch(b1, t2, padding_id) for t1, b1, t2 in heldout
        ]
        say("heldout examples={}\n".format(len(heldout_corpus)))

    if args.train:
        model = Model(args,
                      embedding_layer,
                      weights=weights if args.reweight else None)

        start_time = time.time()
        train = myio.read_annotations(args.train)
        if not args.use_anno: train = []
        train_batches = myio.create_batches(ids_corpus,
                                            train,
                                            args.batch_size,
                                            model.padding_id,
                                            model.bos_id,
                                            model.eos_id,
                                            auto_encode=True)
        say("{} to create batches\n".format(time.time() - start_time))
        model.ready()

        model.train(ids_corpus if not args.heldout else train_corpus, train,
                    dev if args.dev else None, test if args.test else None,
                    heldout if args.heldout else None)
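
The heldout branch above splits the corpus dict by id before batching. A minimal sketch of that split, with toy data standing in for ids_corpus and the held-out id file:

ids_corpus = {"q1": [1, 2], "q2": [3], "q3": [4, 5]}
heldout_ids = {"q2"}

heldout_corpus = dict((qid, ex) for qid, ex in ids_corpus.items()
                      if qid in heldout_ids)
train_corpus = dict((qid, ex) for qid, ex in ids_corpus.items()
                    if qid not in heldout_corpus)

assert set(heldout_corpus) | set(train_corpus) == set(ids_corpus)
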
Example #20
    def ready(self):
        generator = self.generator
        args = self.args
        weights = self.weights

        dropout = generator.dropout

        # len(text) * batch
        idts = generator.x
        z = generator.z_pred
        z = z.dimshuffle((0, 1, "x"))

        # batch * 2
        pairs = self.pairs = T.imatrix()

        # num pairs * 3, or num queries * candidate size
        triples = self.triples = T.imatrix()

        embedding_layer = self.embedding_layer

        activation = get_activation_by_name(args.activation)
        n_d = self.n_d = args.hidden_dim
        n_e = self.n_e = embedding_layer.n_d

        if args.layer.lower() == "rcnn":
            LayerType = RCNN
            LayerType2 = ExtRCNN
        elif args.layer.lower() == "lstm":
            LayerType = LSTM
            LayerType2 = ExtLSTM
        #elif args.layer.lower() == "gru":
        #    LayerType = GRU

        depth = self.depth = args.depth
        layers = self.layers = []
        for i in range(depth):
            if LayerType != RCNN:
                feature_layer = LayerType(n_in=n_e if i == 0 else n_d,
                                          n_out=n_d,
                                          activation=activation)
            else:
                feature_layer = LayerType(n_in=n_e if i == 0 else n_d,
                                          n_out=n_d,
                                          activation=activation,
                                          order=args.order,
                                          mode=args.mode,
                                          has_outgate=args.outgate)
            layers.append(feature_layer)

        extlayers = self.extlayers = []
        for i in range(depth):
            if LayerType != RCNN:
                feature_layer = LayerType2(n_in=n_e if i == 0 else n_d,
                                           n_out=n_d,
                                           activation=activation)
            else:
                feature_layer = LayerType2(n_in=n_e if i == 0 else n_d,
                                           n_out=n_d,
                                           activation=activation,
                                           order=args.order,
                                           mode=args.mode,
                                           has_outgate=args.outgate)
            feature_layer.copy_params(layers[i])
            extlayers.append(feature_layer)

        # feature computation starts here

        xt = generator.word_embs

        # encode full text into representation
        prev_ht = self.xt = xt
        for i in range(depth):
            # len*batch*n_d
            ht = layers[i].forward_all(prev_ht)
            prev_ht = ht

        # encode selected text into representation
        prev_htz = self.xt = xt
        for i in range(depth):
            # len*batch*n_d
            htz = extlayers[i].forward_all(prev_htz, z)
            prev_htz = htz

        # normalize vectors
        if args.normalize:
            ht = self.normalize_3d(ht)
            htz = self.normalize_3d(htz)
            say("h_title dtype: {}\n".format(ht.dtype))

        self.ht = ht
        self.htz = htz

        # average over length, ignore paddings
        # batch * d
        if args.average:
            ht = self.average_without_padding(ht, idts)
            htz = self.average_without_padding(htz, idts, z)
        else:
            ht = ht[-1]
            htz = htz[-1]
        say("h_avg_title dtype: {}\n".format(ht.dtype))

        # batch * d
        h_final = apply_dropout(ht, dropout)
        h_final = self.normalize_2d(h_final)
        hz_final = apply_dropout(htz, dropout)
        hz_final = self.normalize_2d(hz_final)
        self.h_final = h_final
        self.hz_final = hz_final

        say("h_final dtype: {}\n".format(ht.shape))

        # For testing:
        #   first one in batch is query, the rest are candidate questions
        self.scores = T.dot(h_final[1:], h_final[0])
        self.scores_z = T.dot(hz_final[1:], hz_final[0])

        # For training encoder:
        xp = h_final[triples.ravel()]
        xp = xp.reshape((triples.shape[0], triples.shape[1], n_d))
        # num query * n_d
        query_vecs = xp[:, 0, :]
        # num query
        pos_scores = T.sum(query_vecs * xp[:, 1, :], axis=1)
        # num query * candidate size
        neg_scores = T.sum(query_vecs.dimshuffle((0, 'x', 1)) * xp[:, 2:, :],
                           axis=2)
        # num query
        neg_scores = T.max(neg_scores, axis=1)
        diff = neg_scores - pos_scores + 1.0
        hinge_loss = T.mean((diff > 0) * diff)

        # For training generator

        # batch
        self_cosine_distance = 1.0 - T.sum(hz_final * h_final, axis=1)
        pair_cosine_distance = 1.0 - T.sum(hz_final * h_final[pairs[:, 1]],
                                           axis=1)
        alpha = args.alpha
        loss_vec = self_cosine_distance * alpha + pair_cosine_distance * (
            1 - alpha)
        #loss_vec = self_cosine_distance*0.2 + pair_cosine_distance*0.8

        zsum = generator.zsum
        zdiff = generator.zdiff
        logpz = generator.logpz

        sfactor = args.sparsity
        cfactor = args.sparsity * args.coherent
        scost_vec = zsum * sfactor + zdiff * cfactor

        # batch
        cost_vec = loss_vec + scost_vec
        cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        loss = self.loss = T.mean(loss_vec)
        sparsity_cost = self.sparsity_cost = T.mean(scost_vec)
        self.obj = loss + sparsity_cost

        params = []
        for l in self.layers:
            params += l.params
        self.params = params
        say("num of parameters: {}\n".format(
            sum(len(x.get_value(borrow=True).ravel()) for x in params)))

        l2_reg = None
        for p in params:
            if l2_reg is None:
                l2_reg = T.sum(p**2)  #p.norm(2)
            else:
                l2_reg = l2_reg + T.sum(p**2)  #p.norm(2)
        l2_reg = l2_reg * args.l2_reg
        self.l2_cost = l2_reg

        beta = args.beta
        self.cost_g = cost_logpz + generator.l2_cost
        self.cost_e = hinge_loss + loss * beta + l2_reg
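
The hinge loss above scores each query against its positive candidate and the hardest negative in the pool. A minimal NumPy sketch of the same computation on random vectors (shapes and margin as in the code):

import numpy as np

def hinge_loss(vecs, margin=1.0):
    # vecs: num_query * (1 query + 1 pos + negatives) * n_d
    query_vecs = vecs[:, 0, :]
    pos_scores = np.sum(query_vecs * vecs[:, 1, :], axis=1)
    neg_scores = np.sum(query_vecs[:, None, :] * vecs[:, 2:, :], axis=2)
    neg_scores = neg_scores.max(axis=1)  # hardest negative per query
    diff = neg_scores - pos_scores + margin
    return np.mean((diff > 0) * diff)

print(hinge_loss(np.random.randn(4, 6, 8)))
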
Example #21
    def train(self, ids_corpus, train, dev=None, test=None):
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        if dev is not None:
            dev, dev_raw = dev
        if test is not None:
            test, test_raw = test

        if args.joint:
            updates_e, lr_e, gnorm_e = create_optimization_updates(
                cost=self.encoder.cost_e,  #self.encoder.cost,
                params=self.encoder.params,
                lr=args.learning_rate * 0.1,
                method=args.learning)[:3]
        else:
            updates_e = {}

        updates_g, lr_g, gnorm_g = create_optimization_updates(
            cost=self.encoder.cost_g,
            params=self.generator.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

        train_func = theano.function(
                inputs = [ self.x, self.triples, self.pairs ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                        self.encoder.sparsity_cost, self.generator.p1, gnorm_g ],
                # updates = updates_g.items() + updates_e.items() + self.generator.sample_updates,
                updates = collections.OrderedDict(list(updates_g.items()) + list(updates_e.items()) + list(self.generator.sample_updates.items())),
                #no_default_updates = True,
                on_unused_input= "ignore"
            )

        eval_func = theano.function(inputs=[self.x],
                                    outputs=self.encoder.scores)

        eval_func2 = theano.function(
            inputs=[self.x],
            outputs=[self.encoder.scores_z, self.generator.p1, self.z],
            updates=self.generator.sample_updates,
            #no_default_updates = True
        )

        say("\tp_norm: {}\n".format(self.get_pnorm_stat(self.encoder.params)))
        say("\tp_norm: {}\n".format(self.get_pnorm_stat(
            self.generator.params)))

        result_table = PrettyTable(
            ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
            ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])
        last_train_avg_cost = None
        tolerance = 0.5 + 1e-3
        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in range(max_epoch):
            unchanged += 1
            if unchanged > 20: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus,
                                                train,
                                                batch_size,
                                                padding_id,
                                                pad_left=not args.average,
                                                merge=args.merge)
            N = len(train_batches)

            more = True
            param_bak = [p.get_value(borrow=False) for p in self.params]

            while more:

                train_loss = 0.0
                train_cost = 0.0
                train_scost = 0.0
                train_p1 = 0.0

                for i in range(N):
                    # get current batch
                    idts, triples, pairs = train_batches[i]

                    cur_cost, cur_loss, cur_scost, cur_p1, gnormg = train_func(
                        idts, triples, pairs)
                    train_loss += cur_loss
                    train_cost += cur_cost
                    train_scost += cur_scost
                    train_p1 += cur_p1

                    if i % 10 == 0:
                        say("\r{}/{} {:.3f}".format(i, N, train_p1 / (i + 1)))

                cur_train_avg_cost = train_cost / N
                more = False
                if last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost * (1 +
                                                                   tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                            last_train_avg_cost, cur_train_avg_cost))

                if more:
                    lr_val = lr_g.get_value() * 0.5
                    if lr_val < 1e-5: return
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    lr_e.set_value(lr_val)
                    say("Decrease learning rate to {}\n".format(float(lr_val)))
                    for p, v in zip(self.params, param_bak):
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost

                say("\r\n\n")
                say( ( "Epoch {}  cost={:.3f}  loss={:.3f}  scost={:.3f}" \
                    +"  P[1]={:.3f}  |g|={:.3f}\t[{:.3f}m]\n" ).format(
                        epoch,
                        train_cost / N,
                        train_loss / N,
                        train_scost / N,
                        train_p1 / N,
                        float(gnormg),
                        (time.time()-start_time)/60.0
                ))
                say("\tp_norm: {}\n".format(
                    self.get_pnorm_stat(self.encoder.params)))
                say("\tp_norm: {}\n".format(
                    self.get_pnorm_stat(self.generator.params)))

                self.dropout.set_value(0.0)

                if dev is not None:
                    full_MAP, full_MRR, full_P1, full_P5 = self.evaluate(
                        dev, eval_func)
                    dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT = self.evaluate_z(
                        dev, dev_raw, ids_corpus, eval_func2)

                if test is not None:
                    test_MAP, test_MRR, test_P1, test_P5, test_PZ1, test_PT = \
                            self.evaluate_z(test, test_raw, ids_corpus, eval_func2)

                if dev_MAP > best_dev:
                    best_dev = dev_MAP
                    unchanged = 0

                say("\n")
                say("  fMAP={:.2f} fMRR={:.2f} fP1={:.2f} fP5={:.2f}\n".format(
                    full_MAP, full_MRR, full_P1, full_P5))

                say("\n")
                say(("  dMAP={:.2f} dMRR={:.2f} dP1={:.2f} dP5={:.2f}" +
                     " dP[1]={:.3f} d%T={:.3f} best_dev={:.2f}\n").format(
                         dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT,
                         best_dev))

                result_table.add_row([epoch] + [
                    "%.2f" % x for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                    [test_MAP, test_MRR, test_P1, test_P5]
                ])

                if unchanged == 0:
                    say("\n")
                    say(("  tMAP={:.2f} tMRR={:.2f} tP1={:.2f} tP5={:.2f}" +
                         " tP[1]={:.3f} t%T={:.3f}\n").format(
                             test_MAP, test_MRR, test_P1, test_P5, test_PZ1,
                             test_PT))
                    if args.dump_rationale:
                        self.evaluate_z(dev + test, dev_raw + test_raw,
                                        ids_corpus, eval_func2,
                                        args.dump_rationale)

                    #if args.save_model:
                    #    self.save_model(args.save_model)

                dropout_p = np.float64(args.dropout).astype(
                    theano.config.floatX)
                self.dropout.set_value(dropout_p)

                say("\n")
                say("{}".format(result_table))
                say("\n")

            if train_p1 / N <= 1e-4 or train_p1 / N + 1e-4 >= 1.0:
                break
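
When the average training cost rises past the tolerance, the loop above halves the learning rate, restores the parameter snapshot taken at the start of the epoch, and retries. A minimal sketch of that schedule, with NumPy arrays standing in for shared variables:

import numpy as np

def maybe_rollback(cur_cost, last_cost, lr, params, param_bak, tolerance=0.5):
    if last_cost is not None and cur_cost > last_cost * (1 + tolerance):
        for p, v in zip(params, param_bak):
            p[...] = v           # restore the snapshot in place
        return lr * 0.5, True    # retry the epoch with a smaller step
    return lr, False

params = [np.ones(3)]
param_bak = [p.copy() for p in params]
params[0] += 5.0                 # a bad update
lr, retry = maybe_rollback(2.0, 1.0, 0.1, params, param_bak)
print(lr, retry, params[0])      # 0.05 True [1. 1. 1.]
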
Example #22
    def ready(self):
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = self.padding_id
        weights = self.weights

        dropout = self.dropout = theano.shared(
            np.float64(args.dropout).astype(theano.config.floatX))

        # len*batch
        x = self.x = T.imatrix()

        n_d = args.hidden_dim2
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = []
        layer_type = args.layer.lower()
        for i in range(2):
            if layer_type == "rcnn":
                l = RCNN(
                    n_in=n_e,  # if i == 0 else n_d,
                    n_out=n_d,
                    activation=activation,
                    order=args.order)
            elif layer_type == "lstm":
                l = LSTM(
                    n_in=n_e,  # if i == 0 else n_d,
                    n_out=n_d,
                    activation=activation)
            layers.append(l)

        # len * batch
        masks = T.cast(T.neq(x, padding_id), "float32")

        #masks = masks.dimshuffle((0,1,"x"))

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        if weights is not None:
            embs_w = weights[x.ravel()].dimshuffle((0, 'x'))
            embs = embs * embs_w

        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)
        self.word_embs = embs
        flipped_embs = embs[::-1]

        # len*batch*n_d
        h1 = layers[0].forward_all(embs)
        h2 = layers[1].forward_all(flipped_embs)
        h_final = T.concatenate([h1, h2[::-1]], axis=2)
        h_final = apply_dropout(h_final, dropout)
        size = n_d * 2

        output_layer = self.output_layer = ZLayer(n_in=size,
                                                  n_hidden=n_d,
                                                  activation=activation)

        # sample z given text (i.e. x)
        z_pred, sample_updates = output_layer.sample_all(h_final)

        # we compute an approximate gradient by sampling z, so the sampled
        # z must be marked as excluded from the gradient propagation path
        #
        z_pred = self.z_pred = theano.gradient.disconnected_grad(z_pred)
        self.sample_updates = sample_updates
        # print "z_pred", z_pred.ndim

        self.p1 = T.sum(masks * z_pred) / (T.sum(masks) + 1e-8)

        # len*batch*1
        probs = output_layer.forward_all(h_final, z_pred)
        # print "probs", probs.ndim

        logpz = -T.nnet.binary_crossentropy(probs, z_pred) * masks
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        z = z_pred
        self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        self.zdiff = T.sum(T.abs_(z[1:] - z[:-1]),
                           axis=0,
                           dtype=theano.config.floatX)

        params = self.params = []
        for l in layers + [output_layer]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost
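
The generator above regularizes the sampled mask z with two terms: zsum (how many words are selected) and zdiff (how often the selection switches on or off, which encourages coherent spans). A minimal NumPy sketch of both terms for a len * batch binary mask:

import numpy as np

z = np.array([[1, 0],
              [1, 1],
              [0, 1]], dtype=float)         # len * batch
zsum = z.sum(axis=0)                        # words selected per example
zdiff = np.abs(z[1:] - z[:-1]).sum(axis=0)  # 0/1 transitions per example
sfactor, cfactor = 0.01, 0.02               # sparsity, sparsity * coherent
print(zsum * sfactor + zdiff * cfactor)
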
Example #23
def main(args):
    raw_corpus = myio.read_corpus(args.corpus, args.translations or None,
                                  args.translatable_ids or None,
                                  args.generated_questions_train or None)

    generated_questions_eval = myio.read_generated_questions(
        args.generated_questions)

    embedding_layer = None
    if args.trainable_embeddings == 1:
        embedding_layer = myio.create_embedding_layer(
            raw_corpus,
            n_d=args.hidden_dim,
            cut_off=args.cut_off,
            embs=load_embedding_iterator(args.embeddings)
            if args.embeddings else None,
            fix_init_embs=False)
    else:
        embedding_layer = myio.create_embedding_layer(
            raw_corpus,
            n_d=args.hidden_dim,
            cut_off=args.cut_off,
            embs=load_embedding_iterator(args.embeddings)
            if args.embeddings else None)
    ids_corpus = myio.map_corpus(raw_corpus,
                                 embedding_layer,
                                 max_len=args.max_seq_len,
                                 generated_questions=generated_questions_eval)
    say("vocab size={}, corpus size={}\n".format(embedding_layer.n_V,
                                                 len(raw_corpus)))
    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        # dev = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
        dev = myio.read_annotations(args.dev,
                                    K_neg=args.dev_pool_size,
                                    prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus,
                                       dev,
                                       padding_id,
                                       pad_left=not args.average)
    if args.test:
        test = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus,
                                        test,
                                        padding_id,
                                        pad_left=not args.average)

    if args.train:
        start_time = time.time()
        train = myio.read_annotations(
            args.train, training_data_percent=args.training_data_percent)
        train_batches = myio.create_batches(ids_corpus,
                                            train,
                                            args.batch_size,
                                            padding_id,
                                            pad_left=not args.average,
                                            include_generated_questions=True)
        say("{} to create batches\n".format(time.time() - start_time))
        say("{} batches, {} tokens in total, {} triples in total\n".format(
            len(train_batches),
            sum(len(x[0].ravel()) + len(x[1].ravel()) for x in train_batches),
            sum(len(x[2].ravel()) for x in train_batches)))
        train_batches = None

        model = Model(args,
                      embedding_layer,
                      weights=weights if args.reweight else None)
        # print('args.average: '+args.average)
        model.ready()

        # set parameters using a pre-trained network
        if args.do_train == 1:
            if args.load_pretrain:
                model.load_pretrained_parameters(args)

            model.train(ids_corpus, train, dev if args.dev else None,
                        test if args.test else None)

        # EVALUATE THE LOADED MODEL (the loop allows averaging predictions over repeated runs; it runs once here)
        if args.do_evaluate == 1:
            model.load_pretrained_parameters(args)
            # model.set_model(model.load_model(args.load_pretrain))
            for i in range(1):
                r = model.just_eval(dev if args.dev else None,
                                    test if args.test else None)

        # ANALYZE the results
        if len(args.analyze_file.strip()) > 0:
            model.load_pretrained_parameters(args)
            file_name = args.analyze_file.strip()  # e.g. 'AskUbuntu.Rcnn_analysis3.gt(es)-gt.txt'
            model.analyze(file_name, embedding_layer, dev)
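
Several of these examples pass create_idf_weights(...) to reweight word embeddings. A hypothetical sketch of the underlying idea (the actual myio implementation may differ): weight each word by log(N / df), so rare words count more than frequent ones.

import math
from collections import Counter

corpus = [["how", "to", "boot"], ["boot", "loop"], ["how", "why"]]
N = len(corpus)
df = Counter(w for doc in corpus for w in set(doc))  # document frequency
idf = dict((w, math.log(float(N) / df[w])) for w in df)
print(idf["boot"], idf["why"])
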
Example #24
    def train(self, ids_corpus, train, dev=None, test=None, heldout=None):
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id
        bos_id = self.bos_id
        eos_id = self.eos_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id, args.loss)

        updates, lr, gnorm = create_optimization_updates(
                cost = self.cost,
                params = self.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_func = theano.function(
                inputs = [ self.idxs, self.idys ],
                outputs = [ self.cost, self.loss, gnorm ],
                updates = updates
            )

        eval_func = theano.function(
                inputs = [ self.idxs ],
                #outputs = self.scores2
                outputs = self.scores
            )

        nll_func = theano.function(
                inputs = [ self.idxs, self.idys ],
                outputs = [ self.nll, self.mask ]
            )

        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat()
            ))

        result_table = PrettyTable(["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
                                    ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        heldout_PPL = -1

        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 8: break

            start_time = time.time()

            train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                    padding_id, bos_id, eos_id, auto_encode=True)
            N = len(train_batches)

            train_cost = 0.0
            train_loss = 0.0
            train_loss2 = 0.0
            for i in xrange(N):
                # get current batch
                t1, b1, t2 = train_batches[i]

                if args.use_title:
                    idxs, idys = myio.create_one_batch(t1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if args.use_body:
                    idxs, idys = myio.create_one_batch(b1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if i % 10 == 0:
                    say("\r{}/{}".format(i,N))

                if i == N-1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(test, eval_func)
                    if heldout is not None:
                        heldout_PPL = self.evaluate_perplexity(heldout, nll_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row(
                            [ epoch ] +
                            [ "%.2f" % x for x in [ dev_MAP, dev_MRR, dev_P1, dev_P5 ] +
                                        [ test_MAP, test_MRR, test_P1, test_P5 ] ]
                        )
                        if args.model:
                            self.save_model(args.model+".pkl.gz")

                    dropout_p = np.float64(args.dropout).astype(
                                theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f} {:.3f}\t" \
                        +"\tMRR={:.2f},{:.2f}\tPPL={:.1f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            train_loss2 / (i+1),
                            dev_MRR,
                            best_dev,
                            heldout_PPL,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(
                            self.get_pnorm_stat()
                        ))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #25
    def ready(self):
        args = self.args
        weights = self.weights

        # len(title) * batch
        idts = self.idts = T.imatrix()

        # len(body) * batch
        idbs = self.idbs = T.imatrix()

        # num pairs * 3, or num queries * candidate size
        idps = self.idps = T.imatrix()

        dropout = self.dropout = theano.shared(
            np.float64(args.dropout).astype(theano.config.floatX))
        dropout_op = self.dropout_op = Dropout(self.dropout)

        embedding_layer = self.embedding_layer

        activation = get_activation_by_name(args.activation)
        n_d = self.n_d = args.hidden_dim
        n_e = self.n_e = embedding_layer.n_d

        if args.layer.lower() == "rcnn":
            LayerType = RCNN
        elif args.layer.lower() == "lstm":
            LayerType = LSTM
        elif args.layer.lower() == "gru":
            LayerType = GRU

        depth = self.depth = args.depth
        layers = self.layers = []
        for i in range(depth):
            if LayerType != RCNN:
                feature_layer = LayerType(n_in=n_e if i == 0 else n_d,
                                          n_out=n_d,
                                          activation=activation)
            else:
                feature_layer = LayerType(n_in=n_e if i == 0 else n_d,
                                          n_out=n_d,
                                          activation=activation,
                                          order=args.order,
                                          mode=args.mode,
                                          has_outgate=args.outgate)
            layers.append(feature_layer)

        # feature computation starts here

        # (len*batch)*n_e
        xt = embedding_layer.forward(idts.ravel())
        if weights is not None:
            xt_w = weights[idts.ravel()].dimshuffle((0, 'x'))
            xt = xt * xt_w

        # len*batch*n_e
        xt = xt.reshape((idts.shape[0], idts.shape[1], n_e))
        xt = apply_dropout(xt, dropout)

        # (len*batch)*n_e
        xb = embedding_layer.forward(idbs.ravel())
        if weights is not None:
            xb_w = weights[idbs.ravel()].dimshuffle((0, 'x'))
            xb = xb * xb_w

        # len*batch*n_e
        xb = xb.reshape((idbs.shape[0], idbs.shape[1], n_e))
        xb = apply_dropout(xb, dropout)

        prev_ht = self.xt = xt
        prev_hb = self.xb = xb
        for i in range(depth):
            # len*batch*n_d
            ht = layers[i].forward_all(prev_ht)
            hb = layers[i].forward_all(prev_hb)
            prev_ht = ht
            prev_hb = hb

        # normalize vectors
        if args.normalize:
            ht = self.normalize_3d(ht)
            hb = self.normalize_3d(hb)
            say("h_title dtype: {}\n".format(ht.dtype))

        self.ht = ht
        self.hb = hb

        # average over length, ignore paddings
        # batch * d
        if args.average:
            ht = self.average_without_padding(ht, idts)
            hb = self.average_without_padding(hb, idbs)
        else:
            ht = ht[-1]
            hb = hb[-1]
        say("h_avg_title dtype: {}\n".format(ht.dtype))

        # batch * d
        h_final = (ht + hb) * 0.5
        h_final = apply_dropout(h_final, dropout)
        h_final = self.normalize_2d(h_final)
        self.h_final = h_final
        say("h_final dtype: {}\n".format(ht.dtype))

        # For testing:
        #   first one in batch is query, the rest are candidate questions
        self.scores = T.dot(h_final[1:], h_final[0])

        # For training:
        xp = h_final[idps.ravel()]
        xp = xp.reshape((idps.shape[0], idps.shape[1], n_d))
        # num query * n_d
        query_vecs = xp[:, 0, :]
        # num query
        pos_scores = T.sum(query_vecs * xp[:, 1, :], axis=1)
        # num query * candidate size
        neg_scores = T.sum(query_vecs.dimshuffle((0, 'x', 1)) * xp[:, 2:, :],
                           axis=2)
        # num query
        neg_scores = T.max(neg_scores, axis=1)
        diff = neg_scores - pos_scores + 1.0
        loss = T.mean((diff > 0) * diff)
        self.loss = loss

        params = []
        for l in self.layers:
            params += l.params
        self.params = params
        say("num of parameters: {}\n".format(
            sum(len(x.get_value(borrow=True).ravel()) for x in params)))

        l2_reg = None
        for p in params:
            if l2_reg is None:
                l2_reg = p.norm(2)
            else:
                l2_reg = l2_reg + p.norm(2)
        l2_reg = l2_reg * args.l2_reg
        self.cost = self.loss + l2_reg
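
Because h_final is L2-normalized, the test-time scores above are plain cosine similarities between the query (row 0) and each candidate. A minimal NumPy sketch of normalize_2d followed by the scoring dot product:

import numpy as np

h = np.random.randn(5, 8)                          # 1 query + 4 candidates
h = h / np.linalg.norm(h, axis=1, keepdims=True)   # normalize_2d
scores = h[1:].dot(h[0])                           # cosine vs. the query
print(scores)
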
Example #26
    def ready(self):
        generator = self.generator
        args = self.args
        weights = self.weights

        dropout = generator.dropout

        # len(text) * batch
        idts = generator.x
        z = generator.z_pred
        z = z.dimshuffle((0,1,"x"))

        # batch * 2
        pairs = self.pairs = T.imatrix()

        # num pairs * 3, or num queries * candidate size
        triples = self.triples = T.imatrix()

        embedding_layer = self.embedding_layer

        activation = get_activation_by_name(args.activation)
        n_d = self.n_d = args.hidden_dim
        n_e = self.n_e = embedding_layer.n_d

        if args.layer.lower() == "rcnn":
            LayerType = RCNN
            LayerType2 = ExtRCNN
        elif args.layer.lower() == "lstm":
            LayerType = LSTM
            LayerType2 = ExtLSTM
        #elif args.layer.lower() == "gru":
        #    LayerType = GRU

        depth = self.depth = args.depth
        layers = self.layers = [ ]
        for i in range(depth):
            if LayerType != RCNN:
                feature_layer = LayerType(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation
                    )
            else:
                feature_layer = LayerType(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation,
                        order = args.order,
                        mode = args.mode,
                        has_outgate = args.outgate
                    )
            layers.append(feature_layer)

        extlayers = self.extlayers = [ ]
        for i in range(depth):
            if LayerType != RCNN:
                feature_layer = LayerType2(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation
                    )
            else:
                feature_layer = LayerType2(
                        n_in = n_e if i == 0 else n_d,
                        n_out = n_d,
                        activation = activation,
                        order = args.order,
                        mode = args.mode,
                        has_outgate = args.outgate
                    )
            feature_layer.copy_params(layers[i])
            extlayers.append(feature_layer)


        # feature computation starts here

        xt = generator.word_embs

        # encode full text into representation
        prev_ht = self.xt = xt
        for i in range(depth):
            # len*batch*n_d
            ht = layers[i].forward_all(prev_ht)
            prev_ht = ht

        # encode selected text into representation
        prev_htz = self.xt = xt
        for i in range(depth):
            # len*batch*n_d
            htz = extlayers[i].forward_all(prev_htz, z)
            prev_htz = htz

        # normalize vectors
        if args.normalize:
            ht = self.normalize_3d(ht)
            htz = self.normalize_3d(htz)
            say("h_title dtype: {}\n".format(ht.dtype))

        self.ht = ht
        self.htz = htz

        # average over length, ignore paddings
        # batch * d
        if args.average:
            ht = self.average_without_padding(ht, idts)
            htz = self.average_without_padding(htz, idts, z)
        else:
            ht = ht[-1]
            htz = htz[-1]
        say("h_avg_title dtype: {}\n".format(ht.dtype))

        # batch * d
        h_final = apply_dropout(ht, dropout)
        h_final = self.normalize_2d(h_final)
        hz_final = apply_dropout(htz, dropout)
        hz_final = self.normalize_2d(hz_final)
        self.h_final = h_final
        self.hz_final = hz_final

        say("h_final dtype: {}\n".format(ht.dtype))

        # For testing:
        #   first one in batch is query, the rest are candidate questions
        self.scores = T.dot(h_final[1:], h_final[0])
        self.scores_z = T.dot(hz_final[1:], hz_final[0])

        # For training encoder:
        xp = h_final[triples.ravel()]
        xp = xp.reshape((triples.shape[0], triples.shape[1], n_d))
        # num query * n_d
        query_vecs = xp[:,0,:]
        # num query
        pos_scores = T.sum(query_vecs*xp[:,1,:], axis=1)
        # num query * candidate size
        neg_scores = T.sum(query_vecs.dimshuffle((0,'x',1))*xp[:,2:,:], axis=2)
        # num query
        neg_scores = T.max(neg_scores, axis=1)
        diff = neg_scores - pos_scores + 1.0
        hinge_loss = T.mean( (diff>0)*diff )

        # For training generator

        # batch
        self_cosine_distance = 1.0 - T.sum(hz_final * h_final, axis=1)
        pair_cosine_distance = 1.0 - T.sum(hz_final * h_final[pairs[:,1]], axis=1)
        alpha = args.alpha
        loss_vec = self_cosine_distance*alpha + pair_cosine_distance*(1-alpha)
        #loss_vec = self_cosine_distance*0.2 + pair_cosine_distance*0.8

        zsum = generator.zsum
        zdiff = generator.zdiff
        logpz = generator.logpz

        sfactor = args.sparsity
        cfactor = args.sparsity * args.coherent
        scost_vec = zsum*sfactor + zdiff*cfactor

        # batch
        cost_vec = loss_vec + scost_vec
        cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        loss = self.loss = T.mean(loss_vec)
        sparsity_cost = self.sparsity_cost = T.mean(scost_vec)
        self.obj =  loss + sparsity_cost

        params = [ ]
        for l in self.layers:
            params += l.params
        self.params = params
        say("num of parameters: {}\n".format(
            sum(len(x.get_value(borrow=True).ravel()) for x in params)
        ))

        l2_reg = None
        for p in params:
            if l2_reg is None:
                l2_reg = T.sum(p**2) #p.norm(2)
            else:
                l2_reg = l2_reg + T.sum(p**2) #p.norm(2)
        l2_reg = l2_reg * args.l2_reg
        self.l2_cost = l2_reg

        beta = args.beta
        self.cost_g = cost_logpz + generator.l2_cost
        self.cost_e = hinge_loss + loss*beta + l2_reg
        print "cost dtype", self.cost_g.dtype, self.cost_e.dtype