Example #1
    def build_graph(self):
        args = self.args
        cost = self.all_loss
        meta_emb = self.meta_emb
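        # create_optimization_updates returns the parameter updates for the chosen
        # optimizer along with the (shared) learning rate and the gradient norm.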
        updates, lr, gnorm = create_optimization_updates(
            cost=cost,
            params=self.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

        train_model = theano.function(
            inputs=[self.batch_ids, self.batch_masks],
            outputs=[cost, gnorm],
            updates=updates,
            allow_input_downcast=True)

        predict_model = theano.function(
            inputs=[self.batch_ids, self.batch_masks],
            outputs=cost,
            allow_input_downcast=True)

        embs_output = theano.function(inputs=[self.batch_ids],
                                      outputs=meta_emb.embs,
                                      allow_input_downcast=True)

        return train_model, predict_model, embs_output, self.params
Example #2
File: main.py Project: Two222/rcnn-1
    def train(self, train, dev, test):
        args = self.args
        trainx, trainy = train
        batch_size = args.batch

        if dev:
            dev_batches_x, dev_batches_y = create_batches(
                range(len(dev[0])), dev[0], dev[1], batch_size)

        if test:
            test_batches_x, test_batches_y = create_batches(
                range(len(test[0])), test[0], test[1], batch_size)

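        # Training objective: negative log-likelihood plus the L2 weight penalty.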
        cost = self.nll_loss + self.l2_sqr

        updates, lr, gnorm = create_optimization_updates(
            cost=cost,
            params=self.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

        train_model = theano.function(inputs=[self.x, self.y],
                                      outputs=[cost, gnorm],
                                      updates=updates,
                                      allow_input_downcast=True)

        eval_acc = theano.function(inputs=[self.x],
                                   outputs=self.pred,
                                   allow_input_downcast=True)

        unchanged = 0
        best_dev = 0.0
        dropout_prob = np.float64(args.dropout_rate).astype(
            theano.config.floatX)

        start_time = time.time()
        eval_period = args.eval_period

        perm = range(len(trainx))

        say(
            str([
                "%.2f" % np.linalg.norm(x.get_value(borrow=True))
                for x in self.params
            ]) + "\n")
        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 20: return
            train_loss = 0.0

            random.shuffle(perm)
            batches_x, batches_y = create_batches(perm, trainx, trainy,
                                                  batch_size)

            N = len(batches_x)
            for i in xrange(N):

                if i % 100 == 0:
                    sys.stdout.write("\r%d" % i)
                    sys.stdout.flush()

                x = batches_x[i]
                y = batches_y[i]

                va, grad_norm = train_model(x, y)
                train_loss += va

                # debug
                if math.isnan(va):
                    print ""
                    print i - 1, i
                    print x
                    print y
                    return

                if (i == N - 1) or (eval_period > 0 and
                                    (i + 1) % eval_period == 0):
                    self.dropout.set_value(0.0)

                    say("\n")
                    say("Epoch %.1f\tloss=%.4f\t|g|=%s  [%.2fm]\n" %
                        (epoch + (i + 1) / (N + 0.0), train_loss /
                         (i + 1), float(grad_norm),
                         (time.time() - start_time) / 60.0))
                    say(
                        str([
                            "%.2f" % np.linalg.norm(x.get_value(borrow=True))
                            for x in self.params
                        ]) + "\n")

                    if dev:
                        preds = [eval_acc(x) for x in dev_batches_x]
                        nowf_dev = self.eval_accuracy(preds, dev_batches_y)
                        if nowf_dev > best_dev:
                            unchanged = 0
                            best_dev = nowf_dev
                            if args.save:
                                self.save_model(args.save, args)

                        say("\tdev accuracy=%.4f\tbest=%.4f\n" %
                            (nowf_dev, best_dev))
                        if args.test and nowf_dev == best_dev:
                            preds = [eval_acc(x) for x in test_batches_x]
                            nowf_test = self.eval_accuracy(
                                preds, test_batches_y)
                            say("\ttest accuracy=%.4f\n" % (nowf_test, ))

                        if best_dev > nowf_dev + 0.05:
                            return

                    self.dropout.set_value(dropout_prob)

                    start_time = time.time()
Example #3
    def train(self, ids_corpus, train, dev=None, test=None, heldout=None):
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id
        bos_id = self.bos_id
        eos_id = self.eos_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id, args.loss)

        updates, lr, gnorm = create_optimization_updates(
            cost=self.cost,
            params=self.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

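        # Compile one function per role: a training step that applies the updates,
        # a scoring function for ranking evaluation, and an NLL function used for perplexity.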
        train_func = theano.function(inputs=[self.idxs, self.idys],
                                     outputs=[self.cost, self.loss, gnorm],
                                     updates=updates)

        eval_func = theano.function(
            inputs=[self.idxs],
            #outputs = self.scores2
            outputs=self.scores)

        nll_func = theano.function(inputs=[self.idxs, self.idys],
                                   outputs=[self.nll, self.mask])

        say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        result_table = PrettyTable(
            ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
            ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        heldout_PPL = -1

        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 8: break

            start_time = time.time()

            train_batches = myio.create_batches(ids_corpus,
                                                train,
                                                batch_size,
                                                padding_id,
                                                bos_id,
                                                eos_id,
                                                auto_encode=True)
            N = len(train_batches)

            train_cost = 0.0
            train_loss = 0.0
            train_loss2 = 0.0
            for i in xrange(N):
                # get current batch
                t1, b1, t2 = train_batches[i]

                if args.use_title:
                    idxs, idys = myio.create_one_batch(t1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if args.use_body:
                    idxs, idys = myio.create_one_batch(b1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if i % 10 == 0:
                    say("\r{}/{}".format(i, N))

                if i == N - 1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(
                            dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(
                            test, eval_func)
                    if heldout is not None:
                        heldout_PPL = self.evaluate_perplexity(
                            heldout, nll_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row([epoch] + [
                            "%.2f" % x
                            for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                            [test_MAP, test_MRR, test_P1, test_P5]
                        ])
                        if args.model:
                            self.save_model(args.model + ".pkl.gz")

                    dropout_p = np.float64(args.dropout).astype(
                        theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f} {:.3f}\t" \
                        +"\tMRR={:.2f},{:.2f}\tPPL={:.1f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            train_loss2 / (i+1),
                            dev_MRR,
                            best_dev,
                            heldout_PPL,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #4
File: main.py Project: AlTheEngineer/rcnn
    def train(self, ids_corpus, train, dev=None, test=None):
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        updates, lr, gnorm = create_optimization_updates(
                cost = self.cost,
                params = self.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_func = theano.function(
                inputs = [ self.idts, self.idbs, self.idps ],
                outputs = [ self.cost, self.loss, gnorm ],
                updates = updates
            )

        eval_func = theano.function(
                inputs = [ self.idts, self.idbs ],
                outputs = self.scores,
                on_unused_input='ignore'
            )

        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat()
            ))

        result_table = PrettyTable(["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
                                    ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 15: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                    padding_id, pad_left = not args.average)
            N = len(train_batches)

            train_loss = 0.0
            train_cost = 0.0

            for i in xrange(N):
                # get current batch
                idts, idbs, idps = train_batches[i]

                cur_cost, cur_loss, grad_norm = train_func(idts, idbs, idps)
                train_loss += cur_loss
                train_cost += cur_cost

                if i % 10 == 0:
                    say("\r{}/{}".format(i,N))

                if i == N-1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(test, eval_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row(
                            [ epoch ] +
                            [ "%.2f" % x for x in [ dev_MAP, dev_MRR, dev_P1, dev_P5 ] +
                                        [ test_MAP, test_MRR, test_P1, test_P5 ] ]
                        )
                        if args.save_model:
                            self.save_model(args.save_model)

                    dropout_p = np.float64(args.dropout).astype(
                                theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f}" \
                        +"\tMRR={:.2f},{:.2f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            dev_MRR,
                            best_dev,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(
                            self.get_pnorm_stat()
                        ))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #5
File: main.py Project: Sundayxr/rcnn
    def train(self, train, dev, test):
        args = self.args
        trainx, trainy = train
        batch_size = args.batch

        if dev:
            dev_batches_x, dev_batches_y = create_batches(
                    range(len(dev[0])),
                    dev[0],
                    dev[1],
                    batch_size
            )

        if test:
            test_batches_x, test_batches_y = create_batches(
                    range(len(test[0])),
                    test[0],
                    test[1],
                    batch_size
            )

        cost = self.nll_loss + self.l2_sqr

        updates, lr, gnorm = create_optimization_updates(
                cost = cost,
                params = self.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_model = theano.function(
             inputs = [self.x, self.y],
             outputs = [ cost, gnorm ],
             updates = updates,
             allow_input_downcast = True
        )

        eval_acc = theano.function(
             inputs = [self.x],
             outputs = self.pred,
             allow_input_downcast = True
        )

        unchanged = 0
        best_dev = 0.0
        dropout_prob = np.float64(args.dropout_rate).astype(theano.config.floatX)

        start_time = time.time()
        eval_period = args.eval_period

        perm = range(len(trainx))

        say(str([ "%.2f" % np.linalg.norm(x.get_value(borrow=True)) for x in self.params ])+"\n")
        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 20: return
            train_loss = 0.0

            random.shuffle(perm)
            batches_x, batches_y = create_batches(perm, trainx, trainy, batch_size)

            N = len(batches_x)
            for i in xrange(N):

                if i % 100 == 0:
                    sys.stdout.write("\r%d" % i)
                    sys.stdout.flush()

                x = batches_x[i]
                y = batches_y[i]

                va, grad_norm = train_model(x, y)
                train_loss += va

                # debug
                if math.isnan(va):
                    print ""
                    print i-1, i
                    print x
                    print y
                    return

                if (i == N-1) or (eval_period > 0 and (i+1) % eval_period == 0):
                    self.dropout.set_value(0.0)

                    say( "\n" )
                    say( "Epoch %.1f\tloss=%.4f\t|g|=%s  [%.2fm]\n" % (
                            epoch + (i+1)/(N+0.0),
                            train_loss / (i+1),
                            float(grad_norm),
                            (time.time()-start_time) / 60.0
                    ))
                    say(str([ "%.2f" % np.linalg.norm(x.get_value(borrow=True)) for x in self.params ])+"\n")

                    if dev:
                        preds = [ eval_acc(x) for x in dev_batches_x ]
                        nowf_dev = self.eval_accuracy(preds, dev_batches_y)
                        if nowf_dev > best_dev:
                            unchanged = 0
                            best_dev = nowf_dev
                            if args.save:
                                self.save_model(args.save, args)

                        say("\tdev accuracy=%.4f\tbest=%.4f\n" % (
                                nowf_dev,
                                best_dev
                        ))
                        if args.test and nowf_dev == best_dev:
                            preds = [ eval_acc(x) for x in test_batches_x ]
                            nowf_test = self.eval_accuracy(preds, test_batches_y)
                            say("\ttest accuracy=%.4f\n" % (
                                    nowf_test,
                            ))

                        if best_dev > nowf_dev + 0.05:
                            return

                    self.dropout.set_value(dropout_prob)

                    start_time = time.time()
Example #6
File: main.py Project: AlTheEngineer/rcnn
    def train(self, ids_corpus, train, dev=None, test=None, heldout=None):
        args = self.args
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id
        bos_id = self.bos_id
        eos_id = self.eos_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id, args.loss)

        updates, lr, gnorm = create_optimization_updates(
                cost = self.cost,
                params = self.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_func = theano.function(
                inputs = [ self.idxs, self.idys ],
                outputs = [ self.cost, self.loss, gnorm ],
                updates = updates
            )

        eval_func = theano.function(
                inputs = [ self.idxs ],
                #outputs = self.scores2
                outputs = self.scores
            )

        nll_func = theano.function(
                inputs = [ self.idxs, self.idys ],
                outputs = [ self.nll, self.mask ]
            )

        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat()
            ))

        result_table = PrettyTable(["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
                                    ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        heldout_PPL = -1

        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 8: break

            start_time = time.time()

            train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                    padding_id, bos_id, eos_id, auto_encode=True)
            N = len(train_batches)

            train_cost = 0.0
            train_loss = 0.0
            train_loss2 = 0.0
            for i in xrange(N):
                # get current batch
                t1, b1, t2 = train_batches[i]

                if args.use_title:
                    idxs, idys = myio.create_one_batch(t1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if args.use_body:
                    idxs, idys = myio.create_one_batch(b1, t2, padding_id)
                    cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                    train_cost += cur_cost
                    train_loss += cur_loss
                    train_loss2 += cur_loss / idys.shape[0]

                if i % 10 == 0:
                    say("\r{}/{}".format(i,N))

                if i == N-1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(test, eval_func)
                    if heldout is not None:
                        heldout_PPL = self.evaluate_perplexity(heldout, nll_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row(
                            [ epoch ] +
                            [ "%.2f" % x for x in [ dev_MAP, dev_MRR, dev_P1, dev_P5 ] +
                                        [ test_MAP, test_MRR, test_P1, test_P5 ] ]
                        )
                        if args.model:
                            self.save_model(args.model+".pkl.gz")

                    dropout_p = np.float64(args.dropout).astype(
                                theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f} {:.3f}\t" \
                        +"\tMRR={:.2f},{:.2f}\tPPL={:.1f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            train_loss2 / (i+1),
                            dev_MRR,
                            best_dev,
                            heldout_PPL,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(
                            self.get_pnorm_stat()
                        ))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #7
    def train(self, args, train, dev, test=None):
        embedding_layer = self.layers[-2]

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        rnn_dropout_prob = np.float64(args["rnn_dropout"]).astype(
            theano.config.floatX)
        batch_size = args["batch_size"]
        unroll_size = args["unroll_size"]

        train = create_batches(train, embedding_layer.map_to_ids, batch_size)

        dev = create_batches(dev, embedding_layer.map_to_ids, 1)

        if test is not None:
            test = create_batches(test, embedding_layer.map_to_ids, 1)

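        # Cost is the summed token NLL normalized by the batch dimension (idxs.shape[1]).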
        cost = T.sum(self.nll) / self.idxs.shape[1]
        updates, lr, gnorm = create_optimization_updates(
            cost=cost,
            params=self.params,
            lr=args["learning_rate"],
            eps=args["eps"],
            method=args["learning"])[:3]

        train_func = theano.function(inputs=[self.idxs, self.idys] +
                                     self.init_state,
                                     outputs=[cost, gnorm] + self.last_state,
                                     updates=updates)
        eval_func = theano.function(
            inputs=[self.idxs, self.idys] + self.init_state,
            outputs=[self.nll] + self.last_state,
        )

        N = (len(train[0]) - 1) / unroll_size + 1
        say(" train: {} tokens, {} mini-batches\n".format(
            len(train[0].ravel()), N))
        say(" dev: {} tokens\n".format(len(dev[0].ravel())))

        say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        decay_epoch = args["lr_decay_epoch"]
        decay_rate = args["lr_decay"]
        lr_0 = args["learning_rate"]
        iter_cnt = 0

        depth = args["depth"]
        unchanged = 0
        best_dev = 1e+10
        start_time = 0
        max_epoch = args["max_epoch"]
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 20: break

            if decay_epoch > 0 and epoch >= decay_epoch:
                lr.set_value(np.float32(lr.get_value() * decay_rate))

            start_time = time.time()

            prev_state = [
                np.zeros((batch_size, self.n_d), dtype=theano.config.floatX)
                for i in xrange(depth * 2)
            ]

            train_loss = 0.0
            for i in xrange(N):
                # get current batch
                x = train[0][i * unroll_size:(i + 1) * unroll_size]
                y = train[1][i * unroll_size:(i + 1) * unroll_size]

                iter_cnt += 1
                ret = train_func(x, y, *prev_state)
                cur_loss, grad_norm, prev_state = ret[0], ret[1], ret[2:]
                train_loss += cur_loss / len(x)

                if i % 10 == 0:
                    say("\r{}".format(i))

                if i == N - 1:
                    self.dropout.set_value(0.0)
                    self.rnn_dropout.set_value(0.0)
                    dev_preds = self.evaluate(eval_func, dev, 1, unroll_size)
                    dev_loss = evaluate_average(predictions=dev_preds,
                                                masks=None)
                    dev_ppl = np.exp(dev_loss)
                    self.dropout.set_value(dropout_prob)
                    self.rnn_dropout.set_value(rnn_dropout_prob)

                    say("\r\n")
                    say( ( "Epoch={}  lr={:.4f}  train_loss={:.3f}  train_ppl={:.1f}  " \
                        +"dev_loss={:.3f}  dev_ppl={:.1f}\t|g|={:.3f}\t[{:.1f}m]\n" ).format(
                            epoch,
                            float(lr.get_value(borrow=True)),
                            train_loss/N,
                            np.exp(train_loss/N),
                            dev_loss,
                            dev_ppl,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                        ))
                    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

                    if dev_ppl < best_dev:
                        best_dev = dev_ppl
                        if test is None: continue
                        self.dropout.set_value(0.0)
                        self.rnn_dropout.set_value(0.0)
                        test_preds = self.evaluate(eval_func, test, 1,
                                                   unroll_size)
                        test_loss = evaluate_average(predictions=test_preds,
                                                     masks=None)
                        test_ppl = np.exp(test_loss)
                        self.dropout.set_value(dropout_prob)
                        self.rnn_dropout.set_value(rnn_dropout_prob)
                        say("\tbest_dev={:.1f}  test_loss={:.3f}  test_ppl={:.1f}\n"
                            .format(best_dev, test_loss, test_ppl))
                        if best_dev < 200: unchanged = 0

        say("\n")
Example #8
    def train(self, train, dev, test, rationale_data, trained_max_epochs=None):
        args = self.args
        args.trained_max_epochs = self.trained_max_epochs = trained_max_epochs
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y = myio.create_batches(
                dev[0], dev[1], args.batch, padding_id)
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                test[0], test[1], args.batch, padding_id)
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                [u["xids"] for u in rationale_data],
                [u["y"] for u in rationale_data],
                args.batch,
                padding_id,
                sort=False)

        start_time = time.time()
        train_batches_x, train_batches_y = myio.create_batches(
            train[0], train[1], args.batch, padding_id)
        say("{:.2f}s to create training batches\n\n".format(time.time() -
                                                            start_time))
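
        # Two separate optimizers: one over the encoder parameters (cost_e)
        # and one over the generator parameters (cost_g).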
        updates_e, lr_e, gnorm_e = create_optimization_updates(
            cost=self.encoder.cost_e,
            params=self.encoder.params,
            method=args.learning,
            beta1=args.beta1,
            beta2=args.beta2,
            lr=args.learning_rate)[:3]

        updates_g, lr_g, gnorm_g = create_optimization_updates(
            cost=self.encoder.cost_g,
            params=self.generator.params,
            method=args.learning,
            beta1=args.beta1,
            beta2=args.beta2,
            lr=args.learning_rate)[:3]

        sample_generator = theano.function(
            inputs=[self.x],
            outputs=self.z,
            #updates = self.generator.sample_updates
        )

        get_loss_and_pred = theano.function(
            inputs=[self.x, self.y],
            outputs=[self.encoder.loss_vec, self.encoder.preds, self.z],
            #updates = self.generator.sample_updates
        )

        eval_generator = theano.function(
            inputs=[self.x, self.y],
            outputs=[
                self.z, self.encoder.obj, self.encoder.loss,
                self.encoder.pred_diff
            ],
            #updates = self.generator.sample_updates
        )
        sample_generator = theano.function(
            inputs=[self.x],
            outputs=self.z,
            #updates = self.generator.sample_updates
        )
        sample_encoder = theano.function(
            inputs=[self.x, self.y, self.z],
            outputs=[
                self.encoder.obj, self.encoder.loss, self.encoder.pred_diff
            ],
            #updates = self.generator.sample_updates
        )

        train_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                                self.encoder.sparsity_cost, self.z, self.word_embs, gnorm_e, gnorm_g ],
                updates = updates_e.items() + updates_g.items() #+ self.generator.sample_updates,
            )

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        last_train_avg_cost = None
        last_dev_avg_cost = None
        tolerance = 0.10 + 1e-3
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        for epoch_ in xrange(args.max_epochs - 50):  # -50 when max_epochs = 100 given
            #print(" max epochs in train func: ", args.max_epochs)
            epoch = args.trained_max_epochs + epoch_
            unchanged += 1
            if unchanged > 25:
                print 'dev set increases more than 25 times after the best dev found'
                #return

            train_batches_x, train_batches_y = myio.create_batches(
                train[0], train[1], args.batch, padding_id)

            more = True
            if args.decay_lr:
                param_bak = [p.get_value(borrow=False) for p in self.params]

            start_train_generate = time.time()
            more_counter = 0
            while more:
                processed = 0
                train_cost = 0.0
                train_loss = 0.0
                train_sparsity_cost = 0.0
                p1 = 0.0
                start_time = time.time()

                N = len(train_batches_x)
                #print(" begining : ", train_cost )
                for i in xrange(N):
                    if (i + 1) % 100 == 0:
                        say("\r{}/{} {:.2f}       ".format(
                            i + 1, N, p1 / (i + 1)))

                    bx, by = train_batches_x[i], train_batches_y[i]
                    mask = bx != padding_id
                    start_train_time = time.time()
                    cost, loss, sparsity_cost, bz, emb, gl2_e, gl2_g = train_generator(
                        bx, by)
                    #print('gl2_g: ' , gl2_g)

                    k = len(by)
                    processed += k
                    train_cost += cost
                    train_loss += loss
                    train_sparsity_cost += sparsity_cost
                    p1 += np.sum(bz * mask) / (np.sum(mask) + 1e-8)

                cur_train_avg_cost = train_cost / N
                #print(" end : ", cur_train_avg_cost )
                say("train generate  time: {} \n".format(time.time() -
                                                         start_train_generate))
                if dev:
                    self.dropout.set_value(0.0)
                    start_dev_time = time.time()
                    dev_obj, dev_loss, dev_diff, dev_p1 = self.evaluate_data(
                        dev_batches_x,
                        dev_batches_y,
                        eval_generator,
                        sampling=True)
                    self.dropout.set_value(dropout_prob)
                    say("dev evaluate data time: {} \n".format(time.time() -
                                                               start_dev_time))
                    cur_dev_avg_cost = dev_obj

                more = False
                if args.decay_lr and last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost * (1 +
                                                                   tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                            last_train_avg_cost, cur_train_avg_cost))
                    if dev and cur_dev_avg_cost > last_dev_avg_cost * (
                            1 + tolerance):
                        more = True
                        say("\nDev cost {} --> {}\n".format(
                            last_dev_avg_cost, cur_dev_avg_cost))
                if more:
                    more_counter += 1
                    if more_counter < 20: more = False
                if more:
                    more_counter = 0
                    lr_val = lr_g.get_value() * 0.5
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    lr_e.set_value(lr_val)
                    say("Decrease learning rate to {} at epoch {}\n".format(
                        float(lr_val), epoch_ + 1))
                    for p, v in zip(self.params, param_bak):
                        #print ('param restoreing: ', p, v)
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost
                if dev: last_dev_avg_cost = cur_dev_avg_cost

                say("\n")
                say((
                    "Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  "
                    + "p[1]={:.2f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n"
                ).format(epoch + (i + 1.0) / N, train_cost / N,
                         train_sparsity_cost / N, train_loss / N, p1 / N,
                         float(gl2_e), float(gl2_g),
                         (time.time() - start_time) / 60.0,
                         (time.time() - start_time) / 60.0 / (i + 1) * N))
                say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.encoder.params ])+"\n")
                say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.generator.params ])+"\n")
                say("total encode time = {} total geneartor time = {} \n".
                    format(total_encode_time, total_generate_time))

                if epoch_ % args.save_every == 0:  #and epoch_>0:
                    print 'saving model after epoch -', epoch_ + 1, ' file name: ', args.save_model + str(
                        epoch_)
                    self.save_model(args.save_model + str(epoch_), args)

                if dev:
                    if dev_obj < best_dev:
                        best_dev = dev_obj
                        unchanged = 0
                        if args.dump and rationale_data:
                            self.dump_rationales(args.dump, valid_batches_x,
                                                 valid_batches_y,
                                                 get_loss_and_pred,
                                                 sample_generator)

                        if args.save_model:
                            print 'saving best model after epoch -', epoch_ + 1, ' file name: ', args.save_model
                            self.save_model(args.save_model, args)

                    say((
                        "\tsampling devg={:.4f}  mseg={:.4f}  avg_diffg={:.4f}"
                        + "  p[1]g={:.2f}  best_dev={:.4f}\n").format(
                            dev_obj, dev_loss, dev_diff, dev_p1, best_dev))

                    if rationale_data is not None:
                        self.dropout.set_value(0.0)

                        start_rational_time = time.time()
                        #r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale(
                        #        rationale_data, valid_batches_x,
                        #        valid_batches_y, eval_generator)

                        r_mse, r_p1, r_prec1, r_prec2, gen_time, enc_time, prec_cal_time = self.evaluate_rationale(
                            rationale_data, valid_batches_x, valid_batches_y,
                            sample_generator, sample_encoder, eval_generator)

                        self.dropout.set_value(dropout_prob)
                        say((
                            "\trationale mser={:.4f}  p[1]r={:.2f}  prec1={:.4f}"
                            + "  prec2={:.4f} time nedded for rational={}\n"
                        ).format(r_mse, r_p1, r_prec1, r_prec2,
                                 time.time() - start_rational_time))
Example #9
    def train(self, train, dev, test):
        args = self.args
        x_train, y_train = train
        batch = args.batch
        test_batch = args.test_batch
        score_scale = args.score_scale

        if dev:
            x_dev_batches, y_dev_batches, ay_dev_batches, ayy_dev_batches, ay_mask_dev_batches, w_mask_dev_batches, w_len_dev_batches, sent_maxlen_dev_batches, sent_num_dev_batches = create_batches(
                    range(len(dev[0])),
                    dev[0],
                    dev[1],
                    test_batch,
                    score_scale
                )

        if test:
            x_test_batches, y_test_batches, ay_test_batches, ayy_test_batches, ay_mask_test_batches, w_mask_test_batches, w_len_test_batches, sent_maxlen_test_batches, sent_num_test_batches = create_batches(
                    range(len(test[0])),
                    test[0],
                    test[1],
                    test_batch,
                    score_scale
                )

        cost = self.l2_sqr + self.nll_loss_ay

        print 'Building graph...'
        updates, lr, gnorm = create_optimization_updates(
                cost = cost,
                params = self.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_model = theano.function(
                inputs = [self.x, self.y, self.ay, self.aay, self.ay_mask, self.w_masks, self.w_lens, self.s_maxlen, self.s_num],
                outputs = [ cost, gnorm ],
                updates = updates,
                allow_input_downcast = True
            )

        eval_acc = theano.function(
                inputs = [self.x, self.w_masks, self.w_lens, self.s_maxlen, self.s_num],
                outputs = [self.pred_ay], #, self.output],
                allow_input_downcast = True
            )

        unchanged = 0
        best_dev_result = 0.0
        dropout_rate = np.float64(args.dropout_rate).astype(theano.config.floatX)

        start_time = time.time()
        eval_period = args.eval_period

        perm = range(len(x_train))

        say(str([ "%.2f" % np.linalg.norm(x.get_value(borrow=True)) for x in self.params ])+"\n")

        if args.load:
            self.dropout.set_value(0.0)
            preds = [ eval_acc( x, wm, wl, sm, sn ) for x, wm, wl, sm, sn in zip(x_dev_batches, w_mask_dev_batches, w_len_dev_batches, sent_maxlen_dev_batches, sent_num_dev_batches)]

            ay_pred = [ pred[0] for pred in preds ]
            results = self.eval_accuracy(ay_pred, ay_dev_batches)

            best_dev_result = results[1]
            say("\tDEV RMSE/BEST_ACCURACY/ACCURACY=%.4f_%.4f_%.4f\n" % (
                results[0],
                best_dev_result,
                results[1]
            ))

            preds = [ eval_acc( x, wm, wl, sm, sn ) for x, wm, wl, sm, sn in zip(x_test_batches, w_mask_test_batches, w_len_test_batches, sent_maxlen_test_batches, sent_num_test_batches)]
            ay_pred = [ pred[0] for pred in preds ]
            results = self.eval_accuracy(ay_pred, ay_test_batches)
            say("\tTEST RMSE/ACCURACY=%.4f_%.4f\n" % (
                results[0],
                results[1],
            ))

        for epoch in xrange(args.max_epochs):
            self.dropout.set_value(dropout_rate)
            unchanged += 1
            if unchanged > 20: return
            train_loss = 0.0

            random.shuffle(perm)
            x_batches, y_batches, ay_batches, aay_batches, ay_mask_batches, w_mask_batches, w_len_batches, sent_maxlen_batches, sent_num_batches = create_batches(perm, x_train, y_train, batch, score_scale)

            N = len(x_batches)
            for i in xrange(N):

                if (i + 1) % 100 == 0:
                    sys.stdout.write("\r%d" % i)
                    sys.stdout.flush()

                x = x_batches[i]
                y = y_batches[i]

                va, grad_norm = train_model(x, y, ay_batches[i], aay_batches[i], ay_mask_batches[i], w_mask_batches[i], w_len_batches[i], sent_maxlen_batches[i], sent_num_batches[i])
                train_loss += va

                # debug
                if math.isnan(va):
                    return

                if (i == N-1) or (eval_period > 0 and (i+1) % eval_period == 0):
                    self.dropout.set_value(0.0)

                    say( "\n" )
                    say( "Epoch %.3f\tloss=%.4f\t|g|=%s  [%.2fm]\n" % (
                            epoch + (i+1)/(N+0.0),
                            train_loss / (i+1),
                            float(grad_norm),
                            (time.time()-start_time) / 60.0
                    ))
                    say(str([ "%.2f" % np.linalg.norm(x.get_value(borrow=True)) for x in self.params ])+"\n")

                    if dev:
                        preds = [ eval_acc(x, wm, wl, sm, sn) for x, wm, wl, sm, sn in zip(x_dev_batches, w_mask_dev_batches, w_len_dev_batches, sent_maxlen_dev_batches, sent_num_dev_batches)]
                        ay_pred = [ pred[0] for pred in preds ]
                        results = self.eval_accuracy(ay_pred, ay_dev_batches)
                        say("\tDEV RMSE/BEST_ACCURACY/ACCURACY=%.4f_%.4f_%.4f\n" % (
                            results[0],
                            best_dev_result,
                            results[1]
                        ))

                        if results[1] > best_dev_result:
                            unchanged = 0
                            best_dev_result = results[1]
                            if args.save:
                                self.save_model(args.save, args)

                            preds = [ eval_acc(x, wm, wl, sm, sn) for x, wm, wl, sm, sn in zip(x_test_batches, w_mask_test_batches, w_len_test_batches, sent_maxlen_test_batches, sent_num_test_batches)]
                            ay_pred = [ pred[0] for pred in preds ]

                            results_test = self.eval_accuracy(ay_pred, ay_test_batches)
                            say("\tTEST RMSE/ACCURACY=%.4f_%.4f\n" % (
                                results_test[0],
                                results_test[1]
                            ))

                        if best_dev_result > results[0] + 0.2:
                            return

                    self.dropout.set_value(dropout_rate)
                    start_time = time.time()
Example #10
    def train(self, args, train, dev, test=None):
        embedding_layer = self.layers[0]

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        batch_size = args["batch_size"]
        unroll_size = args["unroll_size"]

        train = create_batches(train, embedding_layer.map_to_ids, batch_size)

        dev = create_batches(dev, embedding_layer.map_to_ids, batch_size)

        if test is not None:
            test = create_batches(test, embedding_layer.map_to_ids, batch_size)

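        # Cost: summed NLL averaged over the batch dimension of the unrolled inputs.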
        cost = T.sum(self.nll) / self.idxs.shape[1]
        updates, lr, gnorm = create_optimization_updates(
            cost=cost,
            params=self.params,
            lr=args["learning_rate"],
            beta1=args["beta1"],
            beta2=args["beta2"],
            rho=args["rho"],
            momentum=args["momentum"],
            gamma=args["gamma"],
            eps=args["eps"],
            method=args["learning"])[:3]
        #if args["learning"] == "adadelta":
        #    lr.set_value(args["learning_rate"])

        train_func = theano.function(
            inputs=[self.idxs, self.idys, self.init_state],
            outputs=[cost, self.last_state, gnorm],
            updates=updates)
        eval_func = theano.function(
            inputs=[self.idxs, self.idys, self.init_state],
            outputs=[self.nll, self.last_state])

        N = (len(train[0]) - 1) / unroll_size + 1
        say(" train: {} tokens, {} mini-batches\n".format(
            len(train[0].ravel()), N))
        say(" dev: {} tokens\n".format(len(dev[0].ravel())))

        say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        decay_lr = args["decay_lr"] and args["learning"].lower() != "adadelta" and \
                    args["learning"].lower() != "adagrad"
        lr_0 = args["learning_rate"]
        iter_cnt = 0

        unchanged = 0
        best_dev = 1e+10
        start_time = 0
        max_epoch = args["max_epoch"]
        for epoch in xrange(max_epoch):
            if unchanged > 5: break
            start_time = time.time()

            prev_state = np.zeros((batch_size, self.n_d * 2),
                                  dtype=theano.config.floatX)

            train_loss = 0.0
            for i in xrange(N):
                # get current batch
                x = train[0][i * unroll_size:(i + 1) * unroll_size]
                y = train[1][i * unroll_size:(i + 1) * unroll_size]

                iter_cnt += 1
                if decay_lr:
                    lr.set_value(np.float32(lr_0 / iter_cnt**0.5))
                cur_loss, prev_state, grad_norm = train_func(x, y, prev_state)
                train_loss += cur_loss / len(x)

                if math.isnan(cur_loss) or math.isnan(grad_norm):
                    say("\nNaN !!\n")
                    return

                if i % 10 == 0:
                    say("\r{}".format(i))

                if i == N - 1:
                    self.dropout.set_value(0.0)
                    dev_preds = self.evaluate(eval_func, dev, batch_size,
                                              unroll_size)
                    dev_loss = evaluate_average(predictions=dev_preds,
                                                masks=None)
                    dev_ppl = np.exp(dev_loss)
                    self.dropout.set_value(dropout_prob)

                    say("\r\n")
                    say( ( "Epoch={}  lr={:.3f}  train_loss={:.3f}  train_ppl={:.1f}  " \
                        +"dev_loss={:.3f}  dev_ppl={:.1f}\t|g|={:.3f}\t[{:.1f}m]\n" ).format(
                            epoch,
                            float(lr.get_value(borrow=True)),
                            train_loss/N,
                            np.exp(train_loss/N),
                            dev_loss,
                            dev_ppl,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                        ))
                    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

                    # halve the learning rate
                    #if args["learning"] == "sgd" and dev_ppl > best_dev-1:
                    #    lr.set_value(np.max([lr.get_value()/2.0, np.float32(0.0001)]))

                    if dev_ppl < best_dev:
                        best_dev = dev_ppl
                        if test is None: continue
                        self.dropout.set_value(0.0)
                        test_preds = self.evaluate(eval_func, test, batch_size,
                                                   unroll_size)
                        test_loss = evaluate_average(predictions=test_preds,
                                                     masks=None)
                        test_ppl = np.exp(test_loss)
                        self.dropout.set_value(dropout_prob)
                        say("\tbest_dev={:.1f}  test_loss={:.3f}  test_ppl={:.1f}\n"
                            .format(best_dev, test_loss, test_ppl))
                    if best_dev > 200: unchanged += 1

        say("\n")
Example #11
    def train(self):
        args = self.args
        train_x, train_y = self.train_set
        dev_x, dev_y = self.dev_set
        test_x, test_y = self.test_set

        updates, lr, gnorm = create_optimization_updates(
                cost = self.cost,
                params = self.params,
                lr = args.learning_rate,
                rho = args.rho,
                beta1 = args.beta1,
                beta2 = args.beta2,
                momentum = args.momentum,
                gamma = args.gamma,
                method = args.learning
            )[:3]

        batch = args.batch
        index = self.index
        x = self.x
        y = self.y

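        # The data sets are Theano shared variables, so `givens` slices out the
        # current mini-batch and only the batch index is passed at call time.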
        train_func = theano.function(
                inputs = [ index ],
                outputs = [ self.cost, gnorm ],
                givens = {
                    x: train_x[index*batch:(index+1)*batch],
                    y: train_y[index*batch:(index+1)*batch]
                },
                updates = updates
            )

        dev_func = theano.function(
                inputs = [ index ],
                outputs = [ self.err, self.loss ],
                givens = {
                    x: dev_x[index*batch:(index+1)*batch],
                    y: dev_y[index*batch:(index+1)*batch]
                }
            )

        test_func = theano.function(
                inputs = [ index ],
                outputs = [ self.err, self.loss ],
                givens = {
                    x: test_x[index*batch:(index+1)*batch],
                    y: test_y[index*batch:(index+1)*batch]
                }
            )

        decay_lr = args.decay_lr and args.learning.lower() != "adadelta" and \
                        args.learning.lower() != "adagrad"
        lr_0 = args.learning_rate
        iter_cnt = 0

        N = train_x.get_value(borrow=True).shape[0]
        num_batches = (N-1)/batch + 1
        processed = 0
        period = args.eval_period

        best_dev_err = 1.0

        max_epochs = args.max_epochs
        for epoch in xrange(max_epochs):
            start_time = time.time()
            tot_cost = 0
            for i in xrange(num_batches):
                iter_cnt += 1
                if decay_lr:
                    lr.set_value(np.float32(lr_0/iter_cnt**0.5))
                cost, grad_norm = train_func(i)
                tot_cost += cost

                if math.isnan(cost):
                    say("NaN !!\n")
                    return

                ed = min(N, (i+1)*batch)
                prev = processed/period
                processed += ed-i*batch

                if (i == num_batches-1) or (processed/period > prev):
                    say("Epoch={:.1f} Sample={} cost={:.4f} |g|={:.2f}\t[{:.1f}m]\n".format(
                            epoch + (i+1.0)/num_batches,
                            processed,
                            tot_cost/(i+1),
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                        ))
                    dev_err, dev_loss = self.evaluate(dev_func, dev_x)
                    best_dev_err = min(best_dev_err, dev_err)
                    say("\tdev_err={:.4f} dev_loss={:.4f} best_dev={:.4f}\n".format(
                            dev_err, dev_loss, best_dev_err))
                    if dev_err == best_dev_err:
                        test_err, test_loss = self.evaluate(test_func, test_x)
                        say("\ttest_err={:.4f} test_loss={:.4f}\n".format(
                                test_err, test_loss))
                    say("\n")
Example #12
    def train(self, ids_corpus, train, dev=None, test=None):
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        if dev is not None:
            dev, dev_raw = dev
        if test is not None:
            test, test_raw = test

        if args.joint:
            updates_e, lr_e, gnorm_e = create_optimization_updates(
                cost=self.encoder.cost_e,  #self.encoder.cost,
                params=self.encoder.params,
                lr=args.learning_rate * 0.1,
                method=args.learning)[:3]
        else:
            updates_e = {}

        updates_g, lr_g, gnorm_g = create_optimization_updates(
            cost=self.encoder.cost_g,
            params=self.generator.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

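        # train_func updates the generator (and, with args.joint, the encoder) and
        # reports the objective, loss, sparsity cost, the generator's p1, and |g|.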
        train_func = theano.function(
                inputs = [ self.x, self.triples, self.pairs ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                        self.encoder.sparsity_cost, self.generator.p1, gnorm_g ],
                # updates = updates_g.items() + updates_e.items() + self.generator.sample_updates,
                updates = collections.OrderedDict(list(updates_g.items()) + list(updates_e.items()) + list(self.generator.sample_updates.items())),
                #no_default_updates = True,
                on_unused_input= "ignore"
            )

        eval_func = theano.function(inputs=[self.x],
                                    outputs=self.encoder.scores)

        eval_func2 = theano.function(
            inputs=[self.x],
            outputs=[self.encoder.scores_z, self.generator.p1, self.z],
            updates=self.generator.sample_updates,
            #no_default_updates = True
        )

        say("\tp_norm: {}\n".format(self.get_pnorm_stat(self.encoder.params)))
        say("\tp_norm: {}\n".format(self.get_pnorm_stat(
            self.generator.params)))

        result_table = PrettyTable(
            ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
            ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])
        last_train_avg_cost = None
        tolerance = 0.5 + 1e-3
        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in range(max_epoch):
            unchanged += 1
            if unchanged > 20: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus,
                                                train,
                                                batch_size,
                                                padding_id,
                                                pad_left=not args.average,
                                                merge=args.merge)
            N = len(train_batches)

            more = True
            param_bak = [p.get_value(borrow=False) for p in self.params]

            while more:

                train_loss = 0.0
                train_cost = 0.0
                train_scost = 0.0
                train_p1 = 0.0

                for i in range(N):
                    # get current batch
                    idts, triples, pairs = train_batches[i]

                    cur_cost, cur_loss, cur_scost, cur_p1, gnormg = train_func(
                        idts, triples, pairs)
                    train_loss += cur_loss
                    train_cost += cur_cost
                    train_scost += cur_scost
                    train_p1 += cur_p1

                    if i % 10 == 0:
                        say("\r{}/{} {:.3f}".format(i, N, train_p1 / (i + 1)))

                cur_train_avg_cost = train_cost / N
                more = False
                if last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost * (1 +
                                                                   tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                            last_train_avg_cost, cur_train_avg_cost))

                if more:
                    lr_val = lr_g.get_value() * 0.5
                    if lr_val < 1e-5: return
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    lr_e.set_value(lr_val)
                    say("Decrease learning rate to {}\n".format(float(lr_val)))
                    for p, v in zip(self.params, param_bak):
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost

                say("\r\n\n")
                say( ( "Epoch {}  cost={:.3f}  loss={:.3f}  scost={:.3f}" \
                    +"  P[1]={:.3f}  |g|={:.3f}\t[{:.3f}m]\n" ).format(
                        epoch,
                        train_cost / N,
                        train_loss / N,
                        train_scost / N,
                        train_p1 / N,
                        float(gnormg),
                        (time.time()-start_time)/60.0
                ))
                say("\tp_norm: {}\n".format(
                    self.get_pnorm_stat(self.encoder.params)))
                say("\tp_norm: {}\n".format(
                    self.get_pnorm_stat(self.generator.params)))

                self.dropout.set_value(0.0)

                if dev is not None:
                    full_MAP, full_MRR, full_P1, full_P5 = self.evaluate(
                        dev, eval_func)
                    dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT = self.evaluate_z(
                        dev, dev_raw, ids_corpus, eval_func2)

                if test is not None:
                    test_MAP, test_MRR, test_P1, test_P5, test_PZ1, test_PT = \
                            self.evaluate_z(test, test_raw, ids_corpus, eval_func2)

                if dev_MAP > best_dev:
                    best_dev = dev_MAP
                    unchanged = 0

                say("\n")
                say("  fMAP={:.2f} fMRR={:.2f} fP1={:.2f} fP5={:.2f}\n".format(
                    full_MAP, full_MRR, full_P1, full_P5))

                say("\n")
                say(("  dMAP={:.2f} dMRR={:.2f} dP1={:.2f} dP5={:.2f}" +
                     " dP[1]={:.3f} d%T={:.3f} best_dev={:.2f}\n").format(
                         dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT,
                         best_dev))

                result_table.add_row([epoch] + [
                    "%.2f" % x for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                    [test_MAP, test_MRR, test_P1, test_P5]
                ])

                if unchanged == 0:
                    say("\n")
                    say(("  tMAP={:.2f} tMRR={:.2f} tP1={:.2f} tP5={:.2f}" +
                         " tP[1]={:.3f} t%T={:.3f}\n").format(
                             test_MAP, test_MRR, test_P1, test_P5, test_PZ1,
                             test_PT))
                    if args.dump_rationale:
                        self.evaluate_z(dev + test, dev_raw + test_raw,
                                        ids_corpus, eval_func2,
                                        args.dump_rationale)

                    #if args.save_model:
                    #    self.save_model(args.save_model)

                dropout_p = np.float64(args.dropout).astype(
                    theano.config.floatX)
                self.dropout.set_value(dropout_p)

                say("\n")
                say("{}".format(result_table))
                say("\n")

            if train_p1 / N <= 1e-4 or train_p1 / N + 1e-4 >= 1.0:
                break
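Note: the retry loop in the example above halves the learning rate and restores a parameter snapshot whenever the average training cost rises by more than the tolerance. A minimal standalone sketch of that pattern follows; run_epoch, params, and lr are hypothetical stand-ins, not names from the original code.

    import numpy as np

    def train_with_cost_guard(run_epoch, params, lr, tolerance=0.5, min_lr=1e-5):
        """Redo an epoch with a halved learning rate if the cost blows up.

        run_epoch(lr) -> average cost of one pass over the training data.
        params        -> list of numpy arrays that run_epoch updates in place.
        """
        last_cost = None
        while True:
            snapshot = [p.copy() for p in params]         # back up parameters
            cost = run_epoch(lr)
            if last_cost is not None and cost > last_cost * (1.0 + tolerance):
                lr *= 0.5                                  # decay and retry
                if lr < min_lr:
                    return lr
                for p, s in zip(params, snapshot):         # roll back the bad update
                    p[:] = s
                continue
            last_cost = cost
            return lr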
Example #13
    def train(self, train, dev, test):
        args = self.args
        trainx, trainy, trainu = train
        batch_size = args.batch

        if dev:
            dev_batches_x, dev_batches_y, dev_batches_u, dev_batches_w_masks, dev_batches_w_lens, dev_batches_sent_maxlen, dev_batches_sent_num = create_batches_doc(
                range(len(dev[0])),
                dev[0],
                dev[1],
                dev[2],
                128
            )

        if test:
            test_batches_x, test_batches_y, test_batches_u, test_batches_w_masks, test_batches_w_lens, test_batches_sent_maxlen, test_batches_sent_num = create_batches_doc(
                range(len(test[0])),
                test[0],
                test[1],
                test[2],
                128
            )

        cost = self.nll_loss + self.l2_sqr

        updates, lr, gnorm = create_optimization_updates(
            cost=cost,
            params=self.params,
            lr=args.learning_rate,
            method=args.learning
        )[:3]

        train_model = theano.function(
            inputs=[self.x, self.y, self.usr, self.w_masks,
                    self.w_lens, self.s_ml, self.s_num],
            outputs=[cost, gnorm],
            updates=updates,
            allow_input_downcast=True
        )

        eval_acc = theano.function(
            inputs=[self.x, self.usr, self.w_masks,
                    self.w_lens, self.s_ml, self.s_num],
            outputs=self.pred,
            allow_input_downcast=True
        )

        unchanged = 0
        best_dev = 0.0
        dropout_prob = np.float64(
            args.dropout_rate).astype(theano.config.floatX)

        start_time = time.time()
        eval_period = args.eval_period

        perm = range(len(trainx))

        say(str(["%.2f" % np.linalg.norm(x.get_value(borrow=True))
                 for x in self.params]) + "\n")
        if args.load:
            self.dropout.set_value(0.0)
            preds = [eval_acc(x, u, wm, wl, sm, sn) for x, u, wm, wl, sm, sn in zip(
                dev_batches_x, dev_batches_u, dev_batches_w_masks, dev_batches_w_lens, dev_batches_sent_maxlen, dev_batches_sent_num)]
            best_dev = self.eval_accuracy(preds, dev_batches_y)
            mse_dev = self.eval_accuracy_mse(preds, dev_batches_y)
            say("\tdev mse = %.4f\taccuracy=%.4f\tbest=%.4f\n" % (
                mse_dev,
                best_dev,
                best_dev
            ))
            preds = [eval_acc(x, u, wm, wl, sm, sn) for x, u, wm, wl, sm, sn in zip(
                test_batches_x, test_batches_u, test_batches_w_masks, test_batches_w_lens, test_batches_sent_maxlen, test_batches_sent_num)]
            nowf_test = self.eval_accuracy(preds, test_batches_y)
            mse_test = self.eval_accuracy_mse(preds, test_batches_y)
            say("\tdev mse = %.4f\ttest accuracy=%.4f\n" % (
                mse_test,
                nowf_test
            ))

        test_a = 0.0
        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 20:
                return
            train_loss = 0.0

            random.shuffle(perm)

            batches_x, batches_y, batches_u, batches_w_masks, batches_w_lens, batches_sent_maxlen, batches_sent_num = create_batches_doc(
                perm, trainx, trainy, trainu, batch_size)
            N = len(batches_x)
            for i in xrange(N):
                self.dropout.set_value(dropout_prob)
                if (i + 1) % 100 == 0:
                    sys.stdout.write("\r%d" % i)
                    sys.stdout.flush()

                x = batches_x[i]
                y = batches_y[i]

                va, grad_norm = train_model(
                    x, y, batches_u[i], batches_w_masks[i], batches_w_lens[i], batches_sent_maxlen[i], batches_sent_num[i])
                train_loss += va

                # debug
                if math.isnan(va):
                    print()
                    print(i - 1, i)
                    print(x)
                    print(y)
                    #print(batches_w_masks[i])
                    #print(batches_w_lens[i])
                    print(batches_sent_maxlen[i])
                    print(batches_sent_num[i])
                    return

                if (i == N - 1) or (eval_period > 0 and (i + 1) % eval_period == 0):
                    self.dropout.set_value(0.0)

                    say("\n")
                    say("Epoch %.3f\tloss=%.4f\t|g|=%s  [%.2fm]\n" % (
                        epoch + (i + 1) / (N + 0.0),
                        train_loss / (i + 1),
                        float(grad_norm),
                        (time.time() - start_time) / 60.0
                    ))
                    say(str(["%.2f" % np.linalg.norm(x.get_value(borrow=True))
                             for x in self.params]) + "\n")

                    if dev:
                        preds = [eval_acc(x, u, wm, wl, sm, sn) for x, u, wm, wl, sm, sn in zip(
                            dev_batches_x, dev_batches_u, dev_batches_w_masks, dev_batches_w_lens, dev_batches_sent_maxlen, dev_batches_sent_num)]
                        nowf_dev = self.eval_accuracy(preds, dev_batches_y)
                        if nowf_dev > best_dev:
                            unchanged = 0
                            best_dev = nowf_dev
                            if args.save:
                                self.save_model(args.save, args)

                        say("\tdev accuracy=%.4f\tbest=%.4f\n" % (
                            nowf_dev,
                            best_dev
                        ))
                        say("\ttest current_accuracy=%.4f\n" % (
                            test_a
                        ))
                        if args.test and nowf_dev == best_dev:
                            preds = [eval_acc(x, u, wm, wl, sm, sn) for x, u, wm, wl, sm, sn in zip(
                                test_batches_x, test_batches_u, test_batches_w_masks, test_batches_w_lens, test_batches_sent_maxlen, test_batches_sent_num)]
                            nowf_test = self.eval_accuracy(
                                preds, test_batches_y)
                            say("\ttest accuracy=%.4f\n" % (
                                nowf_test,
                            ))
                            test_a = nowf_test

                        if best_dev > nowf_dev + 0.5:
                            return

                    self.dropout.set_value(dropout_prob)

                    start_time = time.time()
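Most of these training loops stop once the dev metric has not improved for a fixed number of epochs (the unchanged counter). A minimal sketch of that patience logic, where run_epoch_and_eval is a hypothetical callable that trains for one epoch and returns the dev accuracy:

    def train_with_patience(run_epoch_and_eval, max_epochs=100, patience=20):
        """Stop once the dev score has not improved for `patience` epochs."""
        best_dev = 0.0
        unchanged = 0
        for epoch in range(max_epochs):
            unchanged += 1
            if unchanged > patience:
                break
            dev_score = run_epoch_and_eval(epoch)
            if dev_score > best_dev:
                best_dev = dev_score
                unchanged = 0            # reset the patience counter
        return best_dev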
Example #14
File: rationale.py  Project: Sundayxr/rcnn
    def train(self, train, dev, test, rationale_data):
        args = self.args
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y = myio.create_batches(
                            dev[0], dev[1], args.batch, padding_id
                        )
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                            test[0], test[1], args.batch, padding_id
                        )
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                    [ u["xids"] for u in rationale_data ],
                    [ u["y"] for u in rationale_data ],
                    args.batch,
                    padding_id,
                    sort = False
                )

        start_time = time.time()
        train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )
        say("{:.2f}s to create training batches\n\n".format(
                time.time()-start_time
            ))

        updates_e, lr_e, gnorm_e = create_optimization_updates(
                               cost = self.generator.cost_e,
                               params = self.encoder.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]


        updates_g, lr_g, gnorm_g = create_optimization_updates(
                               cost = self.generator.cost,
                               params = self.generator.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]

        sample_generator = theano.function(
                inputs = [ self.x ],
                outputs = self.z_pred,
                #updates = self.generator.sample_updates
                #allow_input_downcast = True
            )

        get_loss_and_pred = theano.function(
                inputs = [ self.x, self.z, self.y ],
                outputs = [ self.generator.loss_vec, self.encoder.preds ]
            )

        eval_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.z, self.generator.obj, self.generator.loss,
                                self.encoder.pred_diff ],
                givens = {
                    self.z : self.generator.z_pred
                },
                #updates = self.generator.sample_updates,
                #no_default_updates = True
            )

        train_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.generator.obj, self.generator.loss, \
                                self.generator.sparsity_cost, self.z, gnorm_g, gnorm_e ],
                givens = {
                    self.z : self.generator.z_pred
                },
                #updates = updates_g,
                updates = updates_g.items() + updates_e.items() #+ self.generator.sample_updates,
                #no_default_updates = True
            )

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 10: return

            train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )

            processed = 0
            train_cost = 0.0
            train_loss = 0.0
            train_sparsity_cost = 0.0
            p1 = 0.0
            start_time = time.time()

            N = len(train_batches_x)
            for i in xrange(N):
                if (i+1) % 100 == 0:
                    say("\r{}/{}     ".format(i+1,N))

                bx, by = train_batches_x[i], train_batches_y[i]
                mask = bx != padding_id

                cost, loss, sparsity_cost, bz, gl2_g, gl2_e = train_generator(bx, by)

                k = len(by)
                processed += k
                train_cost += cost
                train_loss += loss
                train_sparsity_cost += sparsity_cost
                p1 += np.sum(bz*mask) / (np.sum(mask)+1e-8)

                if (i == N-1) or (eval_period > 0 and processed/eval_period >
                                    (processed-k)/eval_period):
                    say("\n")
                    say(("Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  " +
                        "p[1]={:.2f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n").format(
                            epoch+(i+1.0)/N,
                            train_cost / (i+1),
                            train_sparsity_cost / (i+1),
                            train_loss / (i+1),
                            p1 / (i+1),
                            float(gl2_g),
                            float(gl2_e),
                            (time.time()-start_time)/60.0,
                            (time.time()-start_time)/60.0/(i+1)*N
                        ))
                    say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                    for x in self.encoder.params ])+"\n")
                    say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                    for x in self.generator.params ])+"\n")

                    if dev:
                        self.dropout.set_value(0.0)
                        dev_obj, dev_loss, dev_diff, dev_p1 = self.evaluate_data(
                                dev_batches_x, dev_batches_y, eval_generator, sampling=True)

                        if dev_obj < best_dev:
                            best_dev = dev_obj
                            unchanged = 0
                            if args.dump and rationale_data:
                                self.dump_rationales(args.dump, valid_batches_x, valid_batches_y,
                                            get_loss_and_pred, sample_generator)

                            if args.save_model:
                                self.save_model(args.save_model, args)

                        say(("\tsampling devg={:.4f}  mseg={:.4f}  avg_diffg={:.4f}" +
                                    "  p[1]g={:.2f}  best_dev={:.4f}\n").format(
                            dev_obj,
                            dev_loss,
                            dev_diff,
                            dev_p1,
                            best_dev
                        ))

                        if rationale_data is not None:
                            r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale(
                                    rationale_data, valid_batches_x,
                                    valid_batches_y, eval_generator)
                            say(("\trationale mser={:.4f}  p[1]r={:.2f}  prec1={:.4f}" +
                                        "  prec2={:.4f}\n").format(
                                    r_mse,
                                    r_p1,
                                    r_prec1,
                                    r_prec2
                            ))

                        self.dropout.set_value(dropout_prob)
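The running p1 statistic above is the fraction of non-padding tokens that the generator selects as rationale. A small numpy sketch of that computation on one toy batch; bx, bz, and padding_id here are illustrative values, not data from the corpus:

    import numpy as np

    padding_id = 0
    bx = np.array([[3, 5, 0],        # token ids, 0 marks padding
                   [7, 2, 0]])
    bz = np.array([[1, 0, 0],        # binary rationale selections
                   [1, 1, 0]])

    mask = bx != padding_id                            # valid (non-padding) positions
    p1 = np.sum(bz * mask) / (np.sum(mask) + 1e-8)     # selected / valid tokens
    print(p1)                                          # approximately 0.75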
Example #15
    def train(self, train, dev, test, rationale_data):
        args = self.args
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y = myio.create_batches(
                dev[0], dev[1], args.batch, padding_id)
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                test[0], test[1], args.batch, padding_id)
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                [u["xids"] for u in rationale_data],
                [u["y"] for u in rationale_data],
                args.batch,
                padding_id,
                sort=False)

        start_time = time.time()
        train_batches_x, train_batches_y = myio.create_batches(
            train[0], train[1], args.batch, padding_id)
        say("{:.2f}s to create training batches\n\n".format(time.time() -
                                                            start_time))

        updates_e, lr_e, gnorm_e = create_optimization_updates(
            cost=self.generator.cost_e,
            params=self.encoder.params,
            method=args.learning,
            lr=args.learning_rate)[:3]

        updates_g, lr_g, gnorm_g = create_optimization_updates(
            cost=self.generator.cost,
            params=self.generator.params,
            method=args.learning,
            lr=args.learning_rate)[:3]

        sample_generator = theano.function(
            inputs=[self.x],
            outputs=self.z_pred,
            #updates = self.generator.sample_updates
            #allow_input_downcast = True
        )

        get_loss_and_pred = theano.function(
            inputs=[self.x, self.z, self.y],
            outputs=[self.generator.loss_vec, self.encoder.preds])

        eval_generator = theano.function(
            inputs=[self.x, self.y],
            outputs=[
                self.z, self.generator.obj, self.generator.loss,
                self.encoder.pred_diff
            ],
            givens={self.z: self.generator.z_pred},
            #updates = self.generator.sample_updates,
            #no_default_updates = True
        )

        train_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.generator.obj, self.generator.loss, \
                                self.generator.sparsity_cost, self.z, gnorm_g, gnorm_e ],
                givens = {
                    self.z : self.generator.z_pred
                },
                #updates = updates_g,
                updates = list(updates_g.items()) + list(updates_e.items()),  # + self.generator.sample_updates
                #no_default_updates = True
            )

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        for epoch in range(args.max_epochs):
            unchanged += 1
            if unchanged > 10: return

            train_batches_x, train_batches_y = myio.create_batches(
                train[0], train[1], args.batch, padding_id)

            processed = 0
            train_cost = 0.0
            train_loss = 0.0
            train_sparsity_cost = 0.0
            p1 = 0.0
            start_time = time.time()

            N = len(train_batches_x)
            for i in range(N):
                if (i + 1) % 100 == 0:
                    say("\r{}/{}     ".format(i + 1, N))

                bx, by = train_batches_x[i], train_batches_y[i]
                mask = bx != padding_id

                cost, loss, sparsity_cost, bz, gl2_g, gl2_e = train_generator(
                    bx, by)

                k = len(by)
                processed += k
                train_cost += cost
                train_loss += loss
                train_sparsity_cost += sparsity_cost
                p1 += np.sum(bz * mask) / (np.sum(mask) + 1e-8)

                if (i == N - 1) or (eval_period > 0
                                    and processed / eval_period >
                                    (processed - k) / eval_period):
                    say("\n")
                    say((
                        "Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  "
                        +
                        "p[1]={:.2f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n"
                    ).format(epoch + (i + 1.0) / N, train_cost / (i + 1),
                             train_sparsity_cost / (i + 1),
                             train_loss / (i + 1), p1 / (i + 1), float(gl2_g),
                             float(gl2_e), (time.time() - start_time) / 60.0,
                             (time.time() - start_time) / 60.0 / (i + 1) * N))
                    say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                    for x in self.encoder.params ])+"\n")
                    say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                    for x in self.generator.params ])+"\n")

                    if dev:
                        self.dropout.set_value(0.0)
                        dev_obj, dev_loss, dev_diff, dev_p1 = self.evaluate_data(
                            dev_batches_x,
                            dev_batches_y,
                            eval_generator,
                            sampling=True)

                        if dev_obj < best_dev:
                            best_dev = dev_obj
                            unchanged = 0
                            if args.dump and rationale_data:
                                self.dump_rationales(args.dump,
                                                     valid_batches_x,
                                                     valid_batches_y,
                                                     get_loss_and_pred,
                                                     sample_generator)

                            if args.save_model:
                                self.save_model(args.save_model, args)

                        say((
                            "\tsampling devg={:.4f}  mseg={:.4f}  avg_diffg={:.4f}"
                            + "  p[1]g={:.2f}  best_dev={:.4f}\n").format(
                                dev_obj, dev_loss, dev_diff, dev_p1, best_dev))

                        if rationale_data is not None:
                            r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale(
                                rationale_data, valid_batches_x,
                                valid_batches_y, eval_generator)
                            say((
                                "\trationale mser={:.4f}  p[1]r={:.2f}  prec1={:.4f}"
                                + "  prec2={:.4f}\n").format(
                                    r_mse, r_p1, r_prec1, r_prec2))

                        self.dropout.set_value(dropout_prob)
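In Python 3, dict .items() views cannot be concatenated with +, which is why the encoder and generator updates above have to be merged explicitly before being passed to theano.function. A sketch of an order-preserving merge, assuming each argument behaves like a dict of shared-variable updates:

    import collections

    def merge_updates(*update_dicts):
        """Combine several Theano-style update mappings into one ordered dict."""
        merged = collections.OrderedDict()
        for updates in update_dicts:
            for var, expr in updates.items():
                merged[var] = expr
        return merged

    # e.g. updates=merge_updates(updates_g, updates_e, self.generator.sample_updates)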
Example #16
    def train(self, args, train, dev, test=None):
        embedding_layer = self.layers[0]

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        batch_size = args["batch_size"]
        unroll_size = args["unroll_size"]

        train = create_batches(train, embedding_layer.map_to_ids, batch_size)

        dev = create_batches(dev, embedding_layer.map_to_ids, batch_size)

        if test is not None:
            test = create_batches(test, embedding_layer.map_to_ids, batch_size)

        cost = T.sum(self.nll) / self.idxs.shape[1]
        updates, lr, gnorm = create_optimization_updates(
                cost = cost,
                params = self.params,
                lr = args["learning_rate"],
                beta1 = args["beta1"],
                beta2 = args["beta2"],
                rho = args["rho"],
                momentum = args["momentum"],
                gamma = args["gamma"],
                eps = args["eps"],
                method = args["learning"]
            )[:3]
        #if args["learning"] == "adadelta":
        #    lr.set_value(args["learning_rate"])

        train_func = theano.function(
                inputs = [ self.idxs, self.idys, self.init_state ],
                outputs = [cost, self.last_state, gnorm ],
                updates = updates
            )
        eval_func = theano.function(
                inputs = [ self.idxs, self.idys, self.init_state ],
                outputs = [self.nll, self.last_state ]
            )

        N = (len(train[0])-1)/unroll_size + 1
        say(" train: {} tokens, {} mini-batches\n".format(
                len(train[0].ravel()), N
            ))
        say(" dev: {} tokens\n".format(len(dev[0].ravel())))

        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat()
            ))

        decay_lr = args["decay_lr"] and args["learning"].lower() != "adadelta" and \
                    args["learning"].lower() != "adagrad"
        lr_0 = args["learning_rate"]
        iter_cnt = 0

        unchanged = 0
        best_dev = 1e+10
        start_time = 0
        max_epoch = args["max_epoch"]
        for epoch in xrange(max_epoch):
            if unchanged > 5: break
            start_time = time.time()

            prev_state = np.zeros((batch_size, self.n_d*2),
                            dtype=theano.config.floatX)

            train_loss = 0.0
            for i in xrange(N):
                # get current batch
                x = train[0][i*unroll_size:(i+1)*unroll_size]
                y = train[1][i*unroll_size:(i+1)*unroll_size]

                iter_cnt += 1
                if decay_lr:
                    lr.set_value(np.float32(lr_0/iter_cnt**0.5))
                cur_loss, prev_state, grad_norm = train_func(x, y, prev_state)
                train_loss += cur_loss/len(x)

                if math.isnan(cur_loss) or math.isnan(grad_norm):
                    say("\nNaN !!\n")
                    return

                if i % 10 == 0:
                    say("\r{}".format(i))

                if i == N-1:
                    self.dropout.set_value(0.0)
                    dev_preds = self.evaluate(eval_func, dev, batch_size, unroll_size)
                    dev_loss = evaluate_average(
                            predictions = dev_preds,
                            masks = None
                        )
                    dev_ppl = np.exp(dev_loss)
                    self.dropout.set_value(dropout_prob)

                    say("\r\n")
                    say( ( "Epoch={}  lr={:.3f}  train_loss={:.3f}  train_ppl={:.1f}  " \
                        +"dev_loss={:.3f}  dev_ppl={:.1f}\t|g|={:.3f}\t[{:.1f}m]\n" ).format(
                            epoch,
                            float(lr.get_value(borrow=True)),
                            train_loss/N,
                            np.exp(train_loss/N),
                            dev_loss,
                            dev_ppl,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                        ))
                    say("\tp_norm: {}\n".format(
                            self.get_pnorm_stat()
                        ))

                    # halve the learning rate
                    #if args["learning"] == "sgd" and dev_ppl > best_dev-1:
                    #    lr.set_value(np.max([lr.get_value()/2.0, np.float32(0.0001)]))

                    if dev_ppl < best_dev:
                        best_dev = dev_ppl
                        if test is None: continue
                        self.dropout.set_value(0.0)
                        test_preds = self.evaluate(eval_func, test, batch_size, unroll_size)
                        test_loss = evaluate_average(
                                predictions = test_preds,
                                masks = None
                            )
                        test_ppl = np.exp(test_loss)
                        self.dropout.set_value(dropout_prob)
                        say("\tbest_dev={:.1f}  test_loss={:.3f}  test_ppl={:.1f}\n".format(
                                best_dev, test_loss, test_ppl))
                    if best_dev > 200: unchanged += 1

        say("\n")
Example #17
File: rationale.py  Project: Two222/rcnn-1
    def train(self, train, dev, test):
        args = self.args
        dropout = self.dropout
        padding_id = self.embedding_layer.vocab_map["<padding>"]

        if dev is not None:
            dev_batches_x, dev_batches_y = myio.create_batches(
                            dev[0], dev[1], args.batch, padding_id
                        )
        if test is not None:
            test_batches_x, test_batches_y = myio.create_batches(
                            test[0], test[1], args.batch, padding_id
                        )

        start_time = time.time()
        train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )
        say("{:.2f}s to create training batches\n\n".format(
                time.time()-start_time
            ))

        updates_e, lr_e, gnorm_e = create_optimization_updates(
                               cost = self.encoder.cost_e,
                               params = self.encoder.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]


        updates_g, lr_g, gnorm_g = create_optimization_updates(
                               cost = self.encoder.cost_g,
                               params = self.generator.params,
                               method = args.learning,
                               lr = args.learning_rate
                        )[:3]

        sample_generator = theano.function(
                inputs = [ self.x ],
                outputs = self.z
            )

        get_loss_and_pred = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.encoder.loss_vec, self.encoder.preds, self.z ]
            )

        train_generator = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                                self.encoder.sparsity_cost, self.z, gnorm_e, gnorm_g ],
                updates = updates_e.items() + updates_g.items(),
            )

        eval_func = theano.function(
                inputs = [ self.x, self.y ],
                outputs = [ self.z, self.encoder.obj, self.true_pos, self.tot_pos, self.tot_true ]
            )

        eval_period = args.eval_period
        unchanged = 0
        best_dev = 1e+2
        best_dev_e = 1e+2
        last_train_avg_cost = None
        last_dev_avg_cost = None
        tolerance = 0.10 + 1e-3
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 50: return

            train_batches_x, train_batches_y = myio.create_batches(
                            train[0], train[1], args.batch, padding_id
                        )

            more = True
            if args.decay_lr:
                param_bak = [ p.get_value(borrow=False) for p in self.params ]

            while more:
                processed = 0
                train_cost = 0.0
                train_loss = 0.0
                train_sparsity_cost = 0.0
                p1 = 0.0
                start_time = time.time()

                N = len(train_batches_x)
                for i in xrange(N):
                    if (i+1) % 100 == 0:
                        say("\r{}/{} {:.2f}       ".format(i+1,N,p1/(i+1)))

                    bx, by = train_batches_x[i], train_batches_y[i]
                    mask = bx != padding_id

                    cost, loss, sparsity_cost, bz, gl2_e, gl2_g = train_generator(bx, by)

                    k = len(by)
                    processed += k
                    train_cost += cost
                    train_loss += loss
                    train_sparsity_cost += sparsity_cost
                    p1 += np.sum(bz*mask) / (np.sum(mask)+1e-8)

                cur_train_avg_cost = train_cost / N

                if dev:
                    self.dropout.set_value(0.0)
                    dev_obj, dev_prec, dev_recall, dev_f1, dev_p1 = self.evaluate_data(
                            dev_batches_x, dev_batches_y, eval_func)
                    self.dropout.set_value(dropout_prob)
                    cur_dev_avg_cost = dev_obj

                more = False
                if args.decay_lr and last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost*(1+tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                                last_train_avg_cost, cur_train_avg_cost
                            ))
                    if dev and cur_dev_avg_cost > last_dev_avg_cost*(1+tolerance):
                        more = True
                        say("\nDev cost {} --> {}\n".format(
                                last_dev_avg_cost, cur_dev_avg_cost
                            ))

                if more:
                    lr_val = lr_g.get_value()*0.5
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    lr_e.set_value(lr_val)
                    say("Decrease learning rate to {}\n".format(float(lr_val)))
                    for p, v in zip(self.params, param_bak):
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost
                if dev: last_dev_avg_cost = cur_dev_avg_cost

                say("\n")
                say(("Generator Epoch {:.2f}  costg={:.4f}  scost={:.4f}  lossg={:.4f}  " +
                    "p[1]={:.3f}  |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n").format(
                        epoch+(i+1.0)/N,
                        train_cost / N,
                        train_sparsity_cost / N,
                        train_loss / N,
                        p1 / N,
                        float(gl2_e),
                        float(gl2_g),
                        (time.time()-start_time)/60.0,
                        (time.time()-start_time)/60.0/(i+1)*N
                    ))
                say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.encoder.params ])+"\n")
                say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                                for x in self.generator.params ])+"\n")

                if dev:
                    if dev_obj < best_dev:
                        best_dev = dev_obj
                        unchanged = 0
                        if args.dump and test:
                            self.dump_rationales(args.dump, test_batches_x, test_batches_y,
                                        get_loss_and_pred, sample_generator)

                    say(("\tdevg={:.4f}  f1g={:.4f}  preg={:.4f}  recg={:.4f}" +
                                "  p[1]g={:.3f}  best_dev={:.4f}\n").format(
                        dev_obj,
                        dev_f1,
                        dev_prec,
                        dev_recall,
                        dev_p1,
                        best_dev
                    ))

                    if test is not None:
                        self.dropout.set_value(0.0)
                        test_obj, test_prec, test_recall, test_f1, test_p1 = self.evaluate_data(
                            test_batches_x, test_batches_y, eval_func)
                        self.dropout.set_value(dropout_prob)
                        say(("\ttestt={:.4f}  f1t={:.4f}  pret={:.4f}  rect={:.4f}" +
                                    "  p[1]t={:.3f}\n").format(
                            test_obj,
                            test_f1,
                            test_prec,
                            test_recall,
                            test_p1
                        ))
Example #18
    def train(self, source_train, target_train, source_ul, target_ul, dev, test):
        args = self.args
        n_domain = 2
        padding_id = self.padding_id

        start_time = time.time()
        if source_train is not None:
            s_train_batches, source_train = io_util.create_batches(
                                source_train, args.batch, padding_id)
            for b in s_train_batches:
                b.append(self.get_domain_ids(domain_id=0, n_domain=n_domain, batch=len(b[1])))
                
        if target_train is not None:
            t_train_batches, target_train = io_util.create_batches(
                                target_train, args.batch, padding_id)
            for b in t_train_batches:
                b.append(self.get_domain_ids(domain_id=1, n_domain=n_domain, batch=len(b[1])))

        if dev is not None:
            dev_batches, dev = io_util.create_batches(
                            dev, args.batch, padding_id
                        )
            for b in dev_batches:
                b.append(self.get_domain_ids(domain_id=0, n_domain=n_domain, batch=len(b[1])))
            tot = 0
            for b in dev_batches:
                tot += len(b[0].T)
            print "dev size:", tot, len(dev)
            
        if test is not None:
            test_batches, test = io_util.create_batches(
                            test, args.batch, padding_id
                        )
            for b in test_batches:
                b.append(self.get_domain_ids(domain_id=1, n_domain=n_domain, batch=len(b[1])))
            tot = 0
            for b in test_batches:
                tot += len(b[0].T)
            print "test size:", tot, len(test)

        print 'load source unlabeled data'        
        s_ul_batches, source_ul = io_util.create_batches(
                            source_ul, args.batch, padding_id, label=False)
        for b in s_ul_batches:
            b.append(self.get_domain_ids(domain_id=0, n_domain=n_domain, batch=len(b[1])))

        print 'load target unlabeled data'        
        t_ul_batches, target_ul = io_util.create_batches(
                            target_ul, args.batch, padding_id, label=False)
        for b in t_ul_batches:
            b.append(self.get_domain_ids(domain_id=1, n_domain=n_domain, batch=len(b[1])))

        say("{:.2f}s to create training batches\n\n".format(
                time.time()-start_time
            ))

        dom_updates, dom_lr, dom_gnorm = create_optimization_updates(
                               cost = self.dom_cost,
                               params = self.dom_params,
                               method = args.learning,
                               lr = args.learning_rate,
                               gsums = self.dom_accums[0],
                               xsums = self.dom_accums[1],
                        )[:3]
                        
        other_updates, other_lr, other_gnorm = create_optimization_updates(
                               cost = self.other_cost_except_dom,
                               params = self.other_params_except_dom,
                               method = args.learning,
                               lr = args.learning_rate,
                               gsums = self.other_accums_except_dom[0],
                               xsums = self.other_accums_except_dom[1],
                        )[:3]
        
        BNupdates = self.cnn_layer.get_updates()
        train_func = theano.function(
                inputs = [ self.s_idxs, self.t_idxs, self.s_idys, self.t_idys, self.s_gold_rels, self.t_gold_rels, \
                          self.s_dom_ids, self.t_dom_ids, self.s_has_lab, self.t_has_lab ],
                outputs = [ self.dom_cost, self.other_cost_except_dom, dom_gnorm, other_gnorm, \
                            self.s_lab_loss, self.t_lab_loss, self.s_rel_loss, self.t_rel_loss, \
                            self.s_dom_loss, self.t_dom_loss, self.s_adv_loss, self.t_adv_loss, self.trans_reg, \
                            self.s_recon_loss, self.t_recon_loss ],
                updates = dom_updates.items() + other_updates.items() + BNupdates,
            )

        s_get_loss_and_pred = theano.function(
                inputs = [ self.s_idxs, self.s_idys, self.s_gold_rels, self.s_dom_ids ],
                outputs = [ self.s_lab_prob, self.s_lab_loss, self.s_rel_loss, self.s_dom_loss, self.s_adv_loss, self.s_recon_loss ]
            )
        t_get_loss_and_pred = theano.function(
                inputs = [ self.t_idxs, self.t_idys, self.t_gold_rels, self.t_dom_ids ],
                outputs = [ self.t_lab_prob, self.t_lab_loss, self.t_rel_loss, self.t_dom_loss, self.t_adv_loss, self.t_recon_loss ]
            )

        unchanged = 0
        best_dev = 0
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        
        s_ul_batch_ptr = 0
        t_ul_batch_ptr = 0
        s_train_ptr = 0
        t_train_ptr = 0
        test_ptr = 0
        
        print 'Training'
        say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                        for x in self.params ])+"\n")
        for epoch in xrange(args.epochs):
            unchanged += 1
            if unchanged > 100: break
                
            s_avg_lab_loss, s_avg_rel_loss, s_avg_dom_loss, s_avg_adv_loss, s_avg_recon_loss = 0.0, 0.0, 0.0, 0.0, 0.0
            t_avg_lab_loss, t_avg_rel_loss, t_avg_dom_loss, t_avg_adv_loss, t_avg_recon_loss = 0.0, 0.0, 0.0, 0.0, 0.0
            avg_dom_cost, avg_other_cost, dom_g, other_g, avg_trans_reg = 0.0, 0.0, 0.0, 0.0, 0.0
            start_time = time.time()

            source_k = self.source_k
            if source_train is not None:
                N = len(s_train_batches) * source_k
            else:
                raise Exception("no source training data?")
                
            N_s_ul = len(s_ul_batches)
            N_t_ul = len(t_ul_batches)
            n_s_lab, n_t_lab, n_s_ul, n_t_ul = 0, 0, 0, 0
                
            for t in xrange(N):
                progress = epoch + (t+0.0)/N
                rho_t = 2.0 / (1.0 + np.exp(-0.5*progress)) - 1.0
                rho_t = np.float64(rho_t * args.rho).astype(theano.config.floatX)
                self.rho.set_value(rho_t)
                
                lr_t = args.learning_rate / (1.0 + 0.5*progress) ** 0.75
                lr_t = np.float64(lr_t).astype(theano.config.floatX)
                other_lr.set_value(lr_t)

                s_task = t % source_k
                
                if s_task == 0 and source_train is not None:
                    s_bx, s_by, s_brel, s_bid = s_train_batches[s_train_ptr]
                    s_has_lab = 1
                    s_train_ptr = (s_train_ptr+1)%len(s_train_batches)
                    n_s_lab += 1
                else:
                    s_bx, s_by, s_brel, s_bid = s_ul_batches[s_ul_batch_ptr]
                    s_has_lab = 0
                    s_ul_batch_ptr = (s_ul_batch_ptr+1)%N_s_ul
                    n_s_ul += 1
                    
                t_bx, t_by, t_brel, t_bid = t_ul_batches[t_ul_batch_ptr]
                t_has_lab = 0
                t_ul_batch_ptr = (t_ul_batch_ptr+1)%N_t_ul
                n_t_ul += 1
                    
                dom_cost, other_cost, dom_g, other_g, \
                s_lab_loss, t_lab_loss, s_rel_loss, t_rel_loss, \
                s_dom_loss, t_dom_loss, s_adv_loss, t_adv_loss, trans_reg, \
                s_recon_loss, t_recon_loss = train_func( \
                        s_bx, t_bx, s_by, t_by, s_brel, t_brel, s_bid, t_bid, s_has_lab, t_has_lab)
                    
                avg_dom_cost += dom_cost
                avg_other_cost += other_cost
                avg_trans_reg += trans_reg
                if s_has_lab: s_avg_lab_loss += s_lab_loss
                if t_has_lab: t_avg_lab_loss += t_lab_loss
                s_avg_rel_loss += s_rel_loss
                t_avg_rel_loss += t_rel_loss
                s_avg_dom_loss += s_dom_loss
                t_avg_dom_loss += t_dom_loss
                s_avg_adv_loss += s_adv_loss
                t_avg_adv_loss += t_adv_loss
                s_avg_recon_loss += s_recon_loss
                t_avg_recon_loss += t_recon_loss
                
                say("\r{}/{}/{} {}/{}/{} {}/{}/{} {}/{}/{}/{}    ".format(n_s_lab,s_train_ptr,N, \
                                                                          n_t_lab,t_train_ptr,N, \
                                                                          n_s_ul,s_ul_batch_ptr,N_s_ul, \
                                                                          n_t_ul,t_ul_batch_ptr,test_ptr,N_t_ul))
                    
            say(("Epoch {:.2f}  [{:.2f}m]\n").format(
                        epoch,
                        (time.time()-start_time)/60.0,
                    ))
            say("Source:\t")
            if source_train is not None:
                say(("lab_loss={:.4f}  ").format(s_avg_lab_loss / n_s_lab,))
            say(("rel_loss={:.4f}  dom_loss={:.4f}  adv_loss={:.4f}  recon_loss={:.4f}\n").format(
                    s_avg_rel_loss / N,
                    s_avg_dom_loss / N,
                    s_avg_adv_loss / N,
                    s_avg_recon_loss / N,
                ))
            
            say("Target:\t")
            if target_train is not None:
                say(("lab_loss={:.4f}  ").format(t_avg_lab_loss / n_t_lab,))
            say(("rel_loss={:.4f}  dom_loss={:.4f}  adv_loss={:.4f}  recon_loss={:.4f}\n").format(
                    t_avg_rel_loss / N,
                    t_avg_dom_loss / N,
                    t_avg_adv_loss / N,
                    t_avg_recon_loss / N,
                ))

            say(("Domain cost={:.4f}  |g|={:.4f}  Other cost={:.4f}  |g|={:.4f}  trans_reg={:.4f}\n").format(
                    avg_dom_cost / N,
                    float(dom_g),
                    avg_other_cost / N,
                    float(other_g),
                    avg_trans_reg / N,
                ))
            say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \
                            for x in self.params ])+"\n")

            if dev:
                self.dropout.set_value(0.0)
                self.cnn_layer.set_runmode(1)
                dev_lab_loss, dev_rel_loss, dev_dom_loss, dev_adv_loss, dev_recon_loss, dev_acc, dev_f1 = self.evaluate_data(dev_batches, s_get_loss_and_pred)
                self.dropout.set_value(dropout_prob)
                self.cnn_layer.set_runmode(0)

                if dev_acc > best_dev:
                    best_dev = dev_acc
                    unchanged = 0

                say(("\tdev_lab_loss={:.4f}  dev_rel_loss={:.4f}  dom_loss={:.4f}  adv_loss={:.4f}  recon_loss={:.4f}  dev_acc={:.4f}  dev_f1={}" +
                            "  best_dev={:.4f}\n").format(
                    dev_lab_loss,
                    dev_rel_loss,
                    dev_dom_loss,
                    dev_adv_loss,
                    dev_recon_loss,
                    dev_acc,
                    " ".join(['{:.4f}'.format(x) for x in dev_f1]),
                    best_dev,
                ))
                

            if test:
                self.dropout.set_value(0.0)
                self.cnn_layer.set_runmode(1)
                test_lab_loss, test_rel_loss, test_dom_loss, test_adv_loss, test_recon_loss, test_acc, test_f1 = self.evaluate_data(test_batches, t_get_loss_and_pred)
                self.dropout.set_value(dropout_prob)
                self.cnn_layer.set_runmode(0)
                say(("\ttest_lab_loss={:.4f}  test_rel_loss={:.4f}  dom_loss={:.4f}  adv_loss={:.4f}  recon_loss={:.4f}  test_acc={:.4f}  test_f1={}\n").format(
                    test_lab_loss,
                    test_rel_loss,
                    test_dom_loss,
                    test_adv_loss,
                    test_recon_loss,
                    test_acc,
                    " ".join(['{:.4f}'.format(x) for x in test_f1]),
                ))
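The adversarial weight rho and the learning rate in the domain-adaptation loop above both follow schedules of the training progress (epoch plus the fraction of the current epoch): rho ramps from 0 toward args.rho through a scaled logistic, and the learning rate anneals polynomially. A standalone numpy sketch of both curves, with max_rho and lr_0 as illustrative stand-ins for args.rho and args.learning_rate:

    import numpy as np

    max_rho, lr_0 = 1.0, 0.01
    for progress in (0.0, 1.0, 5.0, 20.0):
        rho_t = (2.0 / (1.0 + np.exp(-0.5 * progress)) - 1.0) * max_rho
        lr_t = lr_0 / (1.0 + 0.5 * progress) ** 0.75
        print("progress=%.1f  rho=%.3f  lr=%.5f" % (progress, rho_t, lr_t))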
Example #19
    def train(self, ids_corpus, train, dev=None, test=None):
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        updates, lr, gnorm = create_optimization_updates(
            cost=self.cost,
            params=self.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

        train_func = theano.function(inputs=[self.idts, self.idbs, self.idps],
                                     outputs=[self.cost, self.loss, gnorm],
                                     updates=updates)

        eval_func = theano.function(inputs=[self.idts, self.idbs],
                                    outputs=self.scores,
                                    on_unused_input='ignore')

        say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        result_table = PrettyTable(
            ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
            ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 15: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus,
                                                train,
                                                batch_size,
                                                padding_id,
                                                pad_left=not args.average)
            N = len(train_batches)

            train_loss = 0.0
            train_cost = 0.0

            for i in xrange(N):
                # get current batch
                idts, idbs, idps = train_batches[i]

                cur_cost, cur_loss, grad_norm = train_func(idts, idbs, idps)
                train_loss += cur_loss
                train_cost += cur_cost

                if i % 10 == 0:
                    say("\r{}/{}".format(i, N))

                if i == N - 1:
                    self.dropout.set_value(0.0)

                    if dev is not None:
                        dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(
                            dev, eval_func)
                    if test is not None:
                        test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(
                            test, eval_func)

                    if dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row([epoch] + [
                            "%.2f" % x
                            for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                            [test_MAP, test_MRR, test_P1, test_P5]
                        ])
                        if args.save_model:
                            self.save_model(args.save_model)

                    dropout_p = np.float64(args.dropout).astype(
                        theano.config.floatX)
                    self.dropout.set_value(dropout_p)

                    say("\r\n\n")
                    say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f}" \
                        +"\tMRR={:.2f},{:.2f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format(
                            epoch,
                            train_cost / (i+1),
                            train_loss / (i+1),
                            dev_MRR,
                            best_dev,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                    ))
                    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

                    say("\n")
                    say("{}".format(result_table))
                    say("\n")
Example #20
File: rationale.py  Project: Sundayxr/rcnn
    def train(self, ids_corpus, train, dev=None, test=None):
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        batch_size = args.batch_size
        padding_id = self.padding_id

        #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

        if dev is not None:
            dev, dev_raw = dev
        if test is not None:
            test, test_raw = test

        if args.joint:
            updates_e, lr_e, gnorm_e = create_optimization_updates(
                    cost = self.encoder.cost_e, #self.encoder.cost,
                    params = self.encoder.params,
                    lr = args.learning_rate*0.1,
                    method = args.learning
                )[:3]
        else:
            updates_e = {}

        updates_g, lr_g, gnorm_g = create_optimization_updates(
                cost = self.encoder.cost_g,
                params = self.generator.params,
                lr = args.learning_rate,
                method = args.learning
            )[:3]

        train_func = theano.function(
                inputs = [ self.x, self.triples, self.pairs ],
                outputs = [ self.encoder.obj, self.encoder.loss, \
                        self.encoder.sparsity_cost, self.generator.p1, gnorm_g ],
                updates = updates_g.items() + updates_e.items() + self.generator.sample_updates,
                #no_default_updates = True,
                on_unused_input = "ignore"
            )

        eval_func = theano.function(
                inputs = [ self.x ],
                outputs = self.encoder.scores
            )

        eval_func2 = theano.function(
                inputs = [ self.x ],
                outputs = [ self.encoder.scores_z, self.generator.p1, self.z ],
                updates = self.generator.sample_updates,
                #no_default_updates = True
            )


        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat(self.encoder.params)
            ))
        say("\tp_norm: {}\n".format(
                self.get_pnorm_stat(self.generator.params)
            ))

        result_table = PrettyTable(["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
                                    ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])
        last_train_avg_cost = None
        tolerance = 0.5 + 1e-3
        unchanged = 0
        best_dev = -1
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        start_time = 0
        max_epoch = args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 20: break

            start_time = time.time()

            train = myio.read_annotations(args.train)
            train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                    padding_id, pad_left=not args.average, merge=args.merge)
            N = len(train_batches)

            more = True
            param_bak = [ p.get_value(borrow=False) for p in self.params ]

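            # replay the epoch (with a smaller learning rate) if the average cost blows up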
            while more:

                train_loss = 0.0
                train_cost = 0.0
                train_scost = 0.0
                train_p1 = 0.0

                for i in xrange(N):
                    # get current batch
                    idts, triples, pairs = train_batches[i]

                    cur_cost, cur_loss, cur_scost, cur_p1, gnormg = train_func(
                        idts, triples, pairs)
                    train_loss += cur_loss
                    train_cost += cur_cost
                    train_scost += cur_scost
                    train_p1 += cur_p1

                    if i % 10 == 0:
                        say("\r{}/{} {:.3f}".format(i,N,train_p1/(i+1)))

                cur_train_avg_cost = train_cost / N
                more = False
                if last_train_avg_cost is not None:
                    if cur_train_avg_cost > last_train_avg_cost*(1+tolerance):
                        more = True
                        say("\nTrain cost {} --> {}\n".format(
                                last_train_avg_cost, cur_train_avg_cost
                            ))

                if more:
                    lr_val = lr_g.get_value() * 0.5
                    if lr_val < 1e-5: return
                    lr_val = np.float64(lr_val).astype(theano.config.floatX)
                    lr_g.set_value(lr_val)
                    if args.joint:
                        # lr_e is only created when the encoder is optimized jointly
                        lr_e.set_value(lr_val)
                    say("Decrease learning rate to {}\n".format(float(lr_val)))
                    # roll the parameters back to the snapshot taken at the start of the epoch
                    for p, v in zip(self.params, param_bak):
                        p.set_value(v)
                    continue

                last_train_avg_cost = cur_train_avg_cost

                say("\r\n\n")
                say( ( "Epoch {}  cost={:.3f}  loss={:.3f}  scost={:.3f}" \
                    +"  P[1]={:.3f}  |g|={:.3f}\t[{:.3f}m]\n" ).format(
                        epoch,
                        train_cost / N,
                        train_loss / N,
                        train_scost / N,
                        train_p1 / N,
                        float(gnormg),
                        (time.time()-start_time)/60.0
                ))
                say("\tp_norm: {}\n".format(
                        self.get_pnorm_stat(self.encoder.params)
                    ))
                say("\tp_norm: {}\n".format(
                        self.get_pnorm_stat(self.generator.params)
                    ))

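                # disable dropout for evaluation; it is restored after the metrics are logged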
                self.dropout.set_value(0.0)

                if dev is not None:
                    full_MAP, full_MRR, full_P1, full_P5 = self.evaluate(dev, eval_func)
                    dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT = self.evaluate_z(dev,
                            dev_raw, ids_corpus, eval_func2)

                if test is not None:
                    test_MAP, test_MRR, test_P1, test_P5, test_PZ1, test_PT = \
                            self.evaluate_z(test, test_raw, ids_corpus, eval_func2)

                if dev_MAP > best_dev:
                    best_dev = dev_MAP
                    unchanged = 0

                say("\n")
                say("  fMAP={:.2f} fMRR={:.2f} fP1={:.2f} fP5={:.2f}\n".format(
                        full_MAP, full_MRR,
                        full_P1, full_P5
                    ))

                say("\n")
                say(("  dMAP={:.2f} dMRR={:.2f} dP1={:.2f} dP5={:.2f}" +
                     " dP[1]={:.3f} d%T={:.3f} best_dev={:.2f}\n").format(
                        dev_MAP, dev_MRR,
                        dev_P1, dev_P5,
                        dev_PZ1, dev_PT, best_dev
                    ))

                result_table.add_row(
                        [ epoch ] +
                        [ "%.2f" % x for x in [ dev_MAP, dev_MRR, dev_P1, dev_P5 ] +
                                    [ test_MAP, test_MRR, test_P1, test_P5 ] ]
                    )

                if unchanged == 0:
                    say("\n")
                    say(("  tMAP={:.2f} tMRR={:.2f} tP1={:.2f} tP5={:.2f}" +
                        " tP[1]={:.3f} t%T={:.3f}\n").format(
                        test_MAP, test_MRR,
                        test_P1, test_P5,
                        test_PZ1, test_PT
                    ))
                    if args.dump_rationale:
                        self.evaluate_z(dev+test, dev_raw+test_raw, ids_corpus,
                                eval_func2, args.dump_rationale)

                    #if args.save_model:
                    #    self.save_model(args.save_model)

                dropout_p = np.float64(args.dropout).astype(
                            theano.config.floatX)
                self.dropout.set_value(dropout_p)

                say("\n")
                say("{}".format(result_table))
                say("\n")

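            # stop if the generator collapses to selecting (almost) none or (almost) all of the words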
            if train_p1 / N <= 1e-4 or train_p1 / N + 1e-4 >= 1.0:
                break
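
The outer `while more:` loop in Example #20 acts as a simple divergence guard: the parameters are snapshotted before each epoch, and if the epoch's average training cost exceeds the previous epoch's by more than the tolerance, the learning rate is halved, the snapshot is restored, and the epoch is replayed. The sketch below reproduces that control flow on a toy least-squares problem using only numpy; every name in it is illustrative and not taken from the project.

import numpy as np

# Toy objective: mean squared error of a linear model, plus its gradient.
def cost_and_grad(w, X, y):
    err = X.dot(w) - y
    return float(np.mean(err ** 2)), 2.0 * X.T.dot(err) / len(y)

def train_with_divergence_guard(X, y, lr=2.0, tolerance=0.5, max_epochs=20):
    w = np.zeros(X.shape[1])
    last_avg_cost = None
    for epoch in range(max_epochs):
        w_bak = w.copy()                          # snapshot, like param_bak above
        while True:
            # "one epoch" of training: here a single full-batch gradient step
            _, grad = cost_and_grad(w, X, y)
            w = w - lr * grad
            avg_cost, _ = cost_and_grad(w, X, y)  # cost after the epoch
            if last_avg_cost is not None and avg_cost > last_avg_cost * (1 + tolerance):
                lr *= 0.5                         # cost blew up: halve the learning rate ...
                if lr < 1e-5:
                    return w
                w = w_bak.copy()                  # ... restore the snapshot and replay the epoch
                continue
            last_avg_cost = avg_cost
            break
    return w

# usage on synthetic data; the deliberately large initial learning rate
# makes the guard fire during the first epochs before the run settles
rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = X.dot(np.arange(1.0, 6.0)) + 0.1 * rng.randn(200)
print(train_with_divergence_guard(X, y))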