Example 1
 def train_batch(self, triples, dev, test, batch_size=500, max_iter=500, C=5):
     neg_entity_list = dict()
     for _, e2, r in triples:
         if r not in neg_entity_list:
             neg_entity_list[r] = list()
         neg_entity_list[r].append(e2)
     train_data = np.zeros((len(triples), 3 + C), dtype=np.int64)
     for triple_index in xrange(len(triples)):
         e1_index, e2_index, r_index = triples[triple_index]
         train_data[triple_index][0] = e1_index
         train_data[triple_index][1] = e2_index
         train_data[triple_index][2] = r_index
         for j in xrange(C):
             train_data[triple_index][3 + j] = np.random.choice(neg_entity_list[r_index])
     train_indexs = align_batch_size(range(train_data.shape[0]), batch_size=batch_size)
     num_batch = len(train_indexs) / batch_size
     for i in xrange(max_iter):
         for triple_index in xrange(len(triples)):
             _, _, r_index = triples[triple_index]
             train_data[triple_index][2] = np.random.choice(neg_entity_list[r_index])
         # np.random.permutation returns a shuffled copy rather than shuffling in
         # place, so keep the result; otherwise the batch order never changes
         train_indexs = np.random.permutation(train_indexs)
         loss = 0
         max_margin_loss = 0
         l2_loss = 0
         for j in xrange(num_batch):
             losses = self.train_batch_instance(train_data[train_indexs[j * batch_size: (j + 1) * batch_size], 0],
                                                train_data[train_indexs[j * batch_size: (j + 1) * batch_size], 1],
                                                train_data[train_indexs[j * batch_size: (j + 1) * batch_size], 2],
                                                train_data[train_indexs[j * batch_size: (j + 1) * batch_size], 3])
             loss += losses[0]
             max_margin_loss += losses[1]
             l2_loss += losses[2]
         dev_acc, test_acc = self.accuracy(dev, test)
         logger.info("Iter %d: train loss: %s, margin loss: %s, l2 loss: %s" % (i + 1, loss, max_margin_loss, l2_loss))
         logger.info("Iter %d: dev acc: %s, test acc: %s" % (i + 1, dev_acc, test_acc))
Example 2
 def predict(x, predict_function, batch_size=100):
     x_length = np.sum(x > 0, axis=1)
     predict_indexs = align_batch_size(range(len(x)), batch_size)
     num_batch = len(predict_indexs) / batch_size
     predict = list()
     for i in xrange(num_batch):
         indexs = predict_indexs[i * batch_size:(i + 1) * batch_size]
         max_len = np.max(x_length[indexs])
         p1 = predict_function(x[indexs], max_len)[0]
         predict.append(p1)
     return np.concatenate(predict)[:len(x)]
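Note: align_batch_size itself is not listed on this page. Judging from the call sites (index lists padded so every batch is full, predictions truncated back with [:len(x)], and later comments about duplicated predictions), a minimal sketch could look like the following; treat it as an assumption, not the project's actual helper.

import itertools

def align_batch_size(indexs, batch_size):
    # Pad the index list up to a multiple of batch_size by cycling through the
    # existing indices, so that every batch is completely full. Callers drop
    # the duplicated predictions afterwards, e.g. np.concatenate(predict)[:len(x)].
    indexs = list(indexs)
    remainder = len(indexs) % batch_size
    if remainder:
        pad = itertools.islice(itertools.cycle(indexs), batch_size - remainder)
        indexs += list(pad)
    return indexs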
Example 3
 def train(self, train, dev, test):
     train_x, train_y = train
     dev_x, dev_y = dev
     test_x, test_y = test
     self.set_gpu_data(train, dev, test)
     train_index = align_batch_size(range(len(train_x)), self.batch_size)
     dev_index = align_batch_size(range(len(dev_x)), self.batch_size)
     test_index = align_batch_size(range(len(test_x)), self.batch_size)
     num_batch = len(train_index) / self.batch_size
     batch_list = range(num_batch)
     from sklearn.metrics import accuracy_score
     logger.info("epoch_num train_loss train_acc dev_acc test_acc")
     dev_result = list()
     test_result = list()
     for j in xrange(25):
         loss_list = list()
         batch_list = np.random.permutation(batch_list)
         for i in batch_list:
             indexs = train_index[i * self.batch_size:(i + 1) *
                                  self.batch_size]
             output, loss = self.train_batch(indexs)
             loss_list.append(loss)
         train_pred = self.predict(train_x, train_index,
                                   self.pred_train_batch)
         dev_pred = self.predict(dev_x, dev_index, self.pred_dev_batch)
         test_pred = self.predict(test_x, test_index, self.pred_test_batch)
         train_acc = accuracy_score(train_y, train_pred)
         dev_acc = accuracy_score(dev_y, dev_pred)
         test_acc = accuracy_score(test_y, test_pred)
         dev_result.append(dev_acc)
         test_result.append(test_acc)
         logger.info(
             "epoch %d, loss: %f, train: %f, dev: %f, test: %f" %
             (j, float(np.mean(loss_list)), train_acc, dev_acc, test_acc))
     best_dev_index = np.argmax(dev_result)
     print "Best Dev:", dev_result[best_dev_index], "Test:", test_result[
         best_dev_index]
Example 4
 def predict(self, x):
     self.test_x.set_value(x)
     predict_indexs = align_batch_size(range(len(x)), self.batch_size)
     num_batch = len(predict_indexs) / self.batch_size
     predict = [list() for i in xrange(self.num_task)]
     for i in xrange(num_batch):
         indexs = predict_indexs[i * self.batch_size:(i + 1) *
                                 self.batch_size]
         for task_index in xrange(self.num_task):
             predict[task_index].append(
                 self.pred_test_batch_list[task_index](indexs))
     predict = [
         np.concatenate(predict[task_index])
         for task_index in xrange(self.num_task)
     ]
     self.test_x.set_value(None)
     return predict
Example 5
    def train_relation(self, triples, dev, test, max_iter=50, C=1, batch_size=2000):
        neg_entity_list = dict()
        for _, e2, r in triples:
            if r not in neg_entity_list:
                neg_entity_list[r] = list()
            neg_entity_list[r].append(e2)
        '''train_data = np.zeros((len(triples), 3 + C), dtype=np.int64)
        logger.info("Start Generate Negative Instance ...")
        for triple_index in xrange(len(triples)):
            e1_index, e2_index, r_index = triples[triple_index]
            train_data[triple_index][0] = e1_index
            train_data[triple_index][1] = e2_index
            train_data[triple_index][2] = r_index
            for j in xrange(C):
                train_data[triple_index][3 + j] = np.random.choice(neg_entity_list[r_index])'''
        import cPickle
        # load the pre-generated training data (with negative samples) from disk
        data_file = open("data.txt", 'rb')
        data = cPickle.load(data_file)
        data_file.close()
        logger.info("Finish Generate Negative Instance")
        params_size = list()
        params_shape = list()
        for param in self.params:
            shape = param.get_value().shape
            params_size.append(np.prod(shape))
            params_shape.append(shape)
        iter_index = [0]  # a list, so the nested closures below can update the counter

        train_data = None
        dev_acc_list = list()
        test_acc_list = list()

        def minimize_me(vars):
            # unpack param
            vars_index = 0
            for param, size, shape in zip(self.params, params_size, params_shape):
                param.set_value(vars[vars_index: vars_index + size].reshape(shape))
                vars_index += size
            # get loss and gradients from theano function
            grad = np.zeros(np.sum(params_size))
            loss, max_margin_loss, l2_loss = 0, 0, 0
            for relation_index in xrange(self.relation_num):
                train_relation_data = train_data[train_data[:, 2] == relation_index, :]
                for c_index in xrange(C):
                    losses = self.grad_relation_margin(train_relation_data[:, 0],
                                                       train_relation_data[:, 1],
                                                       train_relation_data[:, 3 + c_index],
                                                       relation_index,
                                                       )
                    max_margin_loss += losses[0]
                    dloss = np.concatenate([param.ravel() for param in losses[1: ]])
                    grad += dloss
            grad = grad / train_data.shape[0]
            max_margin_loss = max_margin_loss / train_data.shape[0]
            losses = self.grad_l2()
            l2_loss = losses[0]
            dloss = np.concatenate([param.ravel() for param in losses[1: ]])
            grad += dloss
            loss = max_margin_loss + l2_loss
            # fmin_l_bfgs_b needs double precision...
            return loss.astype('float64'), grad.astype('float64')

        def test_me(x):
            iter_index[0] += 1
            loss, max_margin_loss, l2_loss = 0, 0, 0
            for relation_index in xrange(self.relation_num):
                train_relation_data = train_data[train_data[:, 2] == relation_index, :]
                for c_index in xrange(C):
                    losses = self.forward_relation_margin(train_relation_data[:, 0],
                                                          train_relation_data[:, 1],
                                                          train_relation_data[:, 3 + c_index],
                                                          relation_index,
                                                          )
                    max_margin_loss += losses[0]
            max_margin_loss = max_margin_loss / train_data.shape[0]
            losses = self.forward_l2()
            l2_loss = losses[0]
            loss = max_margin_loss + l2_loss
            logger.info("Iter %d: train loss: %s, margin loss: %s, l2 loss: %s" % (iter_index[0], loss,
                                                                                   max_margin_loss, l2_loss))
            dev_acc, test_acc = self.accuracy(dev, test)
            dev_acc_list.append(dev_acc)
            test_acc_list.append(test_acc)
            logger.info("Iter %d: dev acc: %s, test acc: %s" % (iter_index[0], dev_acc, test_acc))

        print "Start Minimize"
        DEFAULT_LBFGS_PARAMS = dict(iprint=0, factr=10, maxfun=1e4, maxiter=5)
        from scipy.optimize import fmin_l_bfgs_b
        train_indexs = align_batch_size(range(data.shape[0]), batch_size=batch_size)
        for i in xrange(max_iter):
            # keep the shuffled copy returned by np.random.permutation, so a
            # different random subset is drawn each iteration
            train_indexs = np.random.permutation(train_indexs)
            train_index = train_indexs[:batch_size]
            vars = np.concatenate([param.get_value().ravel() for param in self.params])
            train_data = data[train_index]
            best, bestval, info = fmin_l_bfgs_b(minimize_me, vars, **DEFAULT_LBFGS_PARAMS)
            vars_index = 0
            for param, size, shape in zip(self.params, params_size, params_shape):
                param.set_value(best[vars_index: vars_index + size].reshape(shape))
                vars_index += size
            test_me(None)
        dev_acc_list, test_acc_list = np.array(dev_acc_list), np.array(test_acc_list)
        max_index = np.argmax(dev_acc_list)
        logger.info("Max Dev Acc Iter %s: dev acc: %s, test acc: %s" % (max_index + 1, dev_acc_list[max_index],
                                                                        test_acc_list[max_index]))
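The pack/unpack of parameters around fmin_l_bfgs_b above (flatten every Theano shared parameter into one vector, later write reshaped slices back) is repeated inside minimize_me and after the optimizer call. The same pattern can be factored into two helpers; this is only a sketch, and the names pack_params and unpack_params are not part of the original code.

import numpy as np

def pack_params(params):
    # Flatten every Theano shared parameter into a single float64 vector,
    # since fmin_l_bfgs_b works in double precision.
    return np.concatenate([p.get_value().ravel() for p in params]).astype('float64')

def unpack_params(flat_vars, params, params_size, params_shape):
    # Write consecutive slices of the flat vector back into the shared
    # parameters, casting to each parameter's dtype in case floatX is float32.
    vars_index = 0
    for param, size, shape in zip(params, params_size, params_shape):
        value = flat_vars[vars_index: vars_index + size].reshape(shape)
        param.set_value(value.astype(param.get_value(borrow=True).dtype))
        vars_index += size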
Example 6
    def train(self, train, test, dev=None, iter_num=25):

        task_num = train[1].shape[1]

        if dev is None:
            train_part = int(round(train[0].shape[0] * 0.9))
            train_x, dev_x = train[0][:train_part], train[0][train_part:]
            train_y, dev_y = train[1][:train_part], train[1][train_part:]
        else:
            train_x, train_y = train
            dev_x, dev_y = dev
        test_x, test_y = test

        # Each task has its own set of valid instances
        # valid_train_indexs element: distinct valid instance indices per task
        # train_indexs       element: actual (batch-aligned) training indices per task
        valid_train_indexs = self.get_train_valid_index(train_y)
        train_indexs = self.get_train_valid_index(train_y)
        train_indexs = [
            align_batch_size(indexs, self.batch_size)
            for indexs in train_indexs
        ]

        valid_dev_indexs = self.get_train_valid_index(dev_y)
        dev_indexs = self.get_train_valid_index(dev_y)
        dev_indexs = [
            align_batch_size(indexs, self.batch_size) for indexs in dev_indexs
        ]

        valid_test_indexs = self.get_train_valid_index(test_y)
        test_indexs = self.get_train_valid_index(test_y)
        test_indexs = [
            align_batch_size(indexs, self.batch_size) for indexs in test_indexs
        ]

        self.set_gpu_data([train_x, train_y], [dev_x], [test_x])

        # train_list each element: (train_batch_index, task_index)
        train_batch_task_list = list()
        batch_task_count = [0] * task_num  # one batch counter per task
        for task_index in xrange(task_num):
            for task_batch_index in xrange(
                    len(train_indexs[task_index]) / self.batch_size):
                batch_task_count[task_index] += 1
                train_batch_task_list.append((task_batch_index, task_index))
        task_info = "\n".join([
            "Task %d: Batch %d" % (task_index, batch_task_count[task_index])
            for task_index in xrange(task_num)
        ])
        logger.info(task_info)
        logger.info("epoch_num train_loss train_acc dev_acc test_acc")
        over_task_dev_acc = [list() for i in xrange(task_num)]
        over_task_test_acc = [list() for i in xrange(task_num)]
        for j in xrange(iter_num):
            losses_list = list()
            for task_index in xrange(task_num):
                losses_list.append(list())
            np.random.shuffle(train_batch_task_list)
            set_dropout_on(True)
            for batch_index, task_index in train_batch_task_list:
                indexs = train_indexs[task_index][batch_index *
                                                  self.batch_size:
                                                  (batch_index + 1) *
                                                  self.batch_size]
                output, loss = self.train_batch_list[task_index](indexs)
                losses_list[task_index].append(loss)
            logger.info("epoch %d" % j)
            set_dropout_on(False)
            for task_index in xrange(task_num):
                train_true = train_y[valid_train_indexs[task_index],
                                     task_index]
                dev_true = dev_y[valid_dev_indexs[task_index], task_index]
                test_true = test_y[valid_test_indexs[task_index], task_index]
                # Aligned predictions contain duplicated entries from the padded indices
                train_pred = self.predict_model_data(
                    train_indexs[task_index],
                    self.pred_train_batch_list[task_index])
                dev_pred = self.predict_model_data(
                    dev_indexs[task_index],
                    self.pred_dev_batch_list[task_index])
                test_pred = self.predict_model_data(
                    test_indexs[task_index],
                    self.pred_test_batch_list[task_index])

                train_acc = accuracy_score(train_true,
                                           train_pred[:len(train_true)])
                dev_acc = accuracy_score(dev_true, dev_pred[:len(dev_true)])
                test_acc = accuracy_score(test_true,
                                          test_pred[:len(test_true)])

                over_task_dev_acc[task_index].append(dev_acc)
                over_task_test_acc[task_index].append(test_acc)

                output_string = "Task %d: Error: %s Acc: %f %f %f" % (
                    task_index, np.mean(
                        losses_list[task_index]), train_acc, dev_acc, test_acc)
                logger.info(output_string)
        max_indexs = [
            np.argmax(over_task_dev_acc[task_index])
            for task_index in xrange(task_num)
        ]
        max_test_score = np.mean([
            over_task_test_acc[task_index][max_index]
            for max_index, task_index in zip(max_indexs, xrange(task_num))
        ])
        return max_test_score
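get_train_valid_index is another helper that is not shown on this page. From the comments above it appears to return, for every task, the indices of the instances that carry a usable label for that task. A minimal sketch, under the assumption that a missing label is encoded as a negative value (the real criterion may differ):

import numpy as np

def get_train_valid_index(y):
    # y: (num_instances, num_tasks) label matrix; a negative entry is assumed
    # to mark a missing label for that task.
    return [np.where(y[:, task_index] >= 0)[0]
            for task_index in xrange(y.shape[1])]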
Example 7
    def train(self, train, dev, test=None, to_predict=None, iter_num=25):
        train_x, train_y = train
        dev_x, dev_y = dev
        test_x, test_y = None, None
        task_num = train_y.shape[1]

        # Each task has its own set of valid instances
        # valid_train_indexs element: distinct valid instance indices per task
        # train_indexs       element: actual (batch-aligned) training indices per task
        valid_train_indexs = self.get_train_valid_index(train_y)
        train_indexs = self.get_train_valid_index(train_y)
        train_indexs = [
            align_batch_size(indexs, self.batch_size)
            for indexs in train_indexs
        ]
        valid_dev_indexs = self.get_train_valid_index(dev_y)
        dev_indexs = self.get_train_valid_index(dev_y)
        dev_indexs = [
            align_batch_size(indexs, self.batch_size) for indexs in dev_indexs
        ]

        # Consider test data case
        if test is not None:
            test_x, test_y = test
            valid_test_indexs = self.get_train_valid_index(test_y)
            test_indexs = self.get_train_valid_index(test_y)
            test_indexs = [
                align_batch_size(indexs, self.batch_size)
                for indexs in test_indexs
            ]
            self.set_gpu_data(train, dev, test)
        else:
            self.set_gpu_data(train, dev)

        # train_list each element: (train_batch_index, task_index)
        train_batch_task_list = list()
        batch_task_count = [0] * task_num  # one batch counter per task
        for task_index in xrange(task_num):
            for task_batch_index in xrange(
                    len(train_indexs[task_index]) / self.batch_size):
                batch_task_count[task_index] += 1
                train_batch_task_list.append((task_batch_index, task_index))
        task_info = "\n".join([
            "Task %d: Batch %d" % (task_index, batch_task_count[task_index])
            for task_index in xrange(task_num)
        ])
        logger.info(task_info)
        logger.info("epoch_num train_loss train_acc dev_acc test_acc")
        over_task_dev_acc = [list() for i in xrange(task_num)]
        predict_result = list()
        for j in xrange(iter_num):
            losses_list = list()
            for task_index in xrange(task_num):
                losses_list.append(list())
            np.random.shuffle(train_batch_task_list)
            set_dropout_on(True)
            for batch_index, task_index in train_batch_task_list:
                indexs = train_indexs[task_index][batch_index *
                                                  self.batch_size:
                                                  (batch_index + 1) *
                                                  self.batch_size]
                output, loss = self.train_batch_list[task_index](indexs)
                losses_list[task_index].append(loss)
            logger.info("epoch %d" % j)
            set_dropout_on(False)
            for task_index in xrange(task_num):
                train_true = train_y[valid_train_indexs[task_index],
                                     task_index]
                dev_true = dev_y[valid_dev_indexs[task_index], task_index]
                # Aligned predictions contain duplicated entries from the padded indices
                train_pred = self.predict_model_data(
                    train_indexs[task_index],
                    self.pred_train_batch_list[task_index])
                dev_pred = self.predict_model_data(
                    dev_indexs[task_index],
                    self.pred_dev_batch_list[task_index])
                train_pred = train_pred[:len(train_true)]
                dev_pred = dev_pred[:len(dev_true)]
                train_acc = accuracy_score(train_true, train_pred)
                dev_acc = accuracy_score(dev_true, dev_pred)
                over_task_dev_acc[task_index].append(dev_acc)
                output_string = "Task %d: Error: %s Acc: %f %f " % (
                    task_index, np.mean(
                        losses_list[task_index]), train_acc, dev_acc)
                if test is not None:
                    test_true = test_y[valid_test_indexs[task_index],
                                       task_index]
                    test_pred = self.predict_model_data(
                        test_indexs[task_index],
                        self.pred_test_batch_list[task_index])
                    test_pred = test_pred[:len(test_true)]
                    test_acc = accuracy_score(test_true, test_pred)
                    output_string += "%d " % test_acc
                logger.info(output_string)
            if to_predict is not None:
                predict_result.append(self.predict(to_predict))
        max_indexs = [
            np.argmax(over_task_dev_acc[task_index])
            for task_index in xrange(task_num)
        ]
        max_dev_score = np.mean([
            over_task_dev_acc[task_index][max_index]
            for max_index, task_index in zip(max_indexs, xrange(task_num))
        ])
        if to_predict is not None:
            max_predict_result = [
                predict_result[max_indexs[task_index]][task_index]
                for task_index in xrange(task_num)
            ]
        return max_dev_score
Example 8
    def train(self,
              train,
              dev=None,
              test=None,
              to_predict=None,
              max_iter=5,
              batch_size=128,
              test_batch_size=1000,
              pre_test_batch=25,
              predict_path=None):
        train_x, train_y = train
        self.set_train_data(train)
        train_index = align_batch_size(range(len(train_y)), batch_size)
        train_x_length = np.sum((train_x > 0), axis=1)
        num_batch = len(train_index) / batch_size
        batch_list = range(num_batch)
        log_loss_history, acc_history = list(), list()
        batch_log_loss_history, batch_acc_history = list(), list()
        logger.info("start training")
        batch_count = 0
        best_dev_acc = 0
        for i in xrange(max_iter):
            iter_loss_list = list()
            iter_acc_list = list()
            batch_list = np.random.permutation(batch_list)
            for j in batch_list:
                set_dropout_on(True)
                batch_count += 1
                indexs = train_index[j * batch_size:(j + 1) * batch_size]
                max_len = np.max(train_x_length[indexs])
                self.train_batch(indexs, max_len)
                if batch_count % pre_test_batch == 0:
                    set_dropout_on(False)
                    batch_log_loss, batch_acc = [batch_count], [batch_count]
                    if dev is not None:
                        dev_x, dev_y = dev
                        dev_acc, dev_log_loss = self.predict_data_log_loss_acc(
                            dev_x, dev_y, test_batch_size)
                        batch_log_loss.append(dev_log_loss)
                        batch_acc.append(dev_acc)
                        if dev_acc > best_dev_acc:
                            best_dev_acc = dev_acc
                            save_model("model/%s.best.model" % predict_path,
                                       self)
                        logger.info("batch %d,   dev log loss %s, acc %s" %
                                    (batch_count, dev_log_loss, dev_acc))
                    if test is not None:
                        test_x, test_y = test
                        test_acc, test_log_loss = self.predict_data_log_loss_acc(
                            test_x, test_y, test_batch_size)
                        batch_log_loss.append(test_log_loss)
                        batch_acc.append(test_acc)
                        logger.info("batch %d,  test log loss %s, acc %s" %
                                    (batch_count, test_log_loss, test_acc))
                    batch_log_loss_history.append(batch_log_loss)
                    batch_acc_history.append(batch_acc)
            set_dropout_on(False)
            train_acc, train_log_loss = self.predict_data_log_loss_acc(
                train_x, train_y, test_batch_size)
            iter_loss_list.append(train_log_loss)
            iter_acc_list.append(train_acc)
            iter_l2_loss, iter_l2_norm = self.get_l2_loss()
            logger.info("epoch %d, param l2 losss %s, l2 norm %s" %
                        (i, iter_l2_loss, iter_l2_norm))
            logger.info("epoch %d, train log loss %s, acc %s" %
                        (i, train_log_loss, train_acc))
            if dev is not None:
                dev_x, dev_y = dev
                dev_acc, dev_log_loss = self.predict_data_log_loss_acc(
                    dev_x, dev_y, test_batch_size)
                logger.info("epoch %d,   dev log loss %s, acc %s" %
                            (i, dev_log_loss, dev_acc))
                if dev_acc > best_dev_acc:
                    best_dev_acc = dev_acc
                    save_model("model/%s.best.model" % predict_path, self)
                iter_loss_list.append(dev_log_loss)
                iter_acc_list.append(dev_acc)
            if test is not None:
                test_x, test_y = test
                test_acc, test_log_loss = self.predict_data_log_loss_acc(
                    test_x, test_y, test_batch_size)
                logger.info("epoch %d,  test log loss %s, acc %s" %
                            (i, test_log_loss, test_acc))
                iter_loss_list.append(test_log_loss)
                iter_acc_list.append(test_acc)
            log_loss_history.append(iter_loss_list)
            acc_history.append(iter_acc_list)

        # Log Best Epoch
        log_loss_history = np.array(log_loss_history)
        acc_history = np.array(acc_history)

        # Log Best Batch
        batch_log_loss_history = np.array(batch_log_loss_history)
        batch_acc_history = np.array(batch_acc_history)
        self.log_to_file("Epoch", log_loss_history, acc_history)
        self.log_to_file("Batch", batch_log_loss_history, batch_acc_history)
        save_model("model/%s.final.model" % predict_path, self)
Example 9
    def cross_validation_train(self,
                               data,
                               cv_times=5,
                               max_iter=5,
                               batch_size=128,
                               test_batch_size=1000,
                               pre_test_batch=25,
                               model_path=""):
        data_x, data_y = data
        cv_i = -1
        cv_epoch_log_loss = list()
        cv_epoch_acc = list()
        cv_batch_log_loss = list()
        cv_batch_acc = list()
        self.save_model_param_to_file("model/%s.cv_init.param.model" %
                                      model_path)
        for train_index, dev_index, test_index in generate_cross_validation_index(
                data_x, cv_times=cv_times, dev_ratio=DEV_RATIO, random=True):
            self.load_model_param_from_file("model/%s.cv_init.param.model" %
                                            model_path)
            logger.debug("Train Size %s, Dev Size %s, Test Size %s" %
                         (train_index.shape[0], dev_index.shape[0],
                          test_index.shape[0]))
            cv_i += 1
            train_x, train_y = data_x[train_index], data_y[train_index]
            dev_x, dev_y = data_x[dev_index], data_y[dev_index]
            test_x, test_y = data_x[test_index], data_y[test_index]
            self.set_train_data([train_x, train_y])
            # reuse the name train_index for the batch-aligned index list (the CV
            # split indices were already consumed above)
            train_index = align_batch_size(range(len(train_y)), batch_size)
            train_x_length = np.sum((train_x > 0), axis=1)
            num_batch = len(train_index) / batch_size
            batch_list = range(num_batch)
            log_loss_history, acc_history = list(), list()
            batch_log_loss_history, batch_acc_history = list(), list()
            logger.info("start training")
            batch_count = 0
            for i in xrange(max_iter):
                iter_loss_list = list()
                iter_acc_list = list()
                batch_list = np.random.permutation(batch_list)
                for j in batch_list:
                    set_dropout_on(True)
                    batch_count += 1
                    indexs = train_index[j * batch_size:(j + 1) * batch_size]
                    max_len = np.max(train_x_length[indexs])
                    self.train_batch(indexs, max_len)
                    if batch_count % pre_test_batch == 0:
                        set_dropout_on(False)
                        dev_acc, dev_log_loss = self.predict_data_log_loss_acc(
                            dev_x, dev_y, test_batch_size)
                        logger.info(
                            "cv %d batch %d,   dev log loss %s, acc %s" %
                            (cv_i, batch_count, dev_log_loss, dev_acc))
                        test_acc, test_log_loss = self.predict_data_log_loss_acc(
                            test_x, test_y, test_batch_size)
                        logger.info(
                            "cv %d batch %d,  test log loss %s, acc %s" %
                            (cv_i, batch_count, test_log_loss, test_acc))
                        batch_log_loss_history.append([
                            batch_count,
                            dev_log_loss,
                            test_log_loss,
                        ])
                        batch_acc_history.append(
                            [batch_count, dev_acc, test_acc])
                set_dropout_on(False)
                train_acc, train_log_loss = self.predict_data_log_loss_acc(
                    train_x, train_y, test_batch_size)
                iter_loss_list.append(train_log_loss)
                iter_acc_list.append(train_acc)
                iter_l2_loss, iter_l2_norm = self.get_l2_loss()
                logger.info("cv %d epoch %d, param l2 losss %s, l2 norm %s" %
                            (cv_i, i, iter_l2_loss, iter_l2_norm))
                logger.info("cv %d epoch %d, train log loss %s, acc %s" %
                            (cv_i, i, train_log_loss, train_acc))

                dev_acc, dev_log_loss = self.predict_data_log_loss_acc(
                    dev_x, dev_y, test_batch_size)
                logger.info("cv %d epoch %d,   dev log loss %s, acc %s" %
                            (cv_i, i, dev_log_loss, dev_acc))
                iter_loss_list.append(dev_log_loss)
                iter_acc_list.append(dev_acc)

                test_acc, test_log_loss = self.predict_data_log_loss_acc(
                    test_x, test_y, test_batch_size)
                logger.info("cv %d epoch %d,   test log loss %s, acc %s" %
                            (cv_i, i, test_log_loss, test_acc))
                iter_loss_list.append(test_log_loss)
                iter_acc_list.append(test_acc)

                log_loss_history.append(iter_loss_list)
                acc_history.append(iter_acc_list)

            # Log Best Epoch
            log_loss_history = np.array(log_loss_history)
            acc_history = np.array(acc_history)

            # Log Best Batch
            batch_log_loss_history = np.array(batch_log_loss_history)
            batch_acc_history = np.array(batch_acc_history)

            self.log_to_file("Epoch",
                             log_loss_history,
                             acc_history,
                             cv_iter=cv_i)
            self.log_to_file("Batch",
                             batch_log_loss_history,
                             batch_acc_history,
                             cv_iter=cv_i)

            # record best epoch
            best_loss_epoch = np.argmin(log_loss_history[:, 1])
            best_acc_epoch = np.argmax(acc_history[:, 1])
            cv_epoch_acc.append([
                log_loss_history[best_acc_epoch, 2],
                acc_history[best_acc_epoch, 2]
            ])
            cv_epoch_log_loss.append([
                log_loss_history[best_loss_epoch, 2],
                acc_history[best_loss_epoch, 2]
            ])

            # record best batch
            best_loss_batch = np.argmin(batch_log_loss_history[:, 1])
            best_acc_batch = np.argmax(batch_acc_history[:, 1])
            cv_batch_acc.append([
                batch_log_loss_history[best_acc_batch, 2],
                batch_acc_history[best_acc_batch, 2]
            ])
            cv_batch_log_loss.append([
                batch_log_loss_history[best_loss_batch, 2],
                batch_acc_history[best_loss_batch, 2]
            ])

        cv_epoch_acc = np.array(cv_epoch_acc)
        cv_epoch_log_loss = np.array(cv_epoch_log_loss)
        logger.info(
            "%s Times CV Best Epoch Dev Acc: test log loss %s, acc %s" %
            (cv_times, np.mean(cv_epoch_acc[:, 0]), np.mean(cv_epoch_acc[:, 1])))
        logger.info(
            "%s Times CV Best Epoch Dev Log Loss: test log loss %s, acc %s" %
            (cv_times, np.mean(cv_epoch_log_loss[:, 0]),
             np.mean(cv_epoch_log_loss[:, 1])))