Example #1
 def test(self,
          test_words,
          test_chars,
          test_labels,
          batch_size,
          print_info=True,
          restore=True):
     if restore:
         self.restore_last_session()
     accuracies, predictions = list(), list()
     for i, (b_words, b_seq_len, b_chars, b_char_seq_len,
             b_labels) in enumerate(
                 batch_iter(test_words, test_chars, test_labels,
                            batch_size)):
         batch_labels = []
         for j in range(self.num_classifier):
             ecoc_array = self.nary_ecoc[:, j]
             b_lbs = remap_labels(b_labels.copy(), ecoc_array)
             b_lbs = dense_to_one_hot(b_lbs, self.num_class)
             batch_labels.append(b_lbs)
         feed_dict = self.get_feed_dict(b_words, b_seq_len, b_chars,
                                        b_char_seq_len, batch_labels)
         pred_labels = self.sess.run(self.pred_labels, feed_dict=feed_dict)
         acc = compute_ensemble_accuracy(pred_labels, self.nary_ecoc,
                                         b_labels)
         accuracies.append(acc)
         predictions.append(pred_labels)
     accuracy = np.mean(accuracies)
     predictions = np.concatenate(predictions, axis=0)
     if print_info:
         self.logger.info(" -- Test Accuracy: {:.4f}".format(accuracy *
                                                             100))
     return accuracy, np.reshape(predictions,
                                 newshape=(predictions.shape[0],
                                           self.num_classifier))
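
These snippets repeatedly call two small helpers, remap_labels and dense_to_one_hot, that are never shown. A minimal sketch of the behaviour they appear to have, assuming nary_ecoc is a (num_class, num_classifier) integer matrix whose entry [c, j] is the meta-class that classifier j assigns to original class c. The .copy() calls above suggest the real remap_labels may modify its argument in place; this sketch returns a new array instead.

import numpy as np


def remap_labels(labels, ecoc_array):
    # Map original class ids to the meta-classes defined by one ECOC column.
    # labels: 1-D int array of original class ids.
    # ecoc_array: length-num_class int array; entry c is the meta-class
    # assigned to original class c by this classifier.
    return ecoc_array[labels]


def dense_to_one_hot(labels, num_classes):
    # Convert a 1-D int label array into a dense one-hot float matrix.
    one_hot = np.zeros((labels.shape[0], num_classes), dtype=np.float32)
    one_hot[np.arange(labels.shape[0]), labels] = 1.0
    return one_hot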
Example #2
    def train(self, train_words, train_chars, train_labels, test_words,
              test_chars, test_labels):
        global_test_acc, global_step, lr = 0.0, 0, self.cfg.lr
        num_batches = math.ceil(float(len(train_words)) / self.cfg.batch_size)

        self.logger.info("start training...")
        for epoch in range(1, self.cfg.epochs + 1):
            self.logger.info("Epoch {}/{}:".format(epoch, self.cfg.epochs))
            train_words, train_chars, train_labels = sklearn.utils.shuffle(
                train_words, train_chars, train_labels)
            prog = Progbar(target=num_batches)

            for i, (b_words, b_seq_len, b_chars, b_char_seq_len,
                    b_labels) in enumerate(
                        batch_iter(train_words, train_chars, train_labels,
                                   self.cfg.batch_size)):
                global_step += 1
                batch_labels = []
                for j in range(self.num_classifier):
                    ecoc_array = self.nary_ecoc[:, j]
                    b_lbs = remap_labels(b_labels.copy(), ecoc_array)
                    b_lbs = dense_to_one_hot(b_lbs, self.num_class)
                    batch_labels.append(b_lbs)
                feed_dict = self.get_feed_dict(b_words,
                                               b_seq_len,
                                               b_chars,
                                               b_char_seq_len,
                                               batch_labels,
                                               lr=lr,
                                               training=True)
                _, pred_labels, loss = self.sess.run(
                    [self.train_op, self.pred_labels, self.loss],
                    feed_dict=feed_dict)
                acc = compute_ensemble_accuracy(pred_labels, self.nary_ecoc,
                                                b_labels)
                prog.update(i + 1, [("Global Step", global_step),
                                    ("Train Loss", loss),
                                    ("Train Acc", acc * 100)])
            accuracy, _ = self.test(test_words,
                                    test_chars,
                                    test_labels,
                                    batch_size=200,
                                    print_info=True,
                                    restore=False)

            if accuracy > global_test_acc:
                global_test_acc = accuracy
                self.save_session(epoch)
            lr = self.cfg.lr / (1 + epoch * self.cfg.lr_decay)
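
compute_ensemble_accuracy is likewise assumed rather than defined in these examples. From the way it is called, it appears to decode the per-classifier meta-class predictions back to original classes by nearest ECOC codeword (minimum Hamming distance) and score them against the true labels. A hedged sketch, assuming pred_labels can be arranged as a (num_classifier, batch_size) integer array:

import numpy as np


def compute_ensemble_accuracy(pred_labels, nary_ecoc, true_labels):
    # pred_labels: (num_classifier, batch) predicted meta-classes.
    # nary_ecoc:   (num_class, num_classifier) codeword matrix.
    # true_labels: (batch,) original class ids.
    preds = np.asarray(pred_labels)          # (num_classifier, batch)
    codes = preds.T[:, np.newaxis, :]        # (batch, 1, num_classifier)
    # Hamming distance from each sample's predicted codeword to every class codeword.
    dists = np.sum(codes != nary_ecoc[np.newaxis, :, :], axis=-1)
    decoded = np.argmin(dists, axis=-1)      # (batch,) recovered class ids
    return float(np.mean(decoded == np.asarray(true_labels)))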
Example #3
 def test(self, test_dataset, print_info=True):
     self.restore_last_session()
     test_imgs, test_labels = test_dataset.images, test_dataset.labels
     y_labels = []
     for j in range(self.num_classifier):
         ecoc_array = self.nary_ecoc[:, j]
         b_lbs = remap_labels(test_labels.copy(), ecoc_array)
         b_lbs = dense_to_one_hot(b_lbs, self.num_class)
         y_labels.append(b_lbs)
     feed_dict = self._get_feed_dict(test_imgs, y_labels)
     pred_labels, test_loss = self.sess.run([self.pred_labels, self.cost],
                                            feed_dict=feed_dict)
     test_acc = compute_ensemble_accuracy(pred_labels, self.nary_ecoc,
                                          test_dataset.labels)
     if print_info:
         self.logger.info(" -- Test Loss: {}, Test Accuracy: {}".format(
             test_loss, test_acc))
     return pred_labels
Example #4
 def test(self, x_test, y_test, print_info=True):
     self.restore_last_session()
     if self.num_class > 10:
         x_test = self.normalize_100_production(x_test)
     else:
         x_test = self.normalize_10_production(x_test)
     y_labels = []
     for j in range(self.num_classifier):
         ecoc_array = self.nary_ecoc[:, j]
         b_lbs = remap_labels(y_test.copy(), ecoc_array)
         b_lbs = dense_to_one_hot(b_lbs, self.num_class)
         y_labels.append(b_lbs)
     feed_dict = self._get_feed_dict(x_test, y_labels)
     pred_labels, test_loss = self.sess.run([self.pred_labels, self.cost],
                                            feed_dict=feed_dict)
     test_acc = compute_ensemble_accuracy(pred_labels, self.nary_ecoc,
                                          y_test)
     if print_info:
         self.logger.info(" -- Test Loss: {}, Test Accuracy: {}".format(
             test_loss, test_acc))
     return pred_labels
Example #5
    if config.training:
        nary_ecoc = gen_nary_ecoc(num_class=num_class, num_meta_class=num_meta_class, num_classifier=num_classifier)
        np.savez_compressed(save_path + "nary_ecoc.npz", embeddings=nary_ecoc)
    else:
        nary_ecoc = np.load(save_path + "nary_ecoc.npz")["embeddings"]

    # start training...
    nary_ecoc_test_result = []
    for i in range(num_classifier):
        sys.stdout.write("\nThe {}/{} classifier:\n".format(i + 1, num_classifier))
        sys.stdout.flush()
        ecoc_array = nary_ecoc[:, i]
        train_words_ith, train_chars_ith, train_labels_ith = train_words.copy(), train_chars.copy(), train_labels.copy()
        test_words_ith, test_chars_ith, test_labels_ith = test_words.copy(), test_chars.copy(), test_labels.copy()
        train_labels_ith = remap_labels(train_labels_ith, ecoc_array)
        test_labels_ith = remap_labels(test_labels_ith, ecoc_array)
        model = TextModel(config, num_meta_class, word_dict, char_dict, vectors, ckpt_path=ckpt_path.format(i))
        if config.training:
            model.train(train_words_ith, train_chars_ith, train_labels_ith, test_words_ith, test_chars_ith,
                        test_labels_ith)
        _, pred_labels = model.test(test_words_ith, test_chars_ith, test_labels_ith, batch_size=200)
        model.close_session()
        nary_ecoc_test_result.append(pred_labels)

    nary_ecoc_labels = np.concatenate(nary_ecoc_test_result, axis=1)
    np.savez_compressed(save_path + "pred_labels.npz", embeddings=nary_ecoc_labels)

    if config.ablation:
        nl = list(range(5, num_classifier + 1, 5))
        for n in nl:
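
The gen_nary_ecoc helper called at the top of this example is not defined in the excerpt. A plausible minimal version, following the N-ary ECOC idea, draws one random mapping from original classes to meta-classes per classifier; the constraint that every meta-class appears in every column is an assumption, not necessarily the authors' exact construction.

import numpy as np


def gen_nary_ecoc(num_class, num_meta_class, num_classifier, seed=None):
    # Build a random (num_class, num_classifier) N-ary ECOC coding matrix
    # whose entries are meta-class ids in [0, num_meta_class).
    rng = np.random.RandomState(seed)
    columns = []
    for _ in range(num_classifier):
        while True:
            col = rng.randint(0, num_meta_class, size=num_class)
            if len(np.unique(col)) == num_meta_class:  # every meta-class used
                columns.append(col)
                break
    return np.stack(columns, axis=1)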
Example #6
    def train(self,
              x_train,
              y_train,
              x_test,
              y_test,
              batch_size=200,
              epochs=10):
        x_train, x_test = self.normalize(x_train, x_test)

        self.logger.info("data augmentation...")
        datagen = ImageDataGenerator(featurewise_center=True,
                                     samplewise_center=False,
                                     horizontal_flip=True,
                                     cval=0.0,
                                     featurewise_std_normalization=False,
                                     preprocessing_function=None,
                                     rescale=None,
                                     samplewise_std_normalization=False,
                                     zca_whitening=False,
                                     zca_epsilon=1e-06,
                                     rotation_range=15,
                                     width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     shear_range=0.0,
                                     zoom_range=0.0,
                                     channel_shift_range=0.0,
                                     fill_mode='nearest',
                                     vertical_flip=False,
                                     data_format="channels_last")
        datagen.fit(x_train)
        x_aug, y_aug = x_train.copy(), y_train.copy()
        x_aug = datagen.flow(x_aug,
                             np.zeros(x_train.shape[0]),
                             batch_size=x_train.shape[0],
                             shuffle=False).next()[0]
        x_train, y_train = np.concatenate((x_train, x_aug)), np.concatenate(
            (y_train, y_aug))
        self.logger.info("start training...")
        global_step, lr, global_test_acc = 0, self.learning_rate, 0.0
        num_batches = x_train.shape[0] // batch_size

        for epoch in range(1, epochs + 1):
            self.logger.info("Epoch {}/{}:".format(epoch, epochs))
            x_train, y_train = utils.shuffle(
                x_train, y_train, random_state=0)  # shuffle training dataset
            prog = Progbar(target=num_batches)
            prog.update(0, [("Global Step", 0), ("Train Loss", 0.0),
                            ("Train Acc", 0.0), ("Test Loss", 0.0),
                            ("Test Acc", 0.0)])
            for i, (batch_imgs, batch_labels) in enumerate(
                    batch_dataset(x_train, y_train, batch_size)):
                global_step += 1
                b_labels = []
                for j in range(self.num_classifier):
                    ecoc_array = self.nary_ecoc[:, j]
                    b_lbs = remap_labels(batch_labels.copy(), ecoc_array)
                    b_lbs = dense_to_one_hot(b_lbs, self.num_class)
                    b_labels.append(b_lbs)
                feed_dict = self._get_feed_dict(batch_imgs, b_labels, lr, True)
                _, pred_labels, loss = self.sess.run(
                    [self.train_op, self.pred_labels, self.cost],
                    feed_dict=feed_dict)
                acc = compute_ensemble_accuracy(pred_labels, self.nary_ecoc,
                                                batch_labels)
                if global_step % 200 == 0:
                    y_labels = []
                    for j in range(self.num_classifier):
                        ecoc_array = self.nary_ecoc[:, j]
                        b_lbs = remap_labels(y_test.copy(), ecoc_array)
                        b_lbs = dense_to_one_hot(b_lbs, self.num_class)
                        y_labels.append(b_lbs)
                    feed_dict = self._get_feed_dict(x_test, y_labels)
                    test_pred_labels, test_loss = self.sess.run(
                        [self.pred_labels, self.cost], feed_dict=feed_dict)
                    test_acc = compute_ensemble_accuracy(
                        test_pred_labels, self.nary_ecoc, y_test)
                    prog.update(i + 1, [("Global Step", int(global_step)),
                                        ("Train Loss", loss),
                                        ("Train Acc", acc),
                                        ("Test Loss", test_loss),
                                        ("Test Acc", test_acc)])
                    if test_acc > global_test_acc:
                        global_test_acc = test_acc
                        self.save_session(global_step)
                else:
                    prog.update(i + 1, [("Global Step", int(global_step)),
                                        ("Train Loss", loss),
                                        ("Train Acc", acc)])
            if epoch > 10:
                lr = self.learning_rate / (1 + (epoch - 10) * self.lr_decay)
            y_labels = []
            for j in range(self.num_classifier):
                ecoc_array = self.nary_ecoc[:, j]
                b_lbs = remap_labels(y_test.copy(), ecoc_array)
                b_lbs = dense_to_one_hot(b_lbs, self.num_class)
                y_labels.append(b_lbs)
            feed_dict = self._get_feed_dict(x_test, y_labels)
            test_pred_labels, test_loss = self.sess.run(
                [self.pred_labels, self.cost], feed_dict=feed_dict)
            test_acc = compute_ensemble_accuracy(test_pred_labels,
                                                 self.nary_ecoc, y_test)
            self.logger.info(
                "Epoch: {}, Global Step: {}, Test Loss: {}, Test Accuracy: {}".
                format(epoch, global_step, test_loss, test_acc))
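
batch_dataset (and the analogous batch_iter used in the text-model examples) is another helper these snippets assume. A minimal generator consistent with how it is consumed above, assuming x and y are NumPy arrays sharing their first dimension; it yields only full batches, which matches num_batches = x_train.shape[0] // batch_size.

def batch_dataset(x, y, batch_size):
    # Yield (images, labels) mini-batches of exactly batch_size examples,
    # in the order the (already shuffled) arrays are given.
    num_batches = x.shape[0] // batch_size
    for i in range(num_batches):
        start = i * batch_size
        yield x[start:start + batch_size], y[start:start + batch_size]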
Example #7
 def train(self, train_dataset, test_dataset):
     global_test_acc = 0.0
     global_step = 0
     test_imgs, test_labels = test_dataset.images, test_dataset.labels
     self.logger.info("start training...")
     for epoch in range(1, self.epochs + 1):
         self.logger.info("Epoch {}/{}:".format(epoch, self.epochs))
         num_batches = train_dataset.num_examples // self.batch_size
         prog = Progbar(target=num_batches)
         prog.update(0, [("Global Step", 0), ("Train Loss", 0.0),
                         ("Train Acc", 0.0), ("Test Loss", 0.0),
                         ("Test Acc", 0.0)])
         for i in range(num_batches):
             global_step += 1
             train_imgs, train_labels = train_dataset.next_batch(
                 self.batch_size)
             b_labels = []
             for j in range(self.num_classifier):
                 ecoc_array = self.nary_ecoc[:, j]
                 b_lbs = remap_labels(train_labels.copy(), ecoc_array)
                 b_lbs = dense_to_one_hot(b_lbs, self.num_class)
                 b_labels.append(b_lbs)
             feed_dict = self._get_feed_dict(train_imgs, b_labels, True)
             _, pred_labels, loss = self.sess.run(
                 [self.train_op, self.pred_labels, self.cost],
                 feed_dict=feed_dict)
             acc = compute_ensemble_accuracy(pred_labels, self.nary_ecoc,
                                             train_labels)
             if global_step % 100 == 0:
                 y_labels = []
                 for j in range(self.num_classifier):
                     ecoc_array = self.nary_ecoc[:, j]
                     b_lbs = remap_labels(test_labels.copy(), ecoc_array)
                     b_lbs = dense_to_one_hot(b_lbs, self.num_class)
                     y_labels.append(b_lbs)
                 feed_dict = self._get_feed_dict(test_imgs, y_labels)
                 test_pred_labels, test_loss = self.sess.run(
                     [self.pred_labels, self.cost], feed_dict=feed_dict)
                 test_acc = compute_ensemble_accuracy(
                     test_pred_labels, self.nary_ecoc, test_labels)
                 prog.update(i + 1, [("Global Step", int(global_step)),
                                     ("Train Loss", loss),
                                     ("Train Acc", acc),
                                     ("Test Loss", test_loss),
                                     ("Test Acc", test_acc)])
                 if test_acc > global_test_acc:
                     global_test_acc = test_acc
                     self.save_session(global_step)
             else:
                 prog.update(i + 1, [("Global Step", int(global_step)),
                                     ("Train Loss", loss),
                                     ("Train Acc", acc)])
         y_labels = []
         for j in range(self.num_classifier):
             ecoc_array = self.nary_ecoc[:, j]
             b_lbs = remap_labels(test_labels.copy(), ecoc_array)
             b_lbs = dense_to_one_hot(b_lbs, self.num_class)
             y_labels.append(b_lbs)
         feed_dict = self._get_feed_dict(test_imgs, y_labels)
         test_pred_labels, test_loss = self.sess.run(
             [self.pred_labels, self.cost], feed_dict=feed_dict)
         test_acc = compute_ensemble_accuracy(test_pred_labels,
                                              self.nary_ecoc, test_labels)
         self.logger.info(
             "Epoch: {}, Global Step: {}, Test Loss: {}, Test Accuracy: {}".
             format(epoch, global_step, test_loss, test_acc))
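
Since Example #5 saves both nary_ecoc.npz and the stacked per-classifier test predictions in pred_labels.npz, the ensemble can also be scored offline. A usage sketch, assuming the saved predictions hold one meta-class per classifier per test example and that save_path and the true test_labels array are still available; the nearest-codeword rule is the same one sketched for compute_ensemble_accuracy above.

import numpy as np

nary_ecoc = np.load(save_path + "nary_ecoc.npz")["embeddings"]      # (num_class, num_classifier)
pred_labels = np.load(save_path + "pred_labels.npz")["embeddings"]  # (num_test, num_classifier)

# Decode each row of predictions to the class with the closest codeword.
dists = np.sum(pred_labels[:, np.newaxis, :] != nary_ecoc[np.newaxis, :, :], axis=-1)
decoded = np.argmin(dists, axis=-1)
print("offline ensemble accuracy:", np.mean(decoded == test_labels))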