コード例 #1
0
class TestAssemblePredicted(unittest.TestCase):
    """Exercise an ESLNN model built on the Doc2Vec-converted "test" dataset."""

    def setUp(self):
        """Create the three dataset splits and a model with constant level-0 weights."""
        self.dataset, self.dataset_validate, self.dataset_test = [
            Dataset("test", mode, 1, sequence=True)
            for mode in ("train", "validate", "test")
        ]
        # All splits are converted with the same TempDoc2vec instance.
        doc2vec = TempDoc2vec()
        for split in (self.dataset, self.dataset_validate, self.dataset_test):
            split.change_to_Doc2Vec(doc2vec)

        level_count = self.dataset.number_of_level()
        hidden = [5] * level_count
        batch_size = [3] * level_count
        target_hidden = [3] * (level_count - 1)
        self.model = ESLNN(
            "test",
            self.dataset,
            self.dataset_validate,
            self.dataset_test,
            30,
            hidden,
            target_hidden,
            stopping_time=3,
            batch_size=batch_size,
        )

        # Overwrite the first classifier's parameters with constants
        # (presumably so the expected scores below are reproducible).
        first_classifier = self.model.classifier[0]
        first_classifier.dense.weight.data.fill_(1)
        first_classifier.dense.bias.data.zero_()
        first_classifier.logit.weight.data.fill_(0.2)
        first_classifier.logit.bias.data.zero_()

    def test_initial_model(self):
        """Every per-level classifier reports the expected sizes and class count."""
        for level in range(self.dataset.number_of_level()):
            classifier = self.model.classifier[level]
            expected_classes = self.dataset.check_each_number_of_class(level)
            self.assertEqual(classifier.input_size, 7)
            self.assertEqual(classifier.hidden_size, 5)
            self.assertEqual(classifier.number_of_class, expected_classes)

    def test_score_each_level(self):
        """Level 0 scores a macro and micro F1 of 1 on the train split."""
        expected = 1
        f1_macro, f1_micro = self.model.evaluate_each_level(0, "train")
        for score in (f1_macro, f1_micro):
            self.assertAlmostEqual(expected, score, 6)

    def test_evaluate(self):
        """evaluate("train") yields the expected overall macro F1 and per-level values."""
        expected_per_level = [1, 4 / 5, 4 / 5, 4 / 5, 1 / 2, 0]
        f1_macro, _f1_micro, f1_each = self.model.evaluate("train")
        self.assertAlmostEqual(0.7125, f1_macro, 6)
        # NOTE: only the first component of each per-level entry is checked;
        # the matching check on the second component is currently disabled.
        for level_scores, expected in zip(f1_each, expected_per_level):
            self.assertAlmostEqual(expected, level_scores[0], 6)

    def test_train(self):
        """Training completes; level 0 then scores 1 and level 1 macro F1 is 0."""
        # Training only has to run to completion here.
        self.model.train()

        expected = 1
        f1_macro, f1_micro = self.model.evaluate_each_level(0, "train")
        for score in (f1_macro, f1_micro):
            self.assertAlmostEqual(expected, score, 6)

        f1_macro, _ = self.model.evaluate_each_level(1, "train")
        self.assertAlmostEqual(0.0, f1_macro, 6)

    def test_threshold_tuning(self):
        """After training and threshold tuning, level 1 scores 0.8 on both F1 measures."""
        self.model.train()
        self.model.tuning_threshold()

        expected = 1
        f1_macro, f1_micro = self.model.evaluate_each_level(0, "train")
        for score in (f1_macro, f1_micro):
            self.assertAlmostEqual(expected, score, 6)

        f1_macro, f1_micro = self.model.evaluate_each_level(1, "train")
        for score in (f1_macro, f1_micro):
            self.assertAlmostEqual(0.8, score, 6)

    def test_correction(self):
        """child_based_correction maps each input label row to the expected row."""
        raw_labels = [
            [0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 1],
            [0, 0, 0, 0, 0, 1, 0, 0],
        ]
        expected_rows = [
            [1, 0, 0, 1, 0, 0, 0, 0],
            [1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 0, 0],
        ]
        corrected = self.model.child_based_correction(ByteTensor(raw_labels))
        corrected_rows = corrected.cpu().numpy().tolist()
        for actual_row, expected_row in zip(corrected_rows, expected_rows):
            self.assertListEqual(expected_row, actual_row)
コード例 #2
0
class DatasetDoc2vecUnitTest(unittest.TestCase):
    """Tests for the "test" train Dataset after conversion with TempDoc2vec."""

    def setUp(self):
        """Load the train split of fold 1 and convert it with a TempDoc2vec."""
        self.dataset = Dataset("test", "train", 1, sequence=True)
        doc2vec = TempDoc2vec()
        self.dataset.change_to_Doc2Vec(doc2vec)

    def test_change_to_Doc2Vec(self):
        """The converted dataset exposes the expected feature rows and label rows."""
        label = self.dataset.labels.toarray().astype(int).tolist()
        data = self.dataset.datas.tolist()
        real_data = [
            [0, 0, 1, 0, 0, 1, 0],
            [0, 1, 0, 0, 0, 0, 1],
            [0, 1, 0, 0, 1, 0, 0],
        ]
        real_label = [
            [1, 1, 1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 0],
        ]
        self.assertListEqual(data, real_data)
        self.assertListEqual(label, real_label)

    def test_generate_batch(self):
        """generate_batch yields the expected (data, label) pairs for levels 0, 1 and -1."""
        # One table of expected labels per requested level, in the order
        # the levels are requested below: 0, 1, then -1.
        real_label = [[
            [1, 1],
            [1, 1],
            [1, 1],
        ], [
            [1, 1],
            [1, 1],
            [0, 0],
        ], [
            [1, 1, 1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 0],
        ]]
        real_data = [
            [0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
            [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0],
            [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
        ]
        # Repeat the whole pass 20 times: every pass must yield the same
        # batches in the same order.
        # NOTE(review): the number of batches produced is never asserted, so
        # a generator that yields nothing would pass - confirm whether a
        # count check is wanted.
        for _ in range(20):
            for table_index, level in enumerate((0, 1, -1)):
                batches = self.dataset.generate_batch(level, 1)
                for batch_index, (data, label) in enumerate(batches):
                    self.assertSequenceEqual(
                        label.numpy().reshape(-1).tolist(),
                        real_label[table_index][batch_index])
                    self.assertSequenceEqual(
                        data.numpy().reshape(-1).tolist(),
                        real_data[batch_index])

    def test_number_of_data_in_class(self):
        """number_of_data_in_each_class returns the expected per-class counts."""
        real_number = [3, 3, 2, 2, 2, 2, 1, 0]
        number = self.dataset.number_of_data_in_each_class()
        self.assertListEqual(real_number, number)

    def test_size_of_feature(self):
        """size_of_feature reports 7 for the converted dataset."""
        size_of_data = self.dataset.size_of_feature()
        self.assertEqual(7, size_of_data)

    def test_number_of_each_class(self):
        """check_each_number_of_class returns an int with the expected value per level."""
        self.assertIsInstance(
            self.dataset.check_each_number_of_class(0), int)
        self.assertEqual(2, self.dataset.check_each_number_of_class(0))
        self.assertEqual(2, self.dataset.check_each_number_of_class(1))
        self.assertEqual(1, self.dataset.check_each_number_of_class(5))
コード例 #3
0
class DatasetUnitTest(unittest.TestCase):
    """Tests for the "test" Dataset splits before any Doc2Vec conversion."""

    def setUp(self):
        """Load the train, validate and test splits of fold 1."""
        self.dataset_train = Dataset(data_name="test",
                                     mode="train",
                                     fold_number=1,
                                     sequence=True)
        self.dataset_validate = Dataset(data_name="test",
                                        mode="validate",
                                        fold_number=1,
                                        sequence=True)
        self.dataset_test = Dataset(data_name="test",
                                    mode="test",
                                    fold_number=1,
                                    sequence=True)

    def test_hierarchy(self):
        """The train dataset exposes the expected hierarchy tables and level list."""
        real_all_name = ['1', '2', '3', '4', '5', '6', '7', '8']
        real_hierarchy = {
            0: {2, 3},
            1: {4, 6},
            2: {5},
            3: {4},
            4: {5},
            5: {6},
            6: {7},
        }
        real_parent_of = {
            2: {0},
            3: {0},
            4: {1, 3},
            5: {2, 4},
            6: {1, 5},
            7: {6},
        }
        real_name_to_index = {
            '1': 0,
            '2': 1,
            '3': 2,
            '4': 3,
            '5': 4,
            '6': 5,
            '7': 6,
            '8': 7,
        }
        real_level = [0, 2, 4, 5, 6, 7, 8]
        # Mappings are compared with assertEqual: assertSequenceEqual indexes
        # its arguments by position on a mismatch, which raises KeyError for
        # dicts lacking key 0 instead of reporting a readable difference.
        self.assertEqual(real_hierarchy, self.dataset_train.hierarchy)
        self.assertEqual(real_parent_of, self.dataset_train.parent_of)
        self.assertSequenceEqual(real_all_name, self.dataset_train.all_name)
        self.assertEqual(real_name_to_index,
                         self.dataset_train.name_to_index)
        self.assertSequenceEqual(real_level, self.dataset_train.level.tolist())

    def test_load_data(self):
        """The three splits together contain exactly the rows of test/data.txt."""
        file_name = "test/data.txt"
        datas, labels = prep.import_data(file_name)
        hierarchy_file_name = "test/hierarchy.pickle"
        labels = prep.map_index_of_label(hierarchy_file_name, labels)

        splits = (self.dataset_train, self.dataset_validate, self.dataset_test)
        fold_datas = np.concatenate([split.datas for split in splits])
        fold_labels = np.concatenate([split.labels for split in splits])

        # Row order across splits is not significant, so compare sorted rows.
        self.assertListEqual(sorted(fold_datas.tolist()), sorted(datas))
        self.assertListEqual(sorted(map(list, fold_labels.tolist())),
                             sorted(map(list, labels)))

    def test_cant_use_generate_batch(self):
        """generate_batch raises NotEmbeddingState on a dataset built without change_to_Doc2Vec."""
        with self.assertRaises(NotEmbeddingState):
            for _ in self.dataset_train.generate_batch(0, 1):
                pass

    def test_number_of_each_class(self):
        """check_each_number_of_class returns an int with the expected value per level."""
        self.assertIsInstance(self.dataset_train.check_each_number_of_class(0),
                              int)
        self.assertEqual(2, self.dataset_train.check_each_number_of_class(0))
        self.assertEqual(2, self.dataset_train.check_each_number_of_class(1))
        self.assertEqual(1, self.dataset_train.check_each_number_of_class(5))

    def test_number_of_level(self):
        """The train dataset reports 6 levels."""
        self.assertEqual(6, self.dataset_train.number_of_level())