class DatasetDoc2vecUnitTest(unittest.TestCase):
    """Tests for ``Dataset`` after ``change_to_Doc2Vec`` has been applied."""

    def setUp(self):
        """Create the fold-1 "test" training dataset and apply ``TempDoc2vec``."""
        dataset = Dataset("test", "train", 1, sequence=True)
        dataset.change_to_Doc2Vec(TempDoc2vec())
        self.dataset = dataset

    def test_change_to_Doc2Vec(self):
        """The converted data and the label matrix match the fixture values."""
        expected_vectors = [
            [0, 0, 1, 0, 0, 1, 0],
            [0, 1, 0, 0, 0, 0, 1],
            [0, 1, 0, 0, 1, 0, 0],
        ]
        expected_labels = [
            [1, 1, 1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 0],
        ]
        actual_labels = self.dataset.labels.toarray().astype(int).tolist()
        actual_vectors = self.dataset.datas.tolist()
        self.assertListEqual(actual_vectors, expected_vectors)
        self.assertListEqual(actual_labels, expected_labels)

    def test_generate_batch(self):
        """Batches of size 1 yield the expected data and labels for each level."""
        expected_vectors = [
            [0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
            [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0],
            [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
        ]
        # (level argument, expected flattened label per batch).  Level -1 is
        # paired with the full 8-column label rows -- presumably it selects
        # every class; confirm against Dataset.generate_batch.
        expectations = (
            (0, [
                [1, 1],
                [1, 1],
                [1, 1],
            ]),
            (1, [
                [1, 1],
                [1, 1],
                [0, 0],
            ]),
            (-1, [
                [1, 1, 1, 1, 1, 1, 1, 0],
                [1, 1, 1, 1, 1, 1, 0, 0],
                [1, 1, 0, 0, 0, 0, 0, 0],
            ]),
        )
        # Repeat the whole sweep 20 times so the generator is exercised on
        # repeated use, not just the first pass.
        for _ in range(20):
            for level, expected_labels in expectations:
                batches = self.dataset.generate_batch(level, 1)
                for position, (data, label) in enumerate(batches):
                    self.assertSequenceEqual(
                        label.numpy().reshape(-1).tolist(),
                        expected_labels[position])
                    self.assertSequenceEqual(
                        data.numpy().reshape(-1).tolist(),
                        expected_vectors[position])

    def test_number_of_data_in_class(self):
        """``number_of_data_in_each_class`` returns the per-class counts."""
        expected_counts = [3, 3, 2, 2, 2, 2, 1, 0]
        actual_counts = self.dataset.number_of_data_in_each_class()
        self.assertListEqual(expected_counts, actual_counts)

    def test_size_of_feature(self):
        """``size_of_feature`` reports 7 for this fixture."""
        self.assertEqual(7, self.dataset.size_of_feature())

    def test_number_of_each_class(self):
        """``check_each_number_of_class`` returns an ``int`` with known values."""
        self.assertIsInstance(
            self.dataset.check_each_number_of_class(0), int)
        for class_index, expected in ((0, 2), (1, 2), (5, 1)):
            self.assertEqual(
                expected,
                self.dataset.check_each_number_of_class(class_index))
# Example #2
# 0
class TestEachLevel(unittest.TestCase):
    """Unit tests for ``LCPL_ESLNN_First`` on the small "test" dataset.

    ``setUp`` pins every weight and bias to a constant so that forward
    outputs, losses and scores can be compared with hand-computed values.
    """

    def setUp(self):
        """Build ``LCPL_ESLNN_First(7, 5, 2)`` with constant weights and load the data."""
        self.model = LCPL_ESLNN_First(7, 5, 2, use_dropout=False)
        self.model.dense.weight.data.fill_(1)
        self.model.dense.bias.data.zero_()
        self.model.logit.weight.data.fill_(0.2)
        self.model.logit.bias.data.zero_()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        self.dataset = Dataset("test", "train", 1, sequence=True)
        doc2vec = TempDoc2vec()
        self.dataset.change_to_Doc2Vec(doc2vec)

    def test_initial_weight(self):
        """``initial_weight`` sets ``pos_weight`` for all classes and for level 0 only."""
        number_of_data = self.dataset.number_of_data()
        count = self.dataset.number_of_data_in_each_class()
        self.model.initial_weight(number_of_data, count)
        self.assertListEqual(
            [1.0, 1.0, 1.5, 1.5, 1.5, 1.5, 3.0, 10000.0],
            self.model.pos_weight.cpu().numpy().tolist())

        # Restrict the counts to the classes that belong to level 0.
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        self.assertListEqual(
            [1.0, 1.0], self.model.pos_weight.cpu().numpy().tolist())

    def test_forward(self):
        """The forward pass yields ``[2.0, 2.0]`` and the output tracks no gradient."""
        real_result = [2.0, 2.0]
        for datas, _ in self.dataset.generate_batch(0, 1):
            if torch.cuda.is_available():
                datas = datas.cuda()
            # NOTE(review): ``volatile=True`` is the pre-0.4 PyTorch way of
            # disabling autograd; later releases ignore it in favour of
            # ``torch.no_grad()`` -- confirm the torch version targeted here.
            datas = Variable(datas, volatile=True)
            result = self.model.forward(datas)
            self.assertListEqual(
                result.data.cpu().numpy().tolist()[0], real_result)
            self.assertFalse(result.requires_grad)

    def test_forward_dropout(self):
        """With dropout enabled but the model in eval mode the output is deterministic."""
        # Call the seeding function.  Assigning to ``torch.manual_seed``
        # would replace the function with an int for the whole process and
        # leave the generator unseeded.
        torch.manual_seed(12345)
        self.model.use_dropout = True
        self.model.initial_structure()
        # ``initial_structure`` is called after enabling dropout, so the
        # constant weights are filled in again (logit weight is now 2).
        self.model.dense.weight.data.fill_(1)
        self.model.dense.bias.data.zero_()
        self.model.logit.weight.data.fill_(2)
        self.model.logit.bias.data.zero_()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        self.model.eval()
        real_result = [20.0, 20.0]
        for datas, _ in self.dataset.generate_batch(0, 1):
            if torch.cuda.is_available():
                datas = datas.cuda()
            datas = Variable(datas)
            result = self.model.forward(datas)
            self.assertListEqual(
                result.data.cpu().numpy().tolist()[0], real_result)
            # Re-wrapping the raw tensor detaches it from the graph.
            result = Variable(result.data)
            self.assertFalse(result.requires_grad)

    def test_train_data(self):
        """``train_model`` returns the expected loss on a level-0 batch of size 3."""
        number_of_data = self.dataset.number_of_data()
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        # -log(sigmoid(2)): the expected loss for the constant-weight model.
        real_loss = - math.log(1 / (1 + math.exp(-2)))
        for datas, labels in self.dataset.generate_batch(0, 3):
            if torch.cuda.is_available():
                datas = datas.cuda()
                labels = labels.cuda()
            datas = Variable(datas)
            labels = Variable(labels)
            loss = self.model.train_model(datas, labels)
            self.assertAlmostEqual(real_loss, loss, 5)

    def test_eval_data(self):
        """``evaluate`` returns macro and micro F1 equal to 1 on a level-0 batch."""
        number_of_data = self.dataset.number_of_data()
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        real_score = 1
        for datas, labels in self.dataset.generate_batch(0, 3):
            if torch.cuda.is_available():
                datas = datas.cuda()
                labels = labels.cuda()
            # NOTE(review): ``volatile`` is a pre-0.4 PyTorch flag; see the
            # PyTorch 0.4 migration notes before upgrading torch.
            datas = Variable(datas, volatile=True)
            labels = Variable(labels, volatile=True)
            f1_macro, f1_micro = self.model.evaluate(datas, labels)
            self.assertAlmostEqual(real_score, f1_macro, 6)
            self.assertAlmostEqual(real_score, f1_micro, 6)
# Example #3
# 0
class DatasetUnitTest(unittest.TestCase):
    """Tests for ``Dataset`` before any embedding has been applied."""

    def setUp(self):
        """Load the train, validate and test splits of fold 1 of the "test" data."""
        self.dataset_train = Dataset(data_name="test",
                                     mode="train",
                                     fold_number=1,
                                     sequence=True)
        self.dataset_validate = Dataset(data_name="test",
                                        mode="validate",
                                        fold_number=1,
                                        sequence=True)
        self.dataset_test = Dataset(data_name="test",
                                    mode="test",
                                    fold_number=1,
                                    sequence=True)

    def test_hierarchy(self):
        """The hierarchy metadata exposed by the training split matches the fixture."""
        real_all_name = ['1', '2', '3', '4', '5', '6', '7', '8']
        real_hierarchy = {
            0: {2, 3},
            1: {4, 6},
            2: {5},
            3: {4},
            4: {5},
            5: {6},
            6: {7},
        }
        real_parent_of = {
            2: {0},
            3: {0},
            4: {1, 3},
            5: {2, 4},
            6: {1, 5},
            7: {6},
        }
        real_name_to_index = {
            '1': 0,
            '2': 1,
            '3': 2,
            '4': 3,
            '5': 4,
            '6': 5,
            '7': 6,
            '8': 7,
        }
        real_level = [0, 2, 4, 5, 6, 7, 8]
        # Mappings are compared with assertEqual rather than
        # assertSequenceEqual: on a mismatch the latter indexes its arguments
        # by position, which for a dict produces a misleading message or an
        # uncaught KeyError instead of a readable diff.
        self.assertEqual(real_hierarchy, self.dataset_train.hierarchy)
        self.assertEqual(real_parent_of, self.dataset_train.parent_of)
        self.assertSequenceEqual(real_all_name, self.dataset_train.all_name)
        self.assertEqual(real_name_to_index,
                         self.dataset_train.name_to_index)
        self.assertSequenceEqual(real_level, self.dataset_train.level.tolist())

    def test_load_data(self):
        """The three splits together contain exactly the rows of the raw data file."""
        file_name = "test/data.txt"
        datas, labels = prep.import_data(file_name)
        hierarchy_file_name = "test/hierarchy.pickle"
        labels = prep.map_index_of_label(hierarchy_file_name, labels)

        train = self.dataset_train.datas
        validate = self.dataset_validate.datas
        test = self.dataset_test.datas
        train_label = self.dataset_train.labels
        validate_label = self.dataset_validate.labels
        test_label = self.dataset_test.labels
        fold_datas = np.concatenate([train, validate, test])
        fold_labels = np.concatenate([train_label, validate_label, test_label])
        # Sort both sides so the comparison ignores how rows were assigned
        # to the individual splits.
        self.assertListEqual(sorted(fold_datas.tolist()), sorted(datas))
        fold_label_rows = sorted(map(list, fold_labels.tolist()))
        expected_label_rows = sorted(map(list, labels))
        self.assertListEqual(fold_label_rows, expected_label_rows)

    def test_cant_use_generate_batch(self):
        """``generate_batch`` raises ``NotEmbeddingState`` on a dataset with no embedding applied."""
        with self.assertRaises(NotEmbeddingState):
            for _ in self.dataset_train.generate_batch(0, 1):
                pass

    def test_number_of_each_class(self):
        """``check_each_number_of_class`` returns an ``int`` with known values."""
        self.assertIsInstance(self.dataset_train.check_each_number_of_class(0),
                              int)
        self.assertEqual(2, self.dataset_train.check_each_number_of_class(0))
        self.assertEqual(2, self.dataset_train.check_each_number_of_class(1))
        self.assertEqual(1, self.dataset_train.check_each_number_of_class(5))

    def test_number_of_level(self):
        """The fixture hierarchy has six levels."""
        self.assertEqual(6, self.dataset_train.number_of_level())