Example #1
class DatasetDoc2vecUnitTest(unittest.TestCase):

    def setUp(self):
        self.dataset = Dataset("test", "train", 1, sequence=True)
        doc2vec = TempDoc2vec()
        self.dataset.change_to_Doc2Vec(doc2vec)

    def test_change_to_Doc2Vec(self):
        label = self.dataset.labels.toarray().astype(int).tolist()
        data = self.dataset.datas.tolist()
        real_data = [
            [0, 0, 1, 0, 0, 1, 0],
            [0, 1, 0, 0, 0, 0, 1],
            [0, 1, 0, 0, 1, 0, 0],
        ]
        real_label = [
            [1, 1, 1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 0],
        ]
        self.assertListEqual(data, real_data)
        self.assertListEqual(label, real_label)

    def test_generate_batch(self):
        real_label = [[
            [1, 1],
            [1, 1],
            [1, 1],
        ], [
            [1, 1],
            [1, 1],
            [0, 0],
        ], [
            [1, 1, 1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 0],
        ]]
        real_data = [
            [0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
            [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0],
            [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
        ]
        for i in range(20):
            for l in range(3):
                level = l
                if l == 2:
                    # Level -1 appears to request the full, all-level label vector;
                    # the third real_label block matches the complete label matrix above.
                    level = -1
                label_index = 0
                for data, label in self.dataset.generate_batch(level, 1):
                    self.assertSequenceEqual(
                        label.numpy().reshape(-1).tolist(), real_label[l][label_index])
                    self.assertSequenceEqual(
                        data.numpy().reshape(-1).tolist(), real_data[label_index])
                    label_index = label_index + 1

    def test_number_of_data_in_class(self):
        real_number = [3, 3, 2, 2, 2, 2, 1, 0]
        number = self.dataset.number_of_data_in_each_class()
        self.assertListEqual(real_number, number)

    def test_size_of_feature(self):
        size_of_data = self.dataset.size_of_feature()
        self.assertEqual(7, size_of_data)

    def test_number_of_each_class(self):
        self.assertIsInstance(
            self.dataset.check_each_number_of_class(0), int)
        self.assertEqual(2, self.dataset.check_each_number_of_class(0))
        self.assertEqual(2, self.dataset.check_each_number_of_class(1))
        self.assertEqual(1, self.dataset.check_each_number_of_class(5))
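
The expected values in test_number_of_data_in_class and test_size_of_feature can be read straight off the fixture above: each per-class count is the column sum of the three label rows, and the feature size is the width of one data row. A minimal standalone check over the fixture arrays (plain NumPy; the project's Dataset class is assumed to report the same quantities):

import numpy as np

# Label matrix from test_change_to_Doc2Vec: one row per document, one column per class.
labels = np.array([
    [1, 1, 1, 1, 1, 1, 1, 0],
    [1, 1, 1, 1, 1, 1, 0, 0],
    [1, 1, 0, 0, 0, 0, 0, 0],
])
# Doc2Vec feature matrix from the same test.
datas = np.array([
    [0, 0, 1, 0, 0, 1, 0],
    [0, 1, 0, 0, 0, 0, 1],
    [0, 1, 0, 0, 1, 0, 0],
])

# Documents per class = column-wise sum of the label matrix.
assert labels.sum(axis=0).tolist() == [3, 3, 2, 2, 2, 2, 1, 0]
# Feature size = width of the Doc2Vec matrix.
assert datas.shape[1] == 7
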
Example #2
class TestEachLevel(unittest.TestCase):

    def setUp(self):
        self.model = LCPL_ESLNN_First(7, 5, 2, use_dropout=False)
        self.model.dense.weight.data.fill_(1)
        self.model.dense.bias.data.zero_()
        self.model.logit.weight.data.fill_(0.2)
        self.model.logit.bias.data.zero_()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        self.dataset = Dataset("test", "train", 1, sequence=True)
        doc2vec = TempDoc2vec()
        self.dataset.change_to_Doc2Vec(doc2vec)

    def test_initial_weight(self):
        number_of_data = self.dataset.number_of_data()
        count = self.dataset.number_of_data_in_each_class()
        self.model.initial_weight(number_of_data, count)
        self.assertListEqual(
            [1.0, 1.0, 1.5, 1.5, 1.5, 1.5, 3.0, 10000.0],
            self.model.pos_weight.cpu().numpy().tolist())

        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        self.assertListEqual(
            [1.0, 1.0], self.model.pos_weight.cpu().numpy().tolist())
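        # The expected pos_weight values are consistent with a per-class weight of
        # number_of_data / count (3/3 = 1.0, 3/2 = 1.5, 3/1 = 3.0), with empty classes
        # apparently clamped to 10000.0 instead of dividing by zero. This is an
        # inference from the fixture, not documented behaviour of initial_weight.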

    def test_forward(self):
        real_result = [2.0, 2.0]
        for datas, _ in self.dataset.generate_batch(0, 1):
            if torch.cuda.is_available():
                datas = datas.cuda()
            datas = Variable(datas, volatile=True)
            result = self.model.forward(datas)
            self.assertListEqual(
                result.data.cpu().numpy().tolist()[0], real_result)
            self.assertFalse(result.requires_grad)

    def test_forward_dropout(self):
        torch.manual_seed(12345)
        self.model.use_dropout = True
        self.model.initial_structure()
        self.model.dense.weight.data.fill_(1)
        self.model.dense.bias.data.zero_()
        self.model.logit.weight.data.fill_(2)
        self.model.logit.bias.data.zero_()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        self.model.eval()
        real_result = [20.0, 20.0]
        for datas, _ in self.dataset.generate_batch(0, 1):
            if torch.cuda.is_available():
                datas = datas.cuda()
            datas = Variable(datas)
            result = self.model.forward(datas)
            self.assertListEqual(
                result.data.cpu().numpy().tolist()[0], real_result)
            result = Variable(result.data)
            self.assertFalse(result.requires_grad)

    def test_train_data(self):
        number_of_data = self.dataset.number_of_data()
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        real_loss = - math.log(1 / (1 + math.exp(-2)))
        for datas, labels in self.dataset.generate_batch(0, 3):
            if torch.cuda.is_available():
                datas = datas.cuda()
                labels = labels.cuda()
            datas = Variable(datas)
            labels = Variable(labels)
            loss = self.model.train_model(datas, labels)
            self.assertAlmostEqual(real_loss, loss, 5)
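        # real_loss is the binary cross-entropy with logits for this fixture: every
        # first-level logit is 2.0 (see test_forward), every first-level target is 1,
        # and the first-level pos_weight is 1.0, so the loss reduces to -log(sigmoid(2)).
        # This explains the expected constant, not how train_model computes it internally.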

    def test_eval_data(self):
        number_of_data = self.dataset.number_of_data()
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        real_score = 1
        for datas, labels in self.dataset.generate_batch(0, 3):
            if torch.cuda.is_available():
                datas = datas.cuda()
                labels = labels.cuda()
            datas = Variable(datas, volatile=True)
            labels = Variable(labels, volatile=True)
            f1_macro, f1_micro = self.model.evaluate(datas, labels)
            self.assertAlmostEqual(real_score, f1_macro, 6)
            self.assertAlmostEqual(real_score, f1_micro, 6)
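
The perfect scores in test_eval_data follow from the fixture: with the weights fixed in setUp every first-level logit is 2.0 (see test_forward), so every prediction is positive, and every first-level target is 1. A minimal reproduction with scikit-learn, assuming evaluate thresholds the logits at zero and reports macro/micro F1 (an assumption about its internals that the expected values are consistent with):

import numpy as np
from sklearn.metrics import f1_score

y_true = np.ones((3, 2), dtype=int)              # first-level labels from the fixture
y_pred = (np.full((3, 2), 2.0) > 0).astype(int)  # logits of 2.0 thresholded at zero
assert f1_score(y_true, y_pred, average="macro") == 1.0
assert f1_score(y_true, y_pred, average="micro") == 1.0
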
class TestAssemblePredicted(unittest.TestCase):

    def setUp(self):
        self.dataset = Dataset("test", "train", 1, sequence=True)
        self.dataset_validate = Dataset("test", "validate", 1, sequence=True)
        self.dataset_test = Dataset("test", "test", 1, sequence=True)
        doc2vec = TempDoc2vec()
        self.dataset.change_to_Doc2Vec(doc2vec)
        self.dataset_validate.change_to_Doc2Vec(doc2vec)
        self.dataset_test.change_to_Doc2Vec(doc2vec)
        hidden = [5] * self.dataset.number_of_level()
        batch_size = [3] * self.dataset.number_of_level()
        target_hidden = [3] * (self.dataset.number_of_level() - 1)
        self.model = ESLNN(
            "test", self.dataset, self.dataset_validate, self.dataset_test,
            30, hidden, target_hidden, stopping_time=3, batch_size=batch_size)
        self.model.classifier[0].dense.weight.data.fill_(1)
        self.model.classifier[0].dense.bias.data.zero_()
        self.model.classifier[0].logit.weight.data.fill_(0.2)
        self.model.classifier[0].logit.bias.data.zero_()

    def test_initial_model(self):
        for i in range(self.dataset.number_of_level()):
            test_model = self.model.classifier[i]
            number_of_class = self.dataset.check_each_number_of_class(i)
            self.assertEqual(test_model.input_size, 7)
            self.assertEqual(test_model.hidden_size, 5)
            self.assertEqual(test_model.number_of_class, number_of_class)

    def test_score_each_level(self):
        f1_macro, f1_micro = self.model.evaluate_each_level(0, "train")
        real_score = 1
        self.assertAlmostEqual(real_score, f1_macro, 6)
        self.assertAlmostEqual(real_score, f1_micro, 6)

    def test_evaluate(self):
        f1_macro, f1_micro, f1_each = self.model.evaluate("train")
        real_score = [1, 4 / 5, 4 / 5, 4 / 5, 1 / 2, 0]
        self.assertAlmostEqual(0.7125, f1_macro, 6)
        for f1, real in zip(f1_each, real_score):
            self.assertAlmostEqual(real, f1[0], 6)
            # self.assertAlmostEqual(real, f1[1], 6)

    def test_train(self):
        # just train successfully
        self.model.train()
        f1_macro, f1_micro = self.model.evaluate_each_level(0, "train")
        real_score = 1
        self.assertAlmostEqual(real_score, f1_macro, 6)
        self.assertAlmostEqual(real_score, f1_micro, 6)
        f1_macro, f1_micro = self.model.evaluate_each_level(1, "train")
        self.assertAlmostEqual(0.0, f1_macro, 6)

    def test_threshold_tuning(self):
        self.model.train()
        self.model.tuning_threshold()
        f1_macro, f1_micro = self.model.evaluate_each_level(0, "train")
        real_score = 1
        self.assertAlmostEqual(real_score, f1_macro, 6)
        self.assertAlmostEqual(real_score, f1_micro, 6)
        f1_macro, f1_micro = self.model.evaluate_each_level(1, "train")
        self.assertAlmostEqual(0.8, f1_macro, 6)
        self.assertAlmostEqual(0.8, f1_micro, 6)

    def test_correction(self):
        test_label = [[0, 0, 0, 1, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0, 0, 0, 1],
                      [0, 0, 0, 0, 0, 1, 0, 0]]
        real_result_label = [[1, 0, 0, 1, 0, 0, 0, 0],
                             [1, 1, 1, 1, 1, 1, 1, 1],
                             [1, 1, 1, 1, 1, 1, 0, 0]]
        torch_label = ByteTensor(test_label)
        result = self.model.child_based_correction(
            torch_label).cpu().numpy().tolist()
        for label, real_label in zip(result, real_result_label):
            self.assertListEqual(
                real_label, label)
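
The expected outputs of test_correction are consistent with an ancestor-propagation rule: whenever a class is predicted, child_based_correction also switches on every ancestor of that class in the "test" dataset's hierarchy (the parent relation itself lives in the test fixtures and is not shown in these snippets). A generic sketch of such a rule over a hypothetical parent map, not the project's actual implementation:

from typing import Dict, List

def propagate_to_ancestors(labels: List[List[int]],
                           parents: Dict[int, List[int]]) -> List[List[int]]:
    """Switch on every ancestor of each predicted class.

    labels  -- one multi-hot row per document
    parents -- maps a class index to its (possibly multiple) parent indices
    """
    corrected = [row[:] for row in labels]
    for row in corrected:
        # Walk upwards from every set class until the roots are reached.
        stack = [i for i, bit in enumerate(row) if bit]
        while stack:
            node = stack.pop()
            for parent in parents.get(node, []):
                if not row[parent]:
                    row[parent] = 1
                    stack.append(parent)
    return corrected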