class DatasetDoc2vecUnitTest(unittest.TestCase):
    def setUp(self):
        self.dataset = Dataset("test", "train", 1, sequence=True)
        doc2vec = TempDoc2vec()
        self.dataset.change_to_Doc2Vec(doc2vec)

    def test_change_to_Doc2Vec(self):
        label = self.dataset.labels.toarray().astype(int).tolist()
        data = self.dataset.datas.tolist()
        real_data = [
            [0, 0, 1, 0, 0, 1, 0],
            [0, 1, 0, 0, 0, 0, 1],
            [0, 1, 0, 0, 1, 0, 0],
        ]
        real_label = [
            [1, 1, 1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 0],
        ]
        self.assertListEqual(data, real_data)
        self.assertListEqual(label, real_label)

    def test_generate_batch(self):
        real_label = [
            [
                [1, 1],
                [1, 1],
                [1, 1],
            ],
            [
                [1, 1],
                [1, 1],
                [0, 0],
            ],
            [
                [1, 1, 1, 1, 1, 1, 1, 0],
                [1, 1, 1, 1, 1, 1, 0, 0],
                [1, 1, 0, 0, 0, 0, 0, 0],
            ],
        ]
        real_data = [
            [0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
            [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0],
            [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
        ]
        for _ in range(20):
            for l in range(3):
                level = -1 if l == 2 else l
                label_index = 0
                for data, label in self.dataset.generate_batch(level, 1):
                    self.assertSequenceEqual(
                        label.numpy().reshape(-1).tolist(),
                        real_label[l][label_index])
                    self.assertSequenceEqual(
                        data.numpy().reshape(-1).tolist(),
                        real_data[label_index])
                    label_index += 1

    def test_number_of_data_in_class(self):
        real_number = [3, 3, 2, 2, 2, 2, 1, 0]
        number = self.dataset.number_of_data_in_each_class()
        self.assertListEqual(real_number, number)

    def test_size_of_feature(self):
        size_of_data = self.dataset.size_of_feature()
        self.assertEqual(7, size_of_data)

    def test_number_of_each_class(self):
        self.assertIsInstance(
            self.dataset.check_each_number_of_class(0), int)
        self.assertEqual(2, self.dataset.check_each_number_of_class(0))
        self.assertEqual(2, self.dataset.check_each_number_of_class(1))
        self.assertEqual(1, self.dataset.check_each_number_of_class(5))
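
# The expected batches in test_generate_batch above are consistent with a
# per-level column slice of the full label matrix: level 0 appears to cover
# label columns 0-1, level 1 columns 2-3, and level -1 all eight columns.
# This is an observation about the fixture values, not a claim about how
# Dataset.generate_batch is implemented; the check below only verifies that
# the fixture constants agree with that reading.
class GenerateBatchFixtureConsistencyTest(unittest.TestCase):
    def test_level_batches_match_column_slices(self):
        full = [
            [1, 1, 1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 0],
        ]
        level0 = [row[0:2] for row in full]  # expected level-0 batches
        level1 = [row[2:4] for row in full]  # expected level-1 batches
        self.assertListEqual([[1, 1], [1, 1], [1, 1]], level0)
        self.assertListEqual([[1, 1], [1, 1], [0, 0]], level1)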
class TestEachLevel(unittest.TestCase):
    def setUp(self):
        self.model = LCPL_ESLNN_First(7, 5, 2, use_dropout=False)
        # deterministic parameters so the expected outputs below are exact
        self.model.dense.weight.data.fill_(1)
        self.model.dense.bias.data.zero_()
        self.model.logit.weight.data.fill_(0.2)
        self.model.logit.bias.data.zero_()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        self.dataset = Dataset("test", "train", 1, sequence=True)
        doc2vec = TempDoc2vec()
        self.dataset.change_to_Doc2Vec(doc2vec)

    def test_initial_weight(self):
        number_of_data = self.dataset.number_of_data()
        count = self.dataset.number_of_data_in_each_class()
        self.model.initial_weight(number_of_data, count)
        self.assertListEqual(
            [1.0, 1.0, 1.5, 1.5, 1.5, 1.5, 3.0, 10000.0],
            self.model.pos_weight.cpu().numpy().tolist())
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        self.assertListEqual(
            [1.0, 1.0],
            self.model.pos_weight.cpu().numpy().tolist())

    def test_forward(self):
        real_result = [2.0, 2.0]
        for datas, _ in self.dataset.generate_batch(0, 1):
            if torch.cuda.is_available():
                datas = datas.cuda()
            datas = Variable(datas, volatile=True)
            result = self.model.forward(datas)
            self.assertListEqual(
                result.data.cpu().numpy().tolist()[0], real_result)
            self.assertFalse(result.requires_grad)

    def test_forward_dropout(self):
        torch.manual_seed(12345)
        self.model.use_dropout = True
        self.model.initial_structure()
        self.model.dense.weight.data.fill_(1)
        self.model.dense.bias.data.zero_()
        self.model.logit.weight.data.fill_(2)
        self.model.logit.bias.data.zero_()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        # eval() disables dropout, so the output stays deterministic
        self.model.eval()
        real_result = [20.0, 20.0]
        for datas, _ in self.dataset.generate_batch(0, 1):
            if torch.cuda.is_available():
                datas = datas.cuda()
            datas = Variable(datas)
            result = self.model.forward(datas)
            self.assertListEqual(
                result.data.cpu().numpy().tolist()[0], real_result)
            result = Variable(result.data)
            self.assertFalse(result.requires_grad)

    def test_train_data(self):
        number_of_data = self.dataset.number_of_data()
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        real_loss = -math.log(1 / (1 + math.exp(-2)))
        for datas, labels in self.dataset.generate_batch(0, 3):
            if torch.cuda.is_available():
                datas = datas.cuda()
                labels = labels.cuda()
            datas = Variable(datas)
            labels = Variable(labels)
            loss = self.model.train_model(datas, labels)
            self.assertAlmostEqual(real_loss, loss, 5)

    def test_eval_data(self):
        number_of_data = self.dataset.number_of_data()
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        real_score = 1
        for datas, labels in self.dataset.generate_batch(0, 3):
            if torch.cuda.is_available():
                datas = datas.cuda()
                labels = labels.cuda()
            datas = Variable(datas, volatile=True)
            labels = Variable(labels, volatile=True)
            f1_macro, f1_micro = self.model.evaluate(datas, labels)
            self.assertAlmostEqual(real_score, f1_macro, 6)
            self.assertAlmostEqual(real_score, f1_micro, 6)
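
# Worked check of the constants used in TestEachLevel.  It assumes the hidden
# activation passes positive values through unchanged (e.g. ReLU or identity);
# LCPL_ESLNN_First is not guaranteed to work that way, this only shows the
# arithmetic the fixture values are consistent with: each test document has
# two active features of value 1.0, so with dense weights of 1 and zero bias
# every hidden unit outputs 2.0, and with logit weights of 0.2 over the 5
# hidden units each class logit is 0.2 * 5 * 2.0 = 2.0.  The expected training
# loss is then the binary cross-entropy of a positive label at logit 2.
class ExpectedConstantsSanityTest(unittest.TestCase):
    def test_logit_and_loss_arithmetic(self):
        active_features = sum([0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0])
        hidden_unit = 1.0 * active_features  # dense weight 1, bias 0
        logit = 0.2 * 5 * hidden_unit        # 5 hidden units, logit weight 0.2
        self.assertAlmostEqual(2.0, logit, 6)
        expected_loss = -math.log(1 / (1 + math.exp(-logit)))
        self.assertAlmostEqual(
            -math.log(1 / (1 + math.exp(-2))), expected_loss, 6)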
class TestAssemblePredicted(unittest.TestCase):
    def setUp(self):
        self.dataset = Dataset("test", "train", 1, sequence=True)
        self.dataset_validate = Dataset("test", "validate", 1, sequence=True)
        self.dataset_test = Dataset("test", "test", 1, sequence=True)
        doc2vec = TempDoc2vec()
        self.dataset.change_to_Doc2Vec(doc2vec)
        self.dataset_validate.change_to_Doc2Vec(doc2vec)
        self.dataset_test.change_to_Doc2Vec(doc2vec)
        hidden = [5] * self.dataset.number_of_level()
        batch_size = [3] * self.dataset.number_of_level()
        target_hidden = [3] * (self.dataset.number_of_level() - 1)
        self.model = ESLNN(
            "test", self.dataset, self.dataset_validate, self.dataset_test,
            30, hidden, target_hidden, stopping_time=3, batch_size=batch_size)
        self.model.classifier[0].dense.weight.data.fill_(1)
        self.model.classifier[0].dense.bias.data.zero_()
        self.model.classifier[0].logit.weight.data.fill_(0.2)
        self.model.classifier[0].logit.bias.data.zero_()

    def test_initial_model(self):
        for i in range(self.dataset.number_of_level()):
            test_model = self.model.classifier[i]
            number_of_class = self.dataset.check_each_number_of_class(i)
            self.assertEqual(test_model.input_size, 7)
            self.assertEqual(test_model.hidden_size, 5)
            self.assertEqual(test_model.number_of_class, number_of_class)

    def test_score_each_level(self):
        f1_macro, f1_micro = self.model.evaluate_each_level(0, "train")
        real_score = 1
        self.assertAlmostEqual(real_score, f1_macro, 6)
        self.assertAlmostEqual(real_score, f1_micro, 6)

    def test_evaluate(self):
        f1_macro, f1_micro, f1_each = self.model.evaluate("train")
        real_score = [1, 4 / 5, 4 / 5, 4 / 5, 1 / 2, 0]
        self.assertAlmostEqual(0.7125, f1_macro, 6)
        for f1, real in zip(f1_each, real_score):
            self.assertAlmostEqual(real, f1[0], 6)
            # self.assertAlmostEqual(real, f1[1], 6)

    def test_train(self):
        # just train successfully
        self.model.train()
        f1_macro, f1_micro = self.model.evaluate_each_level(0, "train")
        real_score = 1
        self.assertAlmostEqual(real_score, f1_macro, 6)
        self.assertAlmostEqual(real_score, f1_micro, 6)
        f1_macro, f1_micro = self.model.evaluate_each_level(1, "train")
        self.assertAlmostEqual(0.0, f1_macro, 6)

    def test_threshold_tuning(self):
        self.model.train()
        self.model.tuning_threshold()
        f1_macro, f1_micro = self.model.evaluate_each_level(0, "train")
        real_score = 1
        self.assertAlmostEqual(real_score, f1_macro, 6)
        self.assertAlmostEqual(real_score, f1_micro, 6)
        f1_macro, f1_micro = self.model.evaluate_each_level(1, "train")
        self.assertAlmostEqual(0.8, f1_macro, 6)
        self.assertAlmostEqual(0.8, f1_micro, 6)

    def test_correction(self):
        test_label = [[0, 0, 0, 1, 0, 0, 0, 0],
                      [0, 0, 0, 0, 0, 0, 0, 1],
                      [0, 0, 0, 0, 0, 1, 0, 0]]
        real_result_label = [[1, 0, 0, 1, 0, 0, 0, 0],
                             [1, 1, 1, 1, 1, 1, 1, 1],
                             [1, 1, 1, 1, 1, 1, 0, 0]]
        torch_label = ByteTensor(test_label)
        result = self.model.child_based_correction(
            torch_label).cpu().numpy().tolist()
        for label, real_label in zip(result, real_result_label):
            self.assertListEqual(real_label, label)
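
# Standard unittest entry point so this module can be run directly
# (python path/to/this_file.py); the suite is also picked up by
# `python -m unittest` discovery.
if __name__ == "__main__":
    unittest.main()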