class DatasetDoc2vecUnitTest(unittest.TestCase):

    def setUp(self):
        self.dataset = Dataset("test", "train", 1, sequence=True)
        doc2vec = TempDoc2vec()
        self.dataset.change_to_Doc2Vec(doc2vec)

    def test_change_to_Doc2Vec(self):
        label = self.dataset.labels.toarray().astype(int).tolist()
        data = self.dataset.datas.tolist()
        real_data = [
            [0, 0, 1, 0, 0, 1, 0],
            [0, 1, 0, 0, 0, 0, 1],
            [0, 1, 0, 0, 1, 0, 0],
        ]
        real_label = [
            [1, 1, 1, 1, 1, 1, 1, 0],
            [1, 1, 1, 1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 0],
        ]
        self.assertListEqual(data, real_data)
        self.assertListEqual(label, real_label)

    def test_generate_batch(self):
        # Expected labels per level; the last entry is the full label vector
        # returned when generate_batch is called with level -1.
        real_label = [
            [
                [1, 1],
                [1, 1],
                [1, 1],
            ],
            [
                [1, 1],
                [1, 1],
                [0, 0],
            ],
            [
                [1, 1, 1, 1, 1, 1, 1, 0],
                [1, 1, 1, 1, 1, 1, 0, 0],
                [1, 1, 0, 0, 0, 0, 0, 0],
            ],
        ]
        real_data = [
            [0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
            [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0],
            [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
        ]
        for _ in range(20):
            for l in range(3):
                level = l
                if l == 2:
                    level = -1
                label_index = 0
                for data, label in self.dataset.generate_batch(level, 1):
                    self.assertSequenceEqual(
                        label.numpy().reshape(-1).tolist(),
                        real_label[l][label_index])
                    self.assertSequenceEqual(
                        data.numpy().reshape(-1).tolist(),
                        real_data[label_index])
                    label_index += 1

    def test_number_of_data_in_class(self):
        real_number = [3, 3, 2, 2, 2, 2, 1, 0]
        number = self.dataset.number_of_data_in_each_class()
        self.assertListEqual(real_number, number)

    def test_size_of_feature(self):
        size_of_data = self.dataset.size_of_feature()
        self.assertEqual(7, size_of_data)

    def test_number_of_each_class(self):
        self.assertIsInstance(
            self.dataset.check_each_number_of_class(0), int)
        self.assertEqual(2, self.dataset.check_each_number_of_class(0))
        self.assertEqual(2, self.dataset.check_each_number_of_class(1))
        self.assertEqual(1, self.dataset.check_each_number_of_class(5))
class TestEachLevel(unittest.TestCase):

    def setUp(self):
        self.model = LCPL_ESLNN_First(7, 5, 2, use_dropout=False)
        self.model.dense.weight.data.fill_(1)
        self.model.dense.bias.data.zero_()
        self.model.logit.weight.data.fill_(0.2)
        self.model.logit.bias.data.zero_()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        self.dataset = Dataset("test", "train", 1, sequence=True)
        doc2vec = TempDoc2vec()
        self.dataset.change_to_Doc2Vec(doc2vec)

    def test_initial_weight(self):
        number_of_data = self.dataset.number_of_data()
        count = self.dataset.number_of_data_in_each_class()
        self.model.initial_weight(number_of_data, count)
        self.assertListEqual(
            [1.0, 1.0, 1.5, 1.5, 1.5, 1.5, 3.0, 10000.0],
            self.model.pos_weight.cpu().numpy().tolist())
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        self.assertListEqual(
            [1.0, 1.0],
            self.model.pos_weight.cpu().numpy().tolist())

    def test_forward(self):
        real_result = [2.0, 2.0]
        for datas, _ in self.dataset.generate_batch(0, 1):
            if torch.cuda.is_available():
                datas = datas.cuda()
            datas = Variable(datas, volatile=True)
            result = self.model.forward(datas)
            self.assertListEqual(
                result.data.cpu().numpy().tolist()[0], real_result)
            self.assertFalse(result.requires_grad)

    def test_forward_dropout(self):
        # Fixed: the original assigned to torch.manual_seed instead of calling it,
        # which silently overwrote the function without seeding the RNG.
        torch.manual_seed(12345)
        self.model.use_dropout = True
        self.model.initial_structure()
        self.model.dense.weight.data.fill_(1)
        self.model.dense.bias.data.zero_()
        self.model.logit.weight.data.fill_(2)
        self.model.logit.bias.data.zero_()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
        self.model.eval()
        real_result = [20.0, 20.0]
        for datas, _ in self.dataset.generate_batch(0, 1):
            if torch.cuda.is_available():
                datas = datas.cuda()
            datas = Variable(datas)
            result = self.model.forward(datas)
            self.assertListEqual(
                result.data.cpu().numpy().tolist()[0], real_result)
            result = Variable(result.data)
            self.assertFalse(result.requires_grad)

    def test_train_data(self):
        number_of_data = self.dataset.number_of_data()
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        # Expected loss for a logit of 2 and a target of 1: -log(sigmoid(2)).
        real_loss = -math.log(1 / (1 + math.exp(-2)))
        for datas, labels in self.dataset.generate_batch(0, 3):
            if torch.cuda.is_available():
                datas = datas.cuda()
                labels = labels.cuda()
            datas = Variable(datas)
            labels = Variable(labels)
            loss = self.model.train_model(datas, labels)
            self.assertAlmostEqual(real_loss, loss, 5)

    def test_eval_data(self):
        number_of_data = self.dataset.number_of_data()
        first_index = self.dataset.index_of_level(0)
        first_count = self.dataset.number_of_data_in_each_class()[
            first_index[0]:first_index[1]]
        self.model.initial_weight(number_of_data, first_count)
        real_score = 1
        for datas, labels in self.dataset.generate_batch(0, 3):
            if torch.cuda.is_available():
                datas = datas.cuda()
                labels = labels.cuda()
            datas = Variable(datas, volatile=True)
            labels = Variable(labels, volatile=True)
            f1_macro, f1_micro = self.model.evaluate(datas, labels)
            self.assertAlmostEqual(real_score, f1_macro, 6)
            self.assertAlmostEqual(real_score, f1_micro, 6)
class DatasetUnitTest(unittest.TestCase):

    def setUp(self):
        self.dataset_train = Dataset(data_name="test", mode="train",
                                     fold_number=1, sequence=True)
        self.dataset_validate = Dataset(data_name="test", mode="validate",
                                        fold_number=1, sequence=True)
        self.dataset_test = Dataset(data_name="test", mode="test",
                                    fold_number=1, sequence=True)

    def test_hierarchy(self):
        real_all_name = ['1', '2', '3', '4', '5', '6', '7', '8']
        real_hierarchy = {
            0: set([2, 3]),
            1: set([4, 6]),
            2: set([5]),
            3: set([4]),
            4: set([5]),
            5: set([6]),
            6: set([7]),
        }
        real_parent_of = {
            2: set([0]),
            3: set([0]),
            4: set([1, 3]),
            5: set([2, 4]),
            6: set([1, 5]),
            7: set([6]),
        }
        real_name_to_index = {
            '1': 0, '2': 1, '3': 2, '4': 3,
            '5': 4, '6': 5, '7': 6, '8': 7,
        }
        real_level = [0, 2, 4, 5, 6, 7, 8]
        self.assertSequenceEqual(real_hierarchy, self.dataset_train.hierarchy)
        self.assertSequenceEqual(real_parent_of, self.dataset_train.parent_of)
        self.assertSequenceEqual(real_all_name, self.dataset_train.all_name)
        self.assertSequenceEqual(real_name_to_index,
                                 self.dataset_train.name_to_index)
        self.assertSequenceEqual(real_level, self.dataset_train.level.tolist())

    def test_load_data(self):
        file_name = "test/data.txt"
        datas, labels = prep.import_data(file_name)
        hierarchy_file_name = "test/hierarchy.pickle"
        labels = prep.map_index_of_label(hierarchy_file_name, labels)
        train = self.dataset_train.datas
        validate = self.dataset_validate.datas
        test = self.dataset_test.datas
        train_label = self.dataset_train.labels
        validate_label = self.dataset_validate.labels
        test_label = self.dataset_test.labels
        fold_datas = np.concatenate([train, validate, test])
        fold_labels = np.concatenate([train_label, validate_label, test_label])
        self.assertListEqual(sorted(fold_datas.tolist()), sorted(datas))
        a = sorted(map(list, fold_labels.tolist()))
        b = sorted(map(list, labels))
        self.assertListEqual(a, b)

    def test_cant_use_generate_batch(self):
        with self.assertRaises(NotEmbeddingState):
            for _ in self.dataset_train.generate_batch(0, 1):
                pass

    def test_number_of_each_class(self):
        self.assertIsInstance(
            self.dataset_train.check_each_number_of_class(0), int)
        self.assertEqual(2, self.dataset_train.check_each_number_of_class(0))
        self.assertEqual(2, self.dataset_train.check_each_number_of_class(1))
        self.assertEqual(1, self.dataset_train.check_each_number_of_class(5))

    def test_number_of_level(self):
        self.assertEqual(6, self.dataset_train.number_of_level())