def test_generate(self):
    """Exercise ``CWSGenerator.generate`` on degenerate, invalid and regular input.

    Three scenarios are covered:

    1. A dataset holding an empty sentence and a punctuation-only sentence:
       every triple yielded by the generator must carry empty original and
       transformed sample collections.
    2. Invalid ``trans_methods`` values (an unknown name, ``"AddSubtree"``,
       and a bare string instead of a list): the first ``next`` on the
       generator must raise ``ValueError``.
    3. Two labelled sentences: each yielded triple must contain as many
       original samples as transformed samples.
    """
    # Scenario 1: inputs the test expects no transformation to apply to.
    test1 = CWSSample({'x': '', 'y': []})
    test2 = CWSSample({'x': '~ ! @ # $ % ^ & * ( ) _ +', 'y': []})
    dataset = Dataset('CWS')
    dataset.load([test1, test2])
    mode = [
        'SwapName', 'CnSwapNum', 'Reduplication', 'CnMLM',
        'SwapContraction', 'SwapVerb', 'SwapSyn',
    ]
    gene = CWSGenerator(trans_methods=mode, sub_methods=[])
    for original_samples, trans_rst, _trans_type in gene.generate(dataset):
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(len(original_samples), 0)
        self.assertEqual(len(trans_rst), 0)

    # Scenario 2: wrong trans_methods. ``generate`` is consumed through
    # ``next`` so the error is triggered by the first step of the generator.
    gene = CWSGenerator(
        trans_methods=["wrong_transform_method"], sub_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))
    # NOTE(review): "AddSubtree" is presumably a transformation that is not
    # valid for the CWS task -- confirm against the transformation registry.
    gene = CWSGenerator(trans_methods=["AddSubtree"], sub_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))
    # A plain string (not a list of names) is expected to be rejected too.
    gene = CWSGenerator(trans_methods="CnMLM", sub_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))

    # Scenario 3: regular sentences loaded from a column-style dict.
    sent1 = '周小明生产一万'
    sent2 = '央视想朦胧'
    dataset = Dataset(task='CWS')
    dataset.load({
        'x': [sent1, sent2],
        'y': [['B', 'M', 'E', 'B', 'E', 'B', 'E'],
              ['B', 'E', 'S', 'B', 'E']],
    })
    gene = CWSGenerator(trans_methods=mode, sub_methods=[])
    for original_samples, trans_rst, _trans_type in gene.generate(dataset):
        self.assertEqual(len(original_samples), len(trans_rst))
def test_transform(self):
    """Check the output of ``swap_ins.transform`` on two samples.

    ``swap_ins`` and ``data_sample`` are fixtures defined outside this
    method. For ``data_sample`` the test expects exactly one transformed
    sample with the sentence, labels and mask pinned below. For a
    space-separated sentence with an empty label list it only expects a
    single result.
    """
    trans_data = swap_ins.transform(data_sample)

    # assertEqual (rather than assertTrue(a == b)) shows both values on
    # failure and matches the other assertions in this test.
    self.assertEqual(len(trans_data), 1)
    self.assertEqual('小明喜欢看电影。', trans_data[0].get_value('x'))
    self.assertEqual(['B', 'E', 'B', 'E', 'S', 'B', 'E', 'S'],
                     trans_data[0].get_value('y'))
    self.assertEqual([0, 0, 0, 0, 0, 2, 2, 0], trans_data[0].mask)

    trans_data = swap_ins.transform(
        CWSSample({'x': '玩具厂 大量 生产 玩具 。', 'y': []}))
    self.assertEqual(1, len(trans_data))
import unittest

from textflint.generation_layer.transformation.CWS.swap_contraction \
    import SwapContraction
from textflint.input_layer.component.sample.cws_sample import CWSSample

# Shared fixtures: a space-segmented sentence containing the contraction
# '央视', and the transformation instance under test.
sent1 = '来自 央视 报道 。'
data_sample = CWSSample({'x': sent1, 'y': []})
swap_ins = SwapContraction()


class TestSwapContraction(unittest.TestCase):
    """Tests for the ``SwapContraction`` CWS transformation."""

    def test_get_transformations(self):
        """Check ``_get_transformations`` on word lists and invalid input.

        For the words of ``data_sample`` the expected result is the triple
        (positions ``[[2, 4]]``, replacements ``[['中央电视台']]``, labels
        ``[['B', 'M', 'M', 'M', 'E']]``). Passing a string instead of a
        word list is expected to raise ``AssertionError``; an empty list
        is expected to give three empty lists.
        """
        # assertEqual shows both sides on failure, unlike assertTrue(a == b).
        self.assertEqual(
            ([[2, 4]], [['中央电视台']], [['B', 'M', 'M', 'M', 'E']]),
            swap_ins._get_transformations(data_sample.get_words()))

        # Strings (non-empty or empty) are rejected.
        self.assertRaises(AssertionError,
                          swap_ins._get_transformations, sent1)
        self.assertRaises(AssertionError,
                          swap_ins._get_transformations, '')

        self.assertEqual(swap_ins._get_transformations([]), ([], [], []))

    def test_transformation(self):
        """Check the sentence, labels and mask produced by ``transform``.

        The expected sentence replaces '央视' with '中央电视台'; the expected
        mask holds 2 at the five replacement characters and 0 elsewhere.
        """
        trans_sample = swap_ins.transform(data_sample)
        self.assertEqual(len(trans_sample), 1)
        trans_sample = trans_sample[0]

        self.assertEqual([0, 0, 2, 2, 2, 2, 2, 0, 0, 0], trans_sample.mask)
        self.assertEqual('来自中央电视台报道。', trans_sample.get_value('x'))
        self.assertEqual(
            ['B', 'E', 'B', 'M', 'M', 'M', 'E', 'B', 'E', 'S'],
            trans_sample.get_value('y'))


if __name__ == "__main__":
    # NOTE(review): the guard had no body in the reviewed text; running the
    # unittest CLI is the assumed intent -- confirm.
    unittest.main()
import unittest

from textflint.input_layer.component.sample.cws_sample import CWSSample
from textflint.generation_layer.transformation.CWS.reduplication \
    import Reduplication

# Shared fixtures: a labelled sentence and the transformation under test.
sent1 = '朦胧的月色'
sample = CWSSample({'x': sent1, 'y': ['B', 'E', 'S', 'B', 'E']})
swap_ins = Reduplication()


class TestReduplication(unittest.TestCase):
    """Tests for the ``Reduplication`` CWS transformation."""

    def test_transformation(self):
        """Check the sentence, labels and mask produced by ``transform``.

        The expected sentence turns '朦胧' into '朦朦胧胧'; the expected mask
        holds 2 at those four characters and 0 for the rest.
        """
        trans_sample = swap_ins.transform(sample)
        # assertEqual reports the actual length on failure and matches the
        # style of the remaining assertions.
        self.assertEqual(1, len(trans_sample))
        trans_sample = trans_sample[0]

        self.assertEqual('朦朦胧胧的月色', trans_sample.get_value('x'))
        self.assertEqual(['B', 'M', 'M', 'E', 'S', 'B', 'E'],
                         trans_sample.get_value('y'))
        self.assertEqual([2, 2, 2, 2, 0, 0, 0], trans_sample.mask)


if __name__ == "__main__":
    unittest.main()