def test_generate(self):
    """Exercise ``CWSGenerator.generate`` on degenerate, invalid and normal input.

    Three scenarios are checked in order:

    1. A dataset holding an empty sentence and a punctuation-only sentence:
       every batch the generator yields must contain no original samples
       and no transformed samples.
    2. Invalid ``trans_methods`` values: pulling the first item from the
       generator must raise ``ValueError``.
    3. Two labelled Chinese sentences: every yielded batch must hold as
       many transformed samples as original samples.
    """
    # Scenario 1: samples that offer nothing to transform.
    empty_sample = CWSSample({'x': '', 'y': []})
    symbol_sample = CWSSample({'x': '~ ! @ # $ % ^ & * ( ) _ +', 'y': []})
    dataset = Dataset('CWS')
    dataset.load([empty_sample, symbol_sample])
    mode = [
        'SwapName',
        'CnSwapNum',
        'Reduplication',
        'CnMLM',
        'SwapContraction',
        'SwapVerb',
        'SwapSyn',
    ]
    gene = CWSGenerator(trans_methods=mode, sub_methods=[])
    for original_samples, trans_rst, _trans_type in gene.generate(dataset):
        # assertEqual reports both values on failure, unlike assertTrue.
        self.assertEqual(len(original_samples), 0)
        self.assertEqual(len(trans_rst), 0)

    # Scenario 2: test wrong trans_methods.
    invalid_trans_methods = (
        ["wrong_transform_method"],  # name that is not a transformation
        ["AddSubtree"],  # presumably not available for CWS - TODO confirm
        "CnMLM",  # a bare string instead of a list of names
    )
    for bad_methods in invalid_trans_methods:
        gene = CWSGenerator(trans_methods=bad_methods, sub_methods=[])
        # ``next`` forces the generator to produce its first item, which is
        # where the ValueError is expected to surface.
        self.assertRaises(ValueError, next, gene.generate(dataset))

    # Scenario 3: regular sentences with per-character segmentation labels.
    sent1 = '周小明生产一万'
    sent2 = '央视想朦胧'
    dataset = Dataset(task='CWS')
    dataset.load({
        'x': [sent1, sent2],
        'y': [
            ['B', 'M', 'E', 'B', 'E', 'B', 'E'],
            ['B', 'E', 'S', 'B', 'E'],
        ],
    })
    gene = CWSGenerator(trans_methods=mode, sub_methods=[])
    for original_samples, trans_rst, _trans_type in gene.generate(dataset):
        self.assertEqual(len(original_samples), len(trans_rst))
'AFC at Super Bowl 50?', 'answers': [{"text": "Denver Broncos", "answer_start": 177}, {"text": "Denver Broncos", "answer_start": 177}, {"text": "Denver Broncos", "answer_start": 177}], 'title': "Super_Bowl_50", 'is_impossible': False}) sample2 = MRCSample( {'context': " ", 'question': 'Which NFL team represented ' 'the AFC at Super Bowl 50?', 'answers': [], 'title': "Super_Bowl_50", 'is_impossible': True}) sample3 = MRCSample( {'context': "! @ # $ % ^ & * ( )", 'question': 'Which NFL team represented the AFC at Super Bowl 50?', 'answers': [], 'title': "Super_Bowl_50", 'is_impossible': True}) dataset = Dataset('MRC') dataset.load(data_sample) dataset.extend([sample2, sample3]) class TestMRCGenerator(unittest.TestCase): def test_generate(self): # test task transformation # TODO, domain transformation addsentdiverse transformation_methods = ["PerturbAnswer", "ModifyPos"] gene = MRCGenerator(transformation_methods=transformation_methods, subpopulation_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): self.assertEqual(1, len(trans_rst)) for index in range(len(original_samples)): ori_sample = original_samples[index]
import unittest

from textflint.generation_layer.generator.coref_generator import CorefGenerator
from textflint.input_layer.dataset import Dataset

from test.data.coref_debug import CorefDebug

# Shared fixture: the six debug samples supplied by CorefDebug, loaded into
# a single COREF dataset that the test below iterates over.
samples = [
    CorefDebug.coref_sample1(),
    CorefDebug.coref_sample2(),
    CorefDebug.coref_sample3(),
    CorefDebug.coref_sample4(),
    CorefDebug.coref_sample5(),
    CorefDebug.coref_sample6(),
]
# Keep the individual names bound at module level as before.
sample1, sample2, sample3, sample4, sample5, sample6 = samples

dataset = Dataset("COREF")
dataset.load(samples)


class TestRndRepeat(unittest.TestCase):
    """Checks the ``RndRepeat`` transformation run through ``CorefGenerator``."""

    def test_transform(self):
        """Verify batch sizes match and sentence counts never decrease.

        For every batch yielded by the generator, the number of transformed
        samples must equal the number of original samples, and each
        transformed sample must have at least as many sentences as the
        original it is paired with.
        """
        gene = CorefGenerator(trans_methods=["RndRepeat"], sub_methods=[])
        for original_samples, trans_rst, _trans_type in gene.generate(dataset):
            self.assertEqual(len(original_samples), len(trans_rst))
            for so, st in zip(original_samples, trans_rst):
                # assertLessEqual shows both counts when the check fails.
                self.assertLessEqual(so.num_sentences(), st.num_sentences())


if __name__ == "__main__":
    unittest.main()
"term_list": { "35390182#756337#4_0": { "id": "35390182#756337#4_0", "polarity": "positive", "term": "!", "from": 0, "to": 1, "opinion_words": ["@"], "opinion_position": [[2, 3]] } } } data_samples = [sample1, sample2, sample3] dataset = Dataset('ABSA') dataset.load(data_samples) special_samples = [sample4, sample5] special_dataset = Dataset('ABSA') special_dataset.load(special_samples) class TestABSAGenerator(unittest.TestCase): def test_generate(self): # test task transformation transformation_methods = ['RevTgt', 'RevNon', 'AddDiff'] gene = ABSAGenerator(transformation_methods=transformation_methods, subpopulation_methods=[], dataset_config='restaurant') for original_samples, trans_rst, trans_type in gene.generate(dataset):
'employee' } sample4 = {'x': ['', '', ''], 'subj': [0, 0], 'obj': [0, 0], 'y': 'age'} sample5 = { 'x': ['!', '@', '#', '$', '%', '&', '*', '(', ')'], 'subj': [5, 5], 'obj': [6, 6], 'y': 'None' } single_data_sample = [sample1] data_samples = [sample1, sample2, sample3, sample4, sample5] dataset = Dataset('RE') single_dataset = Dataset('RE') dataset.load(data_samples) single_dataset.load(single_data_sample) class TestSpecialEntityTyposSwap(unittest.TestCase): def test_generate(self): # test task transformation trans_methods = ["SwapBirth", "SwapAge"] gene = REGenerator(trans_methods=trans_methods, sub_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): self.assertEqual(1, len(original_samples)) for index in range(len(original_samples)): self.assertTrue(original_samples[index] != trans_rst[index]) # test wrong trans_methods gene = REGenerator(trans_methods=["wrong_transform_method"],