def test_generate(self):
    """Check ``CWSGenerator.generate`` on degenerate, invalid and normal input.

    Three scenarios are covered:

    1. A dataset holding an empty sentence and a punctuation-only
       sentence: every yielded batch is expected to be empty.
    2. Invalid ``trans_methods`` values (an unknown name, ``"AddSubtree"``
       and a bare string instead of a list): the first ``next`` on the
       generator is expected to raise ``ValueError``.
    3. Two Chinese sentences with one B/M/E/S tag per character: every
       yielded batch must contain as many transformed samples as
       original samples.
    """
    # Scenario 1: degenerate samples.
    test1 = CWSSample({'x': '', 'y': []})
    test2 = CWSSample({'x': '~ ! @ # $ % ^ & * ( ) _ +', 'y': []})
    dataset = Dataset('CWS')
    dataset.load([test1, test2])
    mode = [
        'SwapName', 'CnSwapNum', 'Reduplication', 'CnMLM',
        'SwapContraction', 'SwapVerb', 'SwapSyn'
    ]
    gene = CWSGenerator(trans_methods=mode, sub_methods=[])
    for original_samples, trans_rst, _ in gene.generate(dataset):
        # assertEqual reports both values on failure, unlike assertTrue.
        self.assertEqual(0, len(original_samples))
        self.assertEqual(0, len(trans_rst))

    # Scenario 2: wrong trans_methods. ``generate`` is lazy, so the error
    # only surfaces once ``next`` pulls the first item.
    gene = CWSGenerator(trans_methods=["wrong_transform_method"],
                        sub_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))
    gene = CWSGenerator(trans_methods=["AddSubtree"], sub_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))
    # A plain string rather than a list of method names.
    gene = CWSGenerator(trans_methods="CnMLM", sub_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))

    # Scenario 3: regular sentences loaded from a dict of columns.
    sent1 = '周小明生产一万'
    sent2 = '央视想朦胧'
    dataset = Dataset(task='CWS')
    dataset.load({
        'x': [sent1, sent2],
        'y': [['B', 'M', 'E', 'B', 'E', 'B', 'E'],
              ['B', 'E', 'S', 'B', 'E']]
    })
    gene = CWSGenerator(trans_methods=mode, sub_methods=[])
    for original_samples, trans_rst, _ in gene.generate(dataset):
        self.assertEqual(len(original_samples), len(trans_rst))
class TestDPGenerator(unittest.TestCase):
    """Tests for ``DPGenerator.generate`` on a two-sample DP dataset."""

    # Class-level fixture shared by the tests; ``sample`` and ``sample_1``
    # are referenced from module scope.
    data_samples = [sample, sample_1]
    dataset = Dataset('DP')
    dataset.load(data_samples)

    def test_generate(self):
        """Run task-specific, invalid and universal transformations.

        Checks that ``DeleteSubTree`` changes the length of field ``x``,
        that combining it with ``Ocr`` yields samples different from the
        originals, that invalid ``transformation_methods`` raise
        ``ValueError`` on the first ``next``, and that ``WordCase`` and
        ``SwapNum`` alter words as asserted below.
        """
        # test task transformation
        gene = DPGenerator(transformation_methods=["DeleteSubTree"],
                           subpopulation_methods=[])
        for original_samples, trans_rst, _ in gene.generate(self.dataset):
            self.assertEqual(2, len(original_samples))
            for original_sample, transformed_sample in \
                    zip(original_samples, trans_rst):
                # assertNotEqual shows the offending values on failure.
                self.assertNotEqual(
                    len(original_sample.get_value('x')),
                    len(transformed_sample.get_value('x')))

        transformation_methods = ["DeleteSubTree", "Ocr"]
        gene = DPGenerator(transformation_methods=transformation_methods,
                           subpopulation_methods=[])
        for original_samples, trans_rst, _ in gene.generate(self.dataset):
            for ori_sample, trans_sample in zip(original_samples, trans_rst):
                self.assertNotEqual(ori_sample, trans_sample)

        # test wrong transformation_methods; ``generate`` is lazy, so the
        # error is only raised once ``next`` is called on it
        gene = DPGenerator(transformation_methods=["wrong_transform_method"],
                           subpopulation_methods=[])
        self.assertRaises(ValueError, next, gene.generate(self.dataset))
        gene = DPGenerator(transformation_methods=["EntityTyposSwap"],
                           subpopulation_methods=[])
        self.assertRaises(ValueError, next, gene.generate(self.dataset))
        # a bare string instead of a list of method names
        gene = DPGenerator(transformation_methods="RemoveSubtree",
                           subpopulation_methods=[])
        self.assertRaises(ValueError, next, gene.generate(self.dataset))

        # test part of UT transformations
        gene = DPGenerator(transformation_methods=['WordCase'],
                           subpopulation_methods=[])
        for original_samples, trans_rst, _ in gene.generate(self.dataset):
            self.assertEqual(2, len(original_samples))
            # Index into trans_rst (rather than zip) so a missing
            # transformed sample still fails loudly.
            for index, ori_sample in enumerate(original_samples):
                word_pairs = zip(trans_rst[index].get_words('x'),
                                 ori_sample.get_words('x'))
                for trans_word, ori_word in word_pairs:
                    self.assertEqual(trans_word, ori_word.upper())

        gene = DPGenerator(transformation_methods=['SwapNum'],
                           subpopulation_methods=[])
        for original_samples, trans_rst, _ in gene.generate(self.dataset):
            for index, ori_sample in enumerate(original_samples):
                word_pairs = zip(trans_rst[index].get_words('x'),
                                 ori_sample.get_words('x'))
                for trans_word, ori_word in word_pairs:
                    # Only purely numeric words are required to change.
                    if ori_word.isdigit():
                        self.assertNotEqual(ori_word, trans_word)
def test_generate(self):
    """Check ``POSGenerator.generate`` for two transformations and bad input.

    ``dataset`` and ``special_dataset`` are fixtures referenced from
    module scope. For ``SwapMultiPOS`` (configured with treebank tag
    ``NN``) the last word of each sample must be masked with ``2`` and
    differ from the original; for ``SwapPrefix`` the same holds for the
    second-to-last word. Invalid ``transformation_methods`` must raise
    ``ValueError`` on the first ``next``.
    """
    # test MultiPOSSwap transformation
    gene = POSGenerator(
        transformation_methods=["SwapMultiPOS"],
        subpopulation_methods=[],
        transformation_config={"SwapMultiPOS": [{"treebank_tag": "NN"}]})
    for original_samples, trans_rst, _ in gene.generate(dataset):
        self.assertEqual(2, len(original_samples))
        self.assertEqual(2, len(trans_rst))
        # Both lengths are known to be 2 here, so zip visits every pair.
        for ori_sample, trans_sample in zip(original_samples, trans_rst):
            self.assertEqual(2, trans_sample.get_mask('x')[-1])
            self.assertNotEqual(trans_sample.get_words('x')[-1],
                                ori_sample.get_words('x')[-1])

    # test PrefixSwap transformation
    gene = POSGenerator(transformation_methods=['SwapPrefix'],
                        subpopulation_methods=[])
    for original_samples, trans_rst, _ in gene.generate(dataset):
        self.assertEqual(2, len(original_samples))
        self.assertEqual(2, len(trans_rst))
        for ori_sample, trans_sample in zip(original_samples, trans_rst):
            self.assertEqual(2, trans_sample.get_mask('x')[-2])
            self.assertNotEqual(trans_sample.get_words('x')[-2],
                                ori_sample.get_words('x')[-2])

    # test wrong transformation_methods; ``generate`` is lazy, so the
    # error is only raised once ``next`` is called on it
    gene = POSGenerator(transformation_methods=["wrong_transform_method"],
                        subpopulation_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))
    gene = POSGenerator(transformation_methods=["AddSubtree"],
                        subpopulation_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))
    # a bare string instead of a list of method names
    gene = POSGenerator(transformation_methods="OOV",
                        subpopulation_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))

    # NOTE(review): the two checks below reuse ``gene`` built with the
    # invalid string "OOV" above, so the ValueError may come from the
    # method validation rather than from the dataset -- confirm whether a
    # valid generator was intended here.
    # test empty dataset
    self.assertRaises(ValueError, next, gene.generate(Dataset('POS')))
    # test empty sample
    self.assertRaises(ValueError, next, gene.generate(special_dataset))
{'context': context, 'question': 'Which NFL team represented the ' 'AFC at Super Bowl 50?', 'answers': [{"text": "Denver Broncos", "answer_start": 177}, {"text": "Denver Broncos", "answer_start": 177}, {"text": "Denver Broncos", "answer_start": 177}], 'title': "Super_Bowl_50", 'is_impossible': False}) sample2 = MRCSample( {'context': " ", 'question': 'Which NFL team represented ' 'the AFC at Super Bowl 50?', 'answers': [], 'title': "Super_Bowl_50", 'is_impossible': True}) sample3 = MRCSample( {'context': "! @ # $ % ^ & * ( )", 'question': 'Which NFL team represented the AFC at Super Bowl 50?', 'answers': [], 'title': "Super_Bowl_50", 'is_impossible': True}) dataset = Dataset('MRC') dataset.load(data_sample) dataset.extend([sample2, sample3]) class TestMRCGenerator(unittest.TestCase): def test_generate(self): # test task transformation # TODO, domain transformation addsentdiverse transformation_methods = ["PerturbAnswer", "ModifyPos"] gene = MRCGenerator(transformation_methods=transformation_methods, subpopulation_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): self.assertEqual(1, len(trans_rst)) for index in range(len(original_samples)):
import unittest

from textflint.generation_layer.generator.coref_generator import CorefGenerator
from textflint.input_layer.dataset import Dataset
from test.data.coref_debug import CorefDebug

# Six debugging samples provided by CorefDebug, loaded into one COREF
# dataset that the test below iterates over.
sample1 = CorefDebug.coref_sample1()
sample2 = CorefDebug.coref_sample2()
sample3 = CorefDebug.coref_sample3()
sample4 = CorefDebug.coref_sample4()
sample5 = CorefDebug.coref_sample5()
sample6 = CorefDebug.coref_sample6()
samples = [sample1, sample2, sample3, sample4, sample5, sample6]
dataset = Dataset("COREF")
dataset.load(samples)


class TestRndRepeat(unittest.TestCase):
    """Tests the ``RndRepeat`` transformation through ``CorefGenerator``."""

    def test_transform(self):
        """Transformed samples never have fewer sentences than the originals.

        Each batch yielded by ``generate`` must hold as many transformed
        samples as original ones, and for every pair the original's
        ``num_sentences()`` must not exceed the transformed sample's.
        """
        gene = CorefGenerator(trans_methods=["RndRepeat"], sub_methods=[])
        for original_samples, trans_rst, _ in gene.generate(dataset):
            self.assertEqual(len(original_samples), len(trans_rst))
            for so, st in zip(original_samples, trans_rst):
                # assertLessEqual prints both counts when the check fails.
                self.assertLessEqual(so.num_sentences(), st.num_sentences())


if __name__ == "__main__":
    unittest.main()
"sentence": "! @ # $ % ^ & * ( )", "term_list": { "35390182#756337#4_0": { "id": "35390182#756337#4_0", "polarity": "positive", "term": "!", "from": 0, "to": 1, "opinion_words": ["@"], "opinion_position": [[2, 3]] } } } data_samples = [sample1, sample2, sample3] dataset = Dataset('ABSA') dataset.load(data_samples) special_samples = [sample4, sample5] special_dataset = Dataset('ABSA') special_dataset.load(special_samples) class TestABSAGenerator(unittest.TestCase): def test_generate(self): # test task transformation transformation_methods = ['RevTgt', 'RevNon', 'AddDiff'] gene = ABSAGenerator(transformation_methods=transformation_methods, subpopulation_methods=[], dataset_config='restaurant')
'obj': [5, 5], 'y': 'employee' } sample4 = {'x': ['', '', ''], 'subj': [0, 0], 'obj': [0, 0], 'y': 'age'} sample5 = { 'x': ['!', '@', '#', '$', '%', '&', '*', '(', ')'], 'subj': [5, 5], 'obj': [6, 6], 'y': 'None' } single_data_sample = [sample1] data_samples = [sample1, sample2, sample3, sample4, sample5] dataset = Dataset('RE') single_dataset = Dataset('RE') dataset.load(data_samples) single_dataset.load(single_data_sample) class TestSpecialEntityTyposSwap(unittest.TestCase): def test_generate(self): # test task transformation trans_methods = ["SwapBirth", "SwapAge"] gene = REGenerator(trans_methods=trans_methods, sub_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): self.assertEqual(1, len(original_samples)) for index in range(len(original_samples)): self.assertTrue(original_samples[index] != trans_rst[index])
} sample3 = { 'hypothesis': 'There are two little boys smiling.', 'premise': 'Two little boys are smiling and laughing while one is ' 'standing and one is in a bouncy seat', 'y': 'entailment' } sample4 = { 'hypothesis': '! @ # $ % ^ & * ( )', 'premise': '! @ # $ % ^ & * ( )', 'y': 'neutral' } data_samples = [sample1, sample2, sample3, sample4] dataset = Dataset(task='NLI') dataset.load(data_samples) gene = NLIGenerator() class TestNLIGenerator(unittest.TestCase): def test_generate(self): # test task transformation, ignore NliOverlap because it # does't rely on the original data trans_methods = ["SwapAnt", "AddSent", "NumWord"] gene = NLIGenerator(trans_methods=trans_methods, sub_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): for index in range(len(original_samples)): logger.info(original_samples[index].dump()) logger.info(trans_rst[index].dump())
} sample3 = { 'sentence1': 'There are two little boys smiling.', 'sentence2': 'Two little boys are smiling and laughing ' 'while one is standing and one is in a bouncy seat', 'y': '0' } sample4 = { 'sentence1': '! @ # $ % ^ & * ( )', 'sentence2': '! @ # $ % ^ & * ( )', 'y': '0' } data_samples = [sample1, sample2, sample3, sample4] dataset = Dataset(task='SM') dataset.load(data_samples) gene = SMGenerator() class TestSMGenerator(unittest.TestCase): def test_generate(self): # test task transformation, ignore SmOverlap because # it does't rely on the original data transformation_methods = ["SwapWord", "SwapNum"] gene = SMGenerator(transformation_methods=transformation_methods, subpopulation_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): for index in range(len(original_samples)): # test whether the sample changed or not self.assertTrue(
import unittest from textflint.input_layer.dataset import Dataset from textflint.generation_layer.generator.sa_generator import SAGenerator sample1 = {'x': 'Titanic is my favorite movie.', 'y': 'pos'} sample2 = {'x': 'I don\'t like the actor Tim Hill', 'y': 'neg'} sample3 = {'x': 'The leading actor is good.', 'y': 'pos'} sample4 = {'x': '', 'y': 'pos'} sample5 = {'x': '!@#$$%^&*()_+}{|":?><', 'y': 'pos'} single_data_sample = [sample1] data_samples = [sample1, sample2, sample3, sample4, sample5] dataset = Dataset('SA') single_dataset = Dataset('SA') dataset.load(data_samples) single_dataset.load(single_data_sample) class TestSpecialEntityTyposSwap(unittest.TestCase): def test_generate(self): # test task transformation trans_methods = ["SwapSpecialEnt", "AddSum", "DoubleDenial", "SwapNum"] SA_config = { 'AddSum': [{ 'entity_type': 'movie' }, { 'entity_type': 'person' }], 'SwapSpecialEnt': [{ 'entity_type': 'movie' }, { 'entity_type': 'person'
'x': ['That', 'is', 'a', 'pretty', 'prefixed', 'survey'], 'y': ['DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN'] } sample2 = { 'x': ['That', 'is', 'a', 'prefixed', 'survey'], 'y': ['DT', 'VBZ', 'DT', 'JJ', 'NN'] } sample3 = {'x': ['', '', ''], 'y': ['O', 'O', 'O']} sample4 = { 'x': '! @ # $ % ^ & * ( )', 'y': ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] } special_data_sample = [sample3, sample4] data_samples = [sample1, sample2] dataset = Dataset('POS') dataset.load(data_samples) special_dataset = Dataset('POS') special_dataset.load(special_data_sample) class TestPOSGenerate(unittest.TestCase): def test_generate(self): # test MultiPOSSwap transformation gene = POSGenerator( trans_methods=["SwapMultiPOS"], sub_methods=[], trans_config={"SwapMultiPOS": [{ "treebank_tag": "NN" }]}) for original_samples, trans_rst, trans_type in gene.generate(dataset):