def test_generate(self):
    """Exercise ``POSGenerator.generate`` on valid and invalid setups.

    Covers the ``SwapMultiPOS`` and ``SwapPrefix`` transformations on the
    module-level ``dataset``, then checks that unusable
    ``transformation_methods`` values and the empty / special datasets
    make the first ``next()`` on the generator raise ``ValueError``.
    """
    # test MultiPOSSwap transformation
    gene = POSGenerator(
        transformation_methods=["SwapMultiPOS"],
        subpopulation_methods=[],
        transformation_config={"SwapMultiPOS": [{"treebank_tag": "NN"}]})
    for original_samples, trans_rst, _ in gene.generate(dataset):
        self.assertEqual(2, len(original_samples))
        self.assertEqual(2, len(trans_rst))
        for original, transformed in zip(original_samples, trans_rst):
            # The last token must be flagged in the mask and differ from
            # the original (mask value 2 presumably marks a modified
            # token -- TODO confirm against the sample's mask constants).
            self.assertEqual(2, transformed.get_mask('x')[-1])
            self.assertNotEqual(original.get_words('x')[-1],
                                transformed.get_words('x')[-1])

    # test PrefixSwap transformation
    gene = POSGenerator(transformation_methods=['SwapPrefix'],
                        subpopulation_methods=[])
    for original_samples, trans_rst, _ in gene.generate(dataset):
        self.assertEqual(2, len(original_samples))
        self.assertEqual(2, len(trans_rst))
        for original, transformed in zip(original_samples, trans_rst):
            # Same check as above, on the second-to-last token.
            self.assertEqual(2, transformed.get_mask('x')[-2])
            self.assertNotEqual(original.get_words('x')[-2],
                                transformed.get_words('x')[-2])

    # test wrong transformation_methods
    gene = POSGenerator(transformation_methods=["wrong_transform_method"],
                        subpopulation_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))
    gene = POSGenerator(transformation_methods=["AddSubtree"],
                        subpopulation_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))
    # A bare string instead of a list of method names.
    gene = POSGenerator(transformation_methods="OOV",
                        subpopulation_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))

    # NOTE(review): ``gene`` is still the generator built with the invalid
    # "OOV" string, which already raises ValueError on ``dataset`` above.
    # The two checks below may therefore pass because of the bad
    # configuration rather than the empty input -- confirm whether a
    # validly configured generator was intended here.
    # test empty dataset
    self.assertRaises(ValueError, next, gene.generate(Dataset('POS')))
    # test empty sample
    self.assertRaises(ValueError, next, gene.generate(special_dataset))
def test_generate(self):
    """Exercise ``CWSGenerator.generate`` on degenerate and normal input.

    First runs every transformation in ``mode`` over an empty sentence
    and a punctuation-only sentence, then checks that unusable
    ``transformation_methods`` values raise ``ValueError`` on the first
    ``next()``, and finally runs the same transformations over two
    labelled sentences.
    """
    test1 = CWSSample({'x': '', 'y': []})
    test2 = CWSSample({'x': '~ ! @ # $ % ^ & * ( ) _ +', 'y': []})
    dataset = Dataset('CWS')
    dataset.load([test1, test2])
    mode = [
        'SwapName', 'CnSwapNum', 'Reduplication', 'CnMLM',
        'SwapContraction', 'SwapVerb', 'SwapSyn'
    ]
    gene = CWSGenerator(transformation_methods=mode,
                        subpopulation_methods=[])
    # Whatever the generator yields for these inputs must be empty.
    for original_samples, trans_rst, _ in gene.generate(dataset):
        self.assertEqual(0, len(original_samples))
        self.assertEqual(0, len(trans_rst))

    # test wrong transformation_methods
    gene = CWSGenerator(transformation_methods=["wrong_transform_method"],
                        subpopulation_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))
    gene = CWSGenerator(transformation_methods=["AddSubtree"],
                        subpopulation_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))
    # A bare string instead of a list of method names.
    gene = CWSGenerator(transformation_methods="CnMLM",
                        subpopulation_methods=[])
    self.assertRaises(ValueError, next, gene.generate(dataset))

    sent1 = '周小明生产一万'
    sent2 = '央视想朦胧'
    dataset = Dataset(task='CWS')
    dataset.load({
        'x': [sent1, sent2],
        'y': [['B', 'M', 'E', 'B', 'E', 'B', 'E'],
              ['B', 'E', 'S', 'B', 'E']]
    })
    gene = CWSGenerator(transformation_methods=mode,
                        subpopulation_methods=[])
    # Originals and transformed results must come back in equal numbers.
    for original_samples, trans_rst, _ in gene.generate(dataset):
        self.assertEqual(len(original_samples), len(trans_rst))
{'context': context, 'question': 'Which NFL team represented the ' 'AFC at Super Bowl 50?', 'answers': [{"text": "Denver Broncos", "answer_start": 177}, {"text": "Denver Broncos", "answer_start": 177}, {"text": "Denver Broncos", "answer_start": 177}], 'title': "Super_Bowl_50", 'is_impossible': False}) sample2 = MRCSample( {'context': " ", 'question': 'Which NFL team represented ' 'the AFC at Super Bowl 50?', 'answers': [], 'title': "Super_Bowl_50", 'is_impossible': True}) sample3 = MRCSample( {'context': "! @ # $ % ^ & * ( )", 'question': 'Which NFL team represented the AFC at Super Bowl 50?', 'answers': [], 'title': "Super_Bowl_50", 'is_impossible': True}) dataset = Dataset('MRC') dataset.load(data_sample) dataset.extend([sample2, sample3]) class TestMRCGenerator(unittest.TestCase): def test_generate(self): # test task transformation # TODO, domain transformation addsentdiverse transformation_methods = ["PerturbAnswer", "ModifyPos"] gene = MRCGenerator(transformation_methods=transformation_methods, subpopulation_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): self.assertEqual(1, len(trans_rst)) for index in range(len(original_samples)):
from TextFlint.generation_layer.generator.coref_generator import CorefGenerator
from TextFlint.input_layer.dataset import Dataset
import unittest

from ....data.coref_debug import CorefDebug

# Debug fixtures provided by CorefDebug, built once at import time and
# shared by every test in this module.
sample1 = CorefDebug.coref_sample1()
sample2 = CorefDebug.coref_sample2()
sample3 = CorefDebug.coref_sample3()
sample4 = CorefDebug.coref_sample4()
sample5 = CorefDebug.coref_sample5()
sample6 = CorefDebug.coref_sample6()
samples = [sample1, sample2, sample3, sample4, sample5, sample6]
dataset = Dataset("COREF")
dataset.load(samples)


class TestRndShuffle(unittest.TestCase):
    """Runs the ``RndShuffle`` transformation through ``CorefGenerator``."""

    def test_transform(self):
        """Transformed samples pair up with originals and keep their
        sentence count."""
        gene = CorefGenerator(transformation_methods=["RndShuffle"],
                              subpopulation_methods=[])
        for original_samples, trans_rst, _ in gene.generate(dataset):
            self.assertEqual(len(original_samples), len(trans_rst))
            for original, transformed in zip(original_samples, trans_rst):
                # assertEqual reports both counts when they disagree.
                self.assertEqual(original.num_sentences(),
                                 transformed.num_sentences())


if __name__ == "__main__":
    unittest.main()
import unittest from TextFlint.input_layer.dataset import Dataset from TextFlint.generation_layer.generator.ner_generator import NERGenerator sample1 = {'x': 'Amy lives in a city , which is called NYK .', 'y': ['B-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'O']} sample2 = {'x': 'Jotion lives in Xian 105 kilometers away .', 'y': ['B-PER', 'O', 'O', 'B-LOC', 'O', 'O', 'O', 'O']} sample3 = {'x': 'China rejects Syrians call to boycott Chinese lamb .', 'y': ['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O']} single_data_sample = [sample1] data_samples = [sample1, sample2, sample3] dataset = Dataset('NER') single_dataset = Dataset('NER') dataset.load(data_samples) single_dataset.load(single_data_sample) gene = NERGenerator() class TestSpecialEntityTyposSwap(unittest.TestCase): def test_generate(self): # test task transformation transformation_methods = ["SwapEnt", "EntTypos"] gene = NERGenerator(transformation_methods=transformation_methods, subpopulation_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): self.assertEqual(3, len(original_samples)) for index in range(len(original_samples)): for ori_entity, trans_entity in \
import unittest from TextFlint.input_layer.dataset import Dataset from TextFlint.generation_layer.generator.sa_generator import SAGenerator sample1 = {'x': 'Titanic is my favorite movie.', 'y': 'pos'} sample2 = {'x': 'I don\'t like the actor Tim Hill', 'y': 'neg'} sample3 = {'x': 'The leading actor is good.', 'y': 'pos'} sample4 = {'x': '', 'y': 'pos'} sample5 = {'x': '!@#$$%^&*()_+}{|":?><', 'y': 'pos'} single_data_sample = [sample1] data_samples = [sample1, sample2, sample3, sample4, sample5] dataset = Dataset('SA') single_dataset = Dataset('SA') dataset.load(data_samples) single_dataset.load(single_data_sample) class TestSpecialEntityTyposSwap(unittest.TestCase): def test_generate(self): # test task transformation transformation_methods = ["SwapSpecialEnt", "AddSum", "DoubleDenial", "SwapNum"] SA_config = {'AddSum': [{'entity_type': 'movie'}, {'entity_type': 'person'}], 'SwapSpecialEnt': [{'entity_type': 'movie'}, {'entity_type': 'person'}]}
'x': ['That', 'is', 'a', 'pretty', 'prefixed', 'survey'], 'y': ['DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN'] } sample2 = { 'x': ['That', 'is', 'a', 'prefixed', 'survey'], 'y': ['DT', 'VBZ', 'DT', 'JJ', 'NN'] } sample3 = {'x': ['', '', ''], 'y': ['O', 'O', 'O']} sample4 = { 'x': '! @ # $ % ^ & * ( )', 'y': ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] } special_data_sample = [sample3, sample4] data_samples = [sample1, sample2] dataset = Dataset('POS') dataset.load(data_samples) special_dataset = Dataset('POS') special_dataset.load(special_data_sample) class TestPOSGenerate(unittest.TestCase): def test_generate(self): # test MultiPOSSwap transformation gene = POSGenerator( transformation_methods=["SwapMultiPOS"], subpopulation_methods=[], transformation_config={"SwapMultiPOS": [{ "treebank_tag": "NN" }]}) for original_samples, trans_rst, trans_type in gene.generate(dataset):
} sample3 = { 'hypothesis': 'There are two little boys smiling.', 'premise': 'Two little boys are smiling and laughing while one is ' 'standing and one is in a bouncy seat', 'y': 'entailment' } sample4 = { 'hypothesis': '! @ # $ % ^ & * ( )', 'premise': '! @ # $ % ^ & * ( )', 'y': 'neutral' } data_samples = [sample1, sample2, sample3, sample4] dataset = Dataset(task='NLI') dataset.load(data_samples) gene = NLIGenerator() class TestNLIGenerator(unittest.TestCase): def test_generate(self): # test task transformation, ignore NliOverlap because it # does't rely on the original data transformation_methods = ["SwapAnt", "AddSent", "NumWord"] gene = NLIGenerator(transformation_methods=transformation_methods, subpopulation_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): for index in range(len(original_samples)): logger.info(original_samples[index].dump())
import unittest
from TextFlint.input_layer.dataset import Dataset
from TextFlint.generation_layer.generator.dp_generator import DPGenerator, sample, sample_1

# Module-level fixtures shared by the tests below.
single_data_sample = [sample]
data_samples = [sample, sample_1]
dataset = Dataset('DP')
dataset.load(data_samples)
gene = DPGenerator()


class TestDPGenerator(unittest.TestCase):
    """Runs dependency-parsing transformations through ``DPGenerator``."""

    def test_generate(self):
        """``DeleteSubTree`` changes the length of ``x``; combined with
        ``Ocr`` every transformed sample differs from its original."""
        # test task transformation
        gene = DPGenerator(transformation_methods=["DeleteSubTree"],
                           subpopulation_methods=[])
        for original_samples, trans_rst, _ in gene.generate(dataset):
            self.assertEqual(2, len(original_samples))
            for original_sample, transformed_sample in zip(
                    original_samples, trans_rst):
                # The 'x' field must have a different length afterwards.
                self.assertNotEqual(
                    len(original_sample.get_value('x')),
                    len(transformed_sample.get_value('x')))

        transformation_methods = ["DeleteSubTree", "Ocr"]
        gene = DPGenerator(transformation_methods=transformation_methods,
                           subpopulation_methods=[])
        for original_samples, trans_rst, _ in gene.generate(dataset):
            for ori_sample, trans_sample in zip(original_samples, trans_rst):
                self.assertNotEqual(ori_sample, trans_sample)
"sentence": "! @ # $ % ^ & * ( )", "term_list": { "35390182#756337#4_0": { "id": "35390182#756337#4_0", "polarity": "positive", "term": "!", "from": 0, "to": 1, "opinion_words": ["@"], "opinion_position": [[2, 3]] } } } data_samples = [sample1, sample2, sample3] dataset = Dataset('ABSA') dataset.load(data_samples) special_samples = [sample4, sample5] special_dataset = Dataset('ABSA') special_dataset.load(special_samples) class TestABSAGenerator(unittest.TestCase): def test_generate(self): # test task transformation transformation_methods = ['RevTgt', 'RevNon', 'AddDiff'] gene = ABSAGenerator(transformation_methods=transformation_methods, subpopulation_methods=[], dataset_config='restaurant')
'sentence2': 'Mr zhang has 20 students', 'y': '0'} sample2 = {'sentence1': 'I like eating apples', 'sentence2': 'I love to eat apples', 'y': '1'} sample3 = {'sentence1': 'There are two little boys smiling.', 'sentence2': 'Two little boys are smiling and laughing ' 'while one is standing and one is in a bouncy seat', 'y': '0'} sample4 = {'sentence1': '! @ # $ % ^ & * ( )', 'sentence2': '! @ # $ % ^ & * ( )', 'y': '0'} data_samples = [sample1, sample2, sample3, sample4] dataset = Dataset(task='SM') dataset.load(data_samples) gene = SMGenerator() class TestSMGenerator(unittest.TestCase): def test_generate(self): # test task transformation, ignore SmOverlap because # it does't rely on the original data transformation_methods = ["SwapWord", "SwapNum"] gene = SMGenerator(transformation_methods=transformation_methods, subpopulation_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): for index in range(len(original_samples)): # test whether the sample changed or not