예제 #1
0
    def test_generate(self):
        """Exercise ``CWSGenerator.generate`` on three kinds of input.

        1. Degenerate samples (an empty sentence and a punctuation-only
           sentence): every yielded batch must be empty.
        2. Invalid ``trans_methods`` values: advancing the generator must
           raise ``ValueError``.
        3. Regular labelled sentences: each yielded batch must contain as
           many transformed samples as original samples.
        """
        test1 = CWSSample({'x': '', 'y': []})
        test2 = CWSSample({'x': '~ ! @ # $ % ^ & * ( ) _ +', 'y': []})
        dataset = Dataset('CWS')
        dataset.load([test1, test2])
        mode = [
            'SwapName', 'CnSwapNum', 'Reduplication', 'CnMLM',
            'SwapContraction', 'SwapVerb', 'SwapSyn'
        ]
        gene = CWSGenerator(trans_methods=mode, sub_methods=[])
        for original_samples, trans_rst, _ in gene.generate(dataset):
            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b) which only says "False is not true".
            self.assertEqual(0, len(original_samples))
            self.assertEqual(0, len(trans_rst))

        # test wrong trans_methods
        invalid_trans_methods = (
            ["wrong_transform_method"],  # name that is expected to be unknown
            ["AddSubtree"],  # presumably valid only for another task
            "CnMLM",  # bare string instead of a list of names
        )
        for trans_methods in invalid_trans_methods:
            with self.subTest(trans_methods=trans_methods):
                gene = CWSGenerator(trans_methods=trans_methods,
                                    sub_methods=[])
                # The error is expected when the generator is first advanced.
                self.assertRaises(ValueError, next, gene.generate(dataset))

        sent1 = '周小明生产一万'
        sent2 = '央视想朦胧'
        dataset = Dataset(task='CWS')
        dataset.load({
            'x': [sent1, sent2],
            'y': [['B', 'M', 'E', 'B', 'E', 'B', 'E'],
                  ['B', 'E', 'S', 'B', 'E']]
        })

        gene = CWSGenerator(trans_methods=mode, sub_methods=[])
        for original_samples, trans_rst, _ in gene.generate(dataset):
            self.assertEqual(len(original_samples), len(trans_rst))
예제 #2
0
                                     'AFC at Super Bowl 50?',
        'answers': [{"text": "Denver Broncos", "answer_start": 177},
                    {"text": "Denver Broncos", "answer_start": 177},
                    {"text": "Denver Broncos", "answer_start": 177}],
        'title': "Super_Bowl_50", 'is_impossible': False})
# Unanswerable sample whose context is a single space.
sample2 = MRCSample({
    'context': " ",
    'question': 'Which NFL team represented the AFC at Super Bowl 50?',
    'answers': [],
    'title': "Super_Bowl_50",
    'is_impossible': True,
})
# Unanswerable sample whose context consists only of punctuation symbols.
sample3 = MRCSample({
    'context': "! @ # $ % ^ & * ( )",
    'question': 'Which NFL team represented the AFC at Super Bowl 50?',
    'answers': [],
    'title': "Super_Bowl_50",
    'is_impossible': True,
})

# Shared fixture: loaded from ``data_sample``, then extended with the two
# edge-case samples above.
dataset = Dataset('MRC')
dataset.load(data_sample)
dataset.extend([sample2, sample3])


class TestMRCGenerator(unittest.TestCase):
    """Tests for ``MRCGenerator`` run against the module-level ``dataset``."""

    def test_generate(self):
        """Check the results yielded for the task-specific transformations."""
        # test task transformation
        # TODO, domain transformation addsentdiverse
        transformation_methods = ["PerturbAnswer", "ModifyPos"]
        gene = MRCGenerator(transformation_methods=transformation_methods,
                            subpopulation_methods=[])
        for original_samples, trans_rst, trans_type in gene.generate(dataset):
            # Every yielded result is expected to hold exactly one sample.
            self.assertEqual(1, len(trans_rst))
            for index in range(len(original_samples)):
                # NOTE(review): ori_sample is bound but not used in the
                # visible part of this loop - confirm against the full file.
                ori_sample = original_samples[index]
예제 #3
0
from textflint.generation_layer.generator.coref_generator import CorefGenerator
from textflint.input_layer.dataset import Dataset
import unittest
from test.data.coref_debug import CorefDebug

# Build the six debug fixtures in order, then expose each one under its own
# module-level name as well as through the ``samples`` list.
samples = [
    CorefDebug.coref_sample1(),
    CorefDebug.coref_sample2(),
    CorefDebug.coref_sample3(),
    CorefDebug.coref_sample4(),
    CorefDebug.coref_sample5(),
    CorefDebug.coref_sample6(),
]
sample1, sample2, sample3, sample4, sample5, sample6 = samples

# Dataset shared by the tests in this module.
dataset = Dataset("COREF")
dataset.load(samples)

class TestRndRepeat(unittest.TestCase):
    """Tests the ``RndRepeat`` transformation through ``CorefGenerator``."""

    def test_transform(self):
        """Each batch keeps its sample count and no sample loses sentences."""
        gene = CorefGenerator(trans_methods=["RndRepeat"],
                              sub_methods=[])
        for original_samples, trans_rst, _ in gene.generate(dataset):
            self.assertEqual(len(original_samples), len(trans_rst))
            for so, st in zip(original_samples, trans_rst):
                # assertLessEqual shows both sentence counts on failure,
                # whereas assertTrue(a <= b) only reports "False is not true".
                self.assertLessEqual(so.num_sentences(), st.num_sentences())


# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
예제 #4
0
    "term_list": {
        "35390182#756337#4_0": {
            "id": "35390182#756337#4_0",
            "polarity": "positive",
            "term": "!",
            "from": 0,
            "to": 1,
            "opinion_words": ["@"],
            "opinion_position": [[2, 3]]
        }
    }
}

# Two groups of fixtures, each loaded into its own ABSA dataset.
data_samples = [sample1, sample2, sample3]
special_samples = [sample4, sample5]

dataset = Dataset('ABSA')
dataset.load(data_samples)

special_dataset = Dataset('ABSA')
special_dataset.load(special_samples)


class TestABSAGenerator(unittest.TestCase):
    def test_generate(self):
        # test task transformation
        transformation_methods = ['RevTgt', 'RevNon', 'AddDiff']
        gene = ABSAGenerator(transformation_methods=transformation_methods,
                             subpopulation_methods=[],
                             dataset_config='restaurant')

        for original_samples, trans_rst, trans_type in gene.generate(dataset):
예제 #5
0
    'employee'
}

# Edge case: 'x' holds three empty strings.
sample4 = {
    'x': ['', '', ''],
    'subj': [0, 0],
    'obj': [0, 0],
    'y': 'age',
}
# Edge case: 'x' holds only punctuation symbols.
sample5 = {'x': ['!', '@', '#', '$', '%', '&', '*', '(', ')'],
           'subj': [5, 5], 'obj': [6, 6], 'y': 'None'}

# Full fixture list and a single-sample variant.
data_samples = [sample1, sample2, sample3, sample4, sample5]
single_data_sample = [sample1]

# Both datasets are created before either one is loaded.
dataset = Dataset('RE')
single_dataset = Dataset('RE')
dataset.load(data_samples)
single_dataset.load(single_data_sample)


class TestSpecialEntityTyposSwap(unittest.TestCase):
    def test_generate(self):
        # test task transformation
        trans_methods = ["SwapBirth", "SwapAge"]
        gene = REGenerator(trans_methods=trans_methods, sub_methods=[])
        for original_samples, trans_rst, trans_type in gene.generate(dataset):
            self.assertEqual(1, len(original_samples))
            for index in range(len(original_samples)):
                self.assertTrue(original_samples[index] != trans_rst[index])

        # test wrong trans_methods
        gene = REGenerator(trans_methods=["wrong_transform_method"],