Ejemplo n.º 1
0
    def test_transformation(self):
        """Check that TwitterType adds one token at either end of the text.

        Also verifies that the five generated texts are pairwise distinct and
        that empty / symbol-only inputs still yield one modified sample.
        """
        sent1 = "The quick brown fox jumps over the lazy dog."
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = TwitterType()
        x = swap_ins.transform(data_sample, n=5)

        self.assertEqual(5, len(x))
        origin = data_sample.get_words('x')
        change = []
        for sample in x:
            trans = sample.get_words('x')
            change.append(sample.get_text('x'))
            # The original words must survive intact, with one extra token
            # either prepended or appended.
            self.assertTrue(origin == trans[1:] or origin == trans[:-1])

        # Compare the generated texts rather than the sample objects: a set
        # of sample objects does not show that the texts actually differ.
        self.assertEqual(5, len(set(change)))

        # test special input
        special_sample = SASample({'x': '', 'y': "negative"})
        x = swap_ins.transform(special_sample)
        self.assertEqual(1, len(x))
        self.assertNotEqual('', x[0].get_text('x'))

        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        x = swap_ins.transform(special_sample)
        self.assertEqual(1, len(x))
        self.assertNotEqual('~!@#$%^7890"\'', x[0].get_text('x'))
Ejemplo n.º 2
0
    def test_transformation(self):
        """Check SpellingError output under random seed 100.

        Words at index 0 and 2-6 must be unchanged, words at index 1, 7 and 8
        must differ from the original, and the five samples must differ from
        one another. Empty and symbol-only inputs yield no samples.
        """
        import random
        random.seed(100)
        sentence = 'The quick brown fox jumps over the lazy dog.'
        data_sample = SASample({'x': sentence, 'y': "negative"})
        swap_ins = SpellingError()
        results = swap_ins.transform(data_sample, n=5)

        altered = []
        for result in results:
            before = data_sample.get_words('x')
            after = result.get_words('x')
            # Positions that must be left alone.
            self.assertEqual(before[0], after[0])
            self.assertEqual(before[2:7], after[2:7])
            altered.append(after[1] + after[7] + after[8])
            # Positions that must have been misspelled.
            for idx in (1, 7, 8):
                self.assertNotEqual(after[idx], before[idx])

        self.assertEqual(5, len(set(altered)))

        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Ejemplo n.º 3
0
    def test_transformation(self):
        """Pin the exact Keyboard outputs produced under random seed 173."""
        import random

        data_sample = SASample({
            'x': 'The quick brown fox jumps over the lazy dog .',
            'y': "negative",
        })
        trans = Keyboard()

        # Seed after the transformation is built, right before it is used.
        random.seed(173)

        outputs = trans.transform(data_sample, n=5)
        self.assertEqual(5, len(outputs))

        expected = (
            "The quick brown f8x jumps over the lzzy dog.",
            "The quick brown fpx jumps over the lXzy dog.",
            "The quick brown box jumps over the lzzy dog.",
            "The quick brown foC jumps over the lXzy dog.",
            "The quick brown foS jumps over the lazg dog.",
        )
        for produced, wanted in zip(outputs, expected):
            self.assertEqual(produced.get_text('x'), wanted)

        # test special data
        empty_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], trans.transform(empty_sample))

        symbol_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        symbol_outputs = trans.transform(symbol_sample)
        self.assertEqual("~!w#$%^7890 \"'", symbol_outputs[0].get_text('x'))
Ejemplo n.º 4
0
    def test_transformation(self):
        """Check MLM output on CPU under random seed 100.

        The first two words, the word at index 3 and the last five words must
        be unchanged; the combination of words at index 2 and 4 must be unique
        across the five samples.
        """
        text = 'The quick brown fox jumps over the lazy dog. '
        data_sample = SASample({'x': text, 'y': "negative"})
        import random
        random.seed(100)
        swap_ins = MLM(device='cpu')

        candidates = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(candidates))

        tokens = []
        for candidate in candidates:
            new_words = candidate.get_words('x')
            old_words = data_sample.get_words('x')
            self.assertEqual(new_words[:2], old_words[:2])
            self.assertEqual(new_words[3], old_words[3])
            self.assertEqual(new_words[-5:], old_words[-5:])
            tokens.append(new_words[2] + new_words[4])

        self.assertEqual(5, len(set(tokens)))

        empty_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(empty_sample))

        symbol_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        first_output = swap_ins.transform(symbol_sample)[0]
        self.assertEqual('epilogue "\'', first_output.get_text('x'))
Ejemplo n.º 5
0
    def test_transformation(self):
        """Check that the shared `swap_ins` inserts one word at index 4.

        Ten samples are requested; each must keep the first four and last six
        words, be one word longer than the original, and carry a different
        word at index 4. Empty and symbol-only inputs yield no samples.
        """
        # test the change num
        variants = swap_ins.transform(data_sample, n=10)
        self.assertEqual(10, len(variants))

        for raw_text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': raw_text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))

        # test if the item change
        inserted = []
        for variant in variants:
            src_words = data_sample.get_words('x')
            new_words = variant.get_words('x')
            self.assertEqual(src_words[:4], new_words[:4])
            self.assertEqual(src_words[-6:], new_words[-6:])
            self.assertEqual(len(src_words) + 1, len(new_words))
            inserted.append(new_words[4])

        # test if the n change samples not equal
        self.assertEqual(len(set(inserted)), 10)
Ejemplo n.º 6
0
    def test_transformation(self):
        """Pin the single output of the shared `swap_ins` and its label.

        Empty and symbol-only inputs must yield no samples.
        """
        results = swap_ins.transform(data_sample)
        self.assertEqual(1, len(results))

        only_result = results[0]
        expected_text = ("we are playing ping pang ball, you're so lazy. "
                         "She is so beautiful!")
        self.assertEqual(expected_text, only_result.get_text('x'))
        # The label must be carried over unchanged.
        self.assertEqual(only_result.get_value('y'), data_sample.get_value('y'))

        for raw_text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': raw_text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Ejemplo n.º 7
0
    def test_swap(self):
        """Check 'swap' mode: one changed sample within edit distance 4.

        An empty input yields no samples; a symbol-only input yields one.
        """
        typos_trans.mode = 'swap'
        produced = typos_trans.transform(data_sample, n=1)
        self.assertEqual(1, len(produced))

        for item in produced:
            new_text = item.get_text('x')
            self.assertNotEqual(new_text, data_sample.get_text('x'))
            gap = editdistance.distance(item.get_text('x'),
                                        data_sample.get_text('x'))
            self.assertLessEqual(gap, 4)

        empty_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], typos_trans.transform(empty_sample))

        symbol_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual(1, len(typos_trans.transform(symbol_sample)))
Ejemplo n.º 8
0
    def test_transformation(self):
        """Pin the ReverseNeg output for a simple sentence and its label.

        Empty and symbol-only inputs must yield no samples.
        """
        sent1 = "The quick brown fox jumps over the lazy dog ."
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = ReverseNeg()
        x = swap_ins.transform(data_sample, n=1)
        x = x[0]
        self.assertEqual(
            'The quick brown fox does not jumps over the lazy '
            'dog.', x.get_text('x'))
        # The label is expected to stay as given.
        self.assertEqual('negative', x.get_value('y'))

        # test special input
        # (the transform is called once per input; an extra call whose
        # result was thrown away has been dropped)
        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
Ejemplo n.º 9
0
    def test_insert(self):
        """Check 'insert' mode: three distinct samples within edit distance 2.

        An empty input yields no samples; a symbol-only input yields one.
        """
        typos_trans.mode = 'insert'

        empty_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], typos_trans.transform(empty_sample))
        symbol_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual(1, len(typos_trans.transform(symbol_sample)))

        produced = typos_trans.transform(data_sample, n=3)
        self.assertEqual(3, len(produced))

        texts = []
        for item in produced:
            self.assertNotEqual(item.get_text('x'), data_sample.get_text('x'))
            gap = editdistance.distance(item.get_text('x'),
                                        data_sample.get_text('x'))
            self.assertLessEqual(gap, 2)
            texts.append(item.get_text('x'))

        # All three generated texts must be different.
        self.assertEqual(3, len(set(texts)))
Ejemplo n.º 10
0
    def test_transformation(self):
        """Check that SwapAntWordNet changes only the word at index 1.

        Five samples are requested but exactly one is expected back. Empty and
        symbol-only inputs must yield no samples.
        """
        sentence = 'The fast brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sentence, 'y': "negative"})
        swap_ins = SwapAntWordNet()
        results = swap_ins.transform(data_sample, n=5)
        self.assertEqual(1, len(results))

        for result in results:
            before = data_sample.get_words('x')
            after = result.get_words('x')
            self.assertEqual(before[0], after[0])
            self.assertNotEqual(before[1], after[1])
            self.assertEqual(before[2:], after[2:])

        for raw_text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': raw_text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Ejemplo n.º 11
0
 def test_transformation(self):
     """Pin the BackTrans (CPU) output text and label for one sentence."""
     question = ('To whom did the Virgin Mary allegedly appear in '
                 '1858 in Lourdes France?')
     data_sample = SASample({'x': question, 'y': "negative"})
     trans = BackTrans(device='cpu')
     results = trans.transform(data_sample, n=1)

     expected_text = ('To whom did the Virgin Mary allegedly appear '
                      'in Lourdes, France, in 1858?')
     self.assertEqual(expected_text, results[0].get_text('x'))
     # The label must be carried over unchanged.
     self.assertEqual('negative', results[0].get_value('y'))
Ejemplo n.º 12
0
    def test_transformation(self):
        """Check SwapNum under random seed 100.

        Each of the three samples must be one edit away from the original and
        keep the label; the first sample's text is pinned. Empty and
        digit-free symbol inputs must yield no samples.
        """
        import random
        random.seed(100)
        sent1 = "Here's 0 bug."
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = SwapNum()
        results = swap_ins.transform(data_sample, n=3)

        self.assertEqual(3, len(results))
        for result in results:
            self.assertEqual(
                1, editdistance.distance(result.get_text('x'), sent1))
            self.assertEqual("negative", result.get_value('y'))

        self.assertEqual("Here's 1 bug.", results[0].get_text('x'))

        for raw_text in ('', '~!@#$%^"\''):
            special_sample = SASample({'x': raw_text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Ejemplo n.º 13
0
    def test_transformation(self):
        """Check that SwapSynWordNet changes exactly one word per sample.

        Runs under random seed 1 and requests five samples. Empty and
        symbol-only inputs must yield no samples.
        """
        import random
        random.seed(1)
        sentence = 'The quick brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sentence, 'y': "negative"})
        swap_ins = SwapSynWordNet()
        results = swap_ins.transform(data_sample, n=5)

        self.assertEqual(5, len(results))
        for result in results:
            # Count positions where the new word differs from the original.
            word_pairs = zip(result.get_words('x'), data_sample.get_words('x'))
            differing = sum(1 for new, old in word_pairs if new != old)
            self.assertEqual(1, differing)

        for raw_text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': raw_text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Ejemplo n.º 14
0
    def test_transformation(self):
        """Check that Punctuation changes the text but not its letters.

        At most ten samples may come back for n=10, and the single output
        under random seed 100 is pinned. Empty and symbol-only inputs each
        yield one sample.
        """
        import random

        sentence = "The quick brown fox jumps over the lazy dog."
        data_sample = SASample({'x': sentence, 'y': "negative"})
        swap_ins = Punctuation()
        variants = swap_ins.transform(data_sample, n=10)

        # test the form
        for variant in variants:
            new_letters = ''.join(
                ch for ch in variant.get_text('x') if ch.isalpha())
            old_letters = ''.join(
                ch for ch in data_sample.get_text('x') if ch.isalpha())
            self.assertEqual(new_letters, old_letters)
            self.assertNotEqual(variant.get_text('x'),
                                data_sample.get_text('x'))

        self.assertLessEqual(len(variants), 10)

        # Seed only now, so the pinned output below is reproducible.
        random.seed(100)
        pinned = swap_ins.transform(data_sample, n=1)[0]
        # test the data
        self.assertEqual('{ The quick brown fox jumps over the lazy dog; }',
                         pinned.get_text('x'))

        for raw_text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': raw_text, 'y': "negative"})
            self.assertEqual(1, len(swap_ins.transform(special_sample)))
Ejemplo n.º 15
0
    def test_transformation(self):
        """Check SwapSynWordEmbedding under random seed 1.

        The first five words and the last word must be unchanged, and the
        second-to-last word must be different in each of the five samples.
        Empty and symbol-only inputs must yield no samples.
        """
        import random
        random.seed(1)
        sentence = "There are no water in bottom."
        data_sample = SASample({'x': sentence, 'y': "negative"})
        swap_ins = SwapSynWordEmbedding()
        results = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(results))

        replacements = []
        for result in results:
            before = data_sample.get_words('x')
            after = result.get_words('x')
            replacements.append(after[-2])
            self.assertEqual(before[:5], after[:5])
            self.assertEqual(before[-1], after[-1])
        self.assertEqual(5, len(set(replacements)))

        for raw_text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': raw_text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Ejemplo n.º 16
0
    def test_transformation(self):
        """Check that Tense changes only the word at index 4.

        Three samples are requested; each must keep every other word intact.
        Empty and symbol-only inputs must yield no samples.
        """
        sent1 = 'The quick brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = Tense()
        x = swap_ins.transform(data_sample, n=3)

        self.assertEqual(3, len(x))
        # The original tokens do not change between iterations.
        origin = data_sample.get_words('x')
        for sample in x:
            trans = sample.get_words('x')
            self.assertEqual(origin[:4], trans[:4])
            self.assertEqual(origin[5:], trans[5:])
            self.assertNotEqual(trans[4], origin[4])

        # test special input
        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
Ejemplo n.º 17
0
    def test_transformation(self):
        """Check that the shared `swap_ins` wraps `sent1` in varying text.

        Each of the five samples must contain `sent1` verbatim, and both the
        text before it and the text after it must differ across samples. The
        output for a symbol-only input under random seed 100 is pinned.
        """
        # test the change num
        variants = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(variants))

        # test if the item change
        prefixes = []
        suffixes = []
        for variant in variants:
            text = variant.get_text('x')
            self.assertIn(sent1, text)
            start = text.index(sent1)
            prefixes.append(text[:start])
            suffixes.append(text[start + len(sent1):])

        # test if the n change samples not equal
        self.assertEqual(len(set(prefixes)), 5)
        self.assertEqual(len(set(suffixes)), 5)

        random.seed(100)
        symbol_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        first_output = swap_ins.transform(symbol_sample)[0]
        self.assertEqual('I actually wanted to talk to you, ~!@#$%^7890 "\'',
                         first_output.get_text('x'))
Ejemplo n.º 18
0
    def test_transformation(self):
        """Check SwapNamedEnt entity decomposition and pinned outputs.

        First verifies that `decompose_entities_info` returns positions, words
        and labels that line up with the sample's tokens, then pins the five
        texts produced under random seed 208 and checks the label is kept.
        Empty and symbol-only inputs must yield no samples.
        """
        sent1 = 'Lionel Messi is a football player from Argentina. ' \
                'Fudan University is located in Shanghai province, ' \
                'Alibaba with 50000 staff. Wang Xiao is a stuendent. ' \
                'Zhangheng road in Pudong area.'
        data_sample = SASample({'x': sent1, 'y': "positive"})
        swap_ins = SwapNamedEnt()

        # test decompose_entities_info
        words = data_sample.get_words('x')
        a, b, c = swap_ins.decompose_entities_info(data_sample.get_ner('x'))
        for pos, word, label in zip(a, b, c):
            self.assertIn(label, ['LOCATION', 'PERSON', 'ORGANIZATION'])
            # Each position pair must slice out exactly the entity's words.
            self.assertEqual(words[pos[0]:pos[1]], word.split(' '))

        # test transformation
        import random
        random.seed(208)

        trans = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(trans))
        change = ["Mr Ross is a football player from Tashkent. Fudan Unive"
                  "rsity is located in South Zone province, "
                  "Zagreb with 50000 staff. Jean Chrétien is a stuendent. Zhan"
                  "gheng road in  Czech Republic area.",
                  "Mr Ross is a football player from Tashkent. Fudan Univer"
                  "sity is located in South Zone province, "
                  "Zagreb with 50000 staff. Jean Chrétien is a stuendent. "
                  "Zhangheng road in the valley area.",
                  "Mr Ross is a football player from Tashkent. Fudan University"
                  " is located in South Zone province, "
                  "Zagreb with 50000 staff. Jean Chrétien is a stuendent. "
                  "Zhangheng road in Parvan area.",
                  "Mr Ross is a football player from Tashkent. Fudan University"
                  " is located in South Zone province, "
                  "Zagreb with 50000 staff. "
                  "Jean Chrétien is a stuendent. Zhangheng road in East-West "
                  "area.",
                  "Mr Ross is a football player from Tashkent. Fudan "
                  "University is located in east Atlantic province, "
                  "Prague with 50000 staff. Mr Mayoral is a stuendent. "
                  "Zhangheng road in West Midlands area."]
        for sample, sent in zip(trans, change):
            # assertEqual, not assertTrue: assertTrue would treat the second
            # argument as the failure message and always pass.
            self.assertEqual("positive", sample.get_value('y'))
            self.assertEqual(sent, sample.get_text('x'))

        # test special sample
        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
Ejemplo n.º 19
0
    def test_transformation(self):
        """Check Prejudice argument validation and 'Name'/'woman' swapping.

        Invalid argument combinations must raise ValueError. Under random
        seed 100, words at index 0-4, 6-7 and the last word must be unchanged,
        and the pair of words at index 5 and 8 must be unique across the five
        samples. Empty and symbol-only inputs must yield no samples.
        """
        # test wrong mode
        invalid_arguments = (
            ('Loc', 'woman'),
            ('Name', 'Japan'),
            ('Loc', 'Ja'),
            ('Loc', ['Ja']),
        )
        for bad_args in invalid_arguments:
            self.assertRaises(ValueError, Prejudice, *bad_args)

        import random
        random.seed(100)

        sentence = ("Interesting and moving performances by Tom Courtenay "
                    "and Peter Finch")
        swap_ins = Prejudice(change_type='Name', prejudice_tendency='woman')

        data_sample = SASample({'x': sentence, 'y': "negative"})
        results = swap_ins.transform(data_sample, n=5)

        for raw_text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': raw_text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))

        swapped = []
        for result in results:
            before = data_sample.get_words('x')
            after = result.get_words('x')
            self.assertEqual(before[:5], after[:5])
            self.assertEqual(before[6:8], after[6:8])
            self.assertEqual(before[-1], after[-1])
            swapped.append(after[5] + after[8])

        self.assertEqual(5, len(set(swapped)))
Ejemplo n.º 20
0
    def test_transformation(self):
        """Check WordCase validation and its lower / upper / title modes.

        An unknown case type must raise ValueError, the default case type must
        be one of the four known values, and each explicit mode must match the
        corresponding `str` method applied word by word. Empty and symbol-only
        inputs come back with unchanged text in 'lower' mode.
        """
        sentence = 'The quick brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sentence, 'y': "negative"})

        # test wrong model
        self.assertRaises(ValueError, WordCase, 'random little')

        # test model
        default_case = WordCase()
        self.assertIn(
            default_case.case_type, ['upper', 'lower', 'title', 'random'])

        # test lower
        expected_lower = [word.lower() for word in data_sample.get_words('x')]
        lowered = WordCase('lower').transform(data_sample)[0]
        self.assertEqual(expected_lower, lowered.get_words('x'))

        # test upper
        expected_upper = [word.upper() for word in data_sample.get_words('x')]
        uppered = WordCase('upper').transform(data_sample)[0]
        self.assertEqual(expected_upper, uppered.get_words('x'))

        # test title
        expected_title = [word.title() for word in data_sample.get_words('x')]
        titled = WordCase('title').transform(data_sample)[0]
        self.assertEqual(expected_title, titled.get_words('x'))

        # test special case
        empty_sample = SASample({'x': '', 'y': "negative"})
        empty_result = WordCase('lower').transform(empty_sample)[0]
        self.assertEqual('', empty_result.get_text('x'))

        symbol_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        symbol_result = WordCase('lower').transform(symbol_sample)[0]
        self.assertEqual('~!@#$%^7890"\'', symbol_result.get_text('x'))
Ejemplo n.º 21
0
import unittest
import random
import editdistance

from TextFlint.input_layer.component.sample import SASample
from TextFlint.generation_layer.transformation.UT.typos import Typos


# Module-level fixtures shared by the tests in this file.
# Raw input: text under 'x', label under 'y'.
sample = {'x': 'Pride and Prejudice is a famous fiction', 'y': 'positive'}
data_sample = SASample(sample)
# One transformation instance is reused; tests overwrite its `mode`.
typos_trans = Typos(mode='random')
# Seeded once when the module is imported, not before each test.
random.seed(100)


class TestTwitterType(unittest.TestCase):
    """Tests for the module-level `typos_trans` transformation."""

    def test_random(self):
        """Check 'random' mode: three distinct samples within edit distance 4.

        An empty input yields no samples; a symbol-only input yields one.
        """
        typos_trans.mode = 'random'
        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], typos_trans.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual(1, len(typos_trans.transform(special_sample)))

        x = typos_trans.transform(data_sample, n=3)
        self.assertEqual(3, len(x))

        origin_text = data_sample.get_text('x')
        change = []
        for sample in x:
            trans_text = sample.get_text('x')
            self.assertNotEqual(trans_text, origin_text)
            self.assertLessEqual(
                editdistance.distance(trans_text, origin_text), 4)
            change.append(trans_text)

        # The collected texts were previously never checked; require the
        # three generated samples to differ from one another.
        self.assertEqual(3, len(set(change)))
Ejemplo n.º 22
0
import unittest

from TextFlint.input_layer.component.sample import SASample
from TextFlint.generation_layer.transformation.UT.insert_adv import InsertAdv

# Module-level fixtures shared by the tests in this file: the input sentence,
# the sample built from it (text under 'x', label under 'y'), and a single
# InsertAdv instance.
sent1 = 'The quick brown fox jumps over the lazy dog .'
data_sample = SASample({'x': sent1, 'y': "negative"})
swap_ins = InsertAdv()


class TestAddAdverb(unittest.TestCase):
    def test_transformation(self):
        # test the change num
        change_sample = swap_ins.transform(data_sample, n=10)
        self.assertEqual(10, len(change_sample))

        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))

        # test if the item change
        change_item = []
        for sample in change_sample:
            self.assertEqual(
                data_sample.get_words('x')[:4],
                sample.get_words('x')[:4])
            self.assertEqual(
                data_sample.get_words('x')[-6:],
                sample.get_words('x')[-6:])
            self.assertEqual(