예제 #1
0
    def test_transformation(self):
        """Check SpellingError output on a fixed sentence and on edge cases.

        With the RNG seeded, five transformed samples are requested. Each one
        must keep word 0 and words 2-6 of the original, must differ from the
        original at word positions 1, 7 and 8, and the five results must be
        pairwise distinct at those positions. Empty and symbol-only inputs
        must produce no samples.
        """
        import random
        random.seed(100)
        sent1 = 'The quick brown fox jumps over the lazy dog.'
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = SpellingError()
        x = swap_ins.transform(data_sample, n=5)

        origin = data_sample.get_words('x')
        change = []
        for sample in x:
            trans = sample.get_words('x')
            # Positions that must be left untouched.
            self.assertEqual(origin[0], trans[0])
            self.assertEqual(origin[2:7], trans[2:7])
            change.append(trans[1] + trans[7] + trans[8])
            # Positions that must be rewritten; assertNotEqual reports both
            # values on failure, unlike assertTrue(a != b).
            for idx in (1, 7, 8):
                self.assertNotEqual(origin[idx], trans[idx])

        self.assertEqual(5, len(set(change)))

        # Degenerate inputs: nothing to transform.
        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
예제 #2
0
    def test_transformation(self):
        """Check sample count, edge cases and per-sample shape of the output.

        Uses the module-level ``swap_ins`` and ``data_sample``. Each of the ten
        results must match the original on the first four and last six words,
        be exactly one word longer, and carry a distinct word at index 4.
        """
        # Requested number of samples must be honoured.
        change_sample = swap_ins.transform(data_sample, n=10)
        self.assertEqual(10, len(change_sample))

        # Degenerate inputs yield no samples.
        for raw_text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': raw_text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))

        # Only one word, at index 4, may be new in each sample.
        change_item = []
        for sample in change_sample:
            origin_words = data_sample.get_words('x')
            trans_words = sample.get_words('x')
            self.assertEqual(origin_words[:4], trans_words[:4])
            self.assertEqual(origin_words[-6:], trans_words[-6:])
            self.assertEqual(len(origin_words) + 1, len(trans_words))
            change_item.append(trans_words[4])

        # All ten samples must differ in the inserted word.
        distinct_items = set(change_item)
        self.assertEqual(len(distinct_items), 10)
예제 #3
0
    def test_transformation(self):
        """Check MLMSuggestion output on a fixed sentence and on edge cases.

        Five samples are requested with the RNG seeded. Every sample must
        agree with the original on the first two words, on word 3 and on the
        last five words; the concatenation of words 2 and 4 must be unique
        per sample. An empty input must yield no samples, and the symbol-only
        input is pinned to one exact output text.
        """
        import random

        sent2 = 'The quick brown fox jumps over the lazy dog. '
        data_sample = SASample({'x': sent2, 'y': "negative"})
        random.seed(100)
        swap_ins = MLMSuggestion(device='cpu')

        x = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(x))

        origin = data_sample.get_words('x')
        tokens = []
        for _sample in x:
            trans = _sample.get_words('x')
            # assertEqual shows the differing words on failure, which
            # assertTrue(a == b) does not.
            self.assertEqual(origin[:2], trans[:2])
            self.assertEqual(origin[3], trans[3])
            self.assertEqual(origin[-5:], trans[-5:])
            tokens.append(trans[2] + trans[4])

        self.assertEqual(5, len(set(tokens)))

        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        special_sample = swap_ins.transform(special_sample)[0]
        self.assertEqual('epilogue "\'', special_sample.get_text('x'))
예제 #4
0
    def test_transformation(self):
        """Check TwitterType output on a fixed sentence and on edge cases.

        Five samples are requested. Each must equal the original word list
        with one extra word either prepended or appended, and the five
        generated texts must be pairwise distinct. Empty and symbol-only
        inputs must each yield exactly one sample whose text differs from the
        input.
        """
        sent1 = "The quick brown fox jumps over the lazy dog."
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = TwitterType()
        x = swap_ins.transform(data_sample, n=5)

        self.assertEqual(5, len(x))
        origin = data_sample.get_words('x')
        change = []
        for sample in x:
            trans = sample.get_words('x')
            change.append(sample.get_text('x'))
            # One extra word at the front or at the back, nothing else.
            self.assertTrue(origin == trans[1:] or origin == trans[:-1])

        # Compare the generated texts, not the sample objects: the previous
        # check built the set from the samples themselves and never used the
        # collected texts.
        self.assertEqual(5, len(set(change)))

        # test special input
        special_sample = SASample({'x': '', 'y': "negative"})
        x = swap_ins.transform(special_sample)
        self.assertEqual(1, len(x))
        self.assertNotEqual('', x[0].get_text('x'))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        x = swap_ins.transform(special_sample)
        self.assertEqual(1, len(x))
        self.assertNotEqual('~!@#$%^7890"\'', x[0].get_text('x'))
예제 #5
0
    def test_transformation(self):
        """Check the exact transformed text, label preservation and edge cases.

        Uses the module-level ``swap_ins`` and ``data_sample``. A default call
        must return one sample with the pinned text and the original label;
        empty and symbol-only inputs must return no samples.
        """
        results = swap_ins.transform(data_sample)
        self.assertEqual(1, len(results))

        transformed = results[0]
        expected_text = ("we are playing ping pang ball, you're so lazy. "
                         "She is so beautiful!")
        self.assertEqual(expected_text, transformed.get_text('x'))
        # The label must be carried over unchanged.
        self.assertEqual(
            transformed.get_value('y'), data_sample.get_value('y'))

        for raw_text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': raw_text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
예제 #6
0
    def test_swap(self):
        """Check the 'swap' mode of the module-level ``typos_trans``.

        One sample is requested; its text must differ from the original and
        lie within edit distance 4 of it. An empty input must yield no
        samples and the symbol-only input exactly one.
        """
        typos_trans.mode = 'swap'
        x = typos_trans.transform(data_sample, n=1)
        self.assertEqual(1, len(x))

        origin_text = data_sample.get_text('x')
        for sample in x:
            trans_text = sample.get_text('x')
            # Specific assertions report the offending values on failure.
            self.assertNotEqual(origin_text, trans_text)
            self.assertLessEqual(
                editdistance.distance(trans_text, origin_text), 4)

        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], typos_trans.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual(1, len(typos_trans.transform(special_sample)))
예제 #7
0
    def test_transformation(self):
        """Check ReverseNeg output on a fixed sentence and on edge cases.

        The single transformed sample is pinned to an exact text and must
        carry the label 'negative'. Empty and symbol-only inputs must
        produce no samples.
        """
        sent1 = "The quick brown fox jumps over the lazy dog ."
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = ReverseNeg()
        x = swap_ins.transform(data_sample, n=1)
        x = x[0]
        self.assertEqual(
            'The quick brown fox does not jumps over the lazy '
            'dog.', x.get_text('x'))
        self.assertEqual('negative', x.get_value('y'))

        # test special input
        # (a redundant transform call whose result was discarded has been
        # dropped; the assertion below performs the same call)
        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
예제 #8
0
    def test_transformation(self):
        """Check WordCase construction, its three fixed modes and edge cases.

        An unsupported mode string must raise ValueError, and the default
        constructor must select one of the four known modes. The 'lower',
        'upper' and 'title' modes are compared word by word against the
        matching ``str`` method. Empty and symbol-only inputs must pass
        through 'lower' unchanged.
        """
        sent1 = 'The quick brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sent1, 'y': "negative"})

        # test wrong model
        self.assertRaises(ValueError, WordCase, 'random little')

        # test model
        test_case = WordCase()
        # assertIn reports the unexpected value on failure.
        self.assertIn(test_case.case_type,
                      ['upper', 'lower', 'title', 'random'])

        origin = data_sample.get_words('x')

        # test lower
        self.assertEqual(
            [word.lower() for word in origin],
            WordCase('lower').transform(data_sample)[0].get_words('x'))

        # test upper
        self.assertEqual(
            [word.upper() for word in origin],
            WordCase('upper').transform(data_sample)[0].get_words('x'))

        # test title
        self.assertEqual(
            [word.title() for word in origin],
            WordCase('title').transform(data_sample)[0].get_words('x'))

        # test special case
        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual('', WordCase('lower').transform(
            special_sample)[0].get_text('x'))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual('~!@#$%^7890"\'', WordCase('lower').transform(
            special_sample)[0].get_text('x'))
예제 #9
0
    def test_insert(self):
        """Check the 'insert' mode of the module-level ``typos_trans``.

        An empty input must yield no samples and the symbol-only input
        exactly one. For the shared ``data_sample``, three samples are
        requested; each text must differ from the original, lie within edit
        distance 2 of it, and the three texts must be pairwise distinct.
        """
        typos_trans.mode = 'insert'
        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], typos_trans.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual(1, len(typos_trans.transform(special_sample)))

        x = typos_trans.transform(data_sample, n=3)
        self.assertEqual(3, len(x))

        origin_text = data_sample.get_text('x')
        change = []
        for sample in x:
            trans_text = sample.get_text('x')
            # Specific assertions report the offending values on failure.
            self.assertNotEqual(origin_text, trans_text)
            self.assertLessEqual(
                editdistance.distance(trans_text, origin_text), 2)
            change.append(trans_text)

        self.assertEqual(3, len(set(change)))
예제 #10
0
 def test_transformation(self):
     """Check that BackTrans returns the pinned text and keeps the label."""
     source_text = ('To whom did the Virgin Mary allegedly appear in '
                    '1858 in Lourdes France?')
     data_sample = SASample({'x': source_text, 'y': "negative"})
     back_trans = BackTrans(device='cpu')
     results = back_trans.transform(data_sample, n=1)

     expected_text = ('To whom did the Virgin Mary allegedly appear '
                      'in Lourdes, France, in 1858?')
     self.assertEqual(expected_text, results[0].get_text('x'))
     # The label must survive the transformation.
     self.assertEqual('negative', results[0].get_value('y'))
예제 #11
0
    def test_transformation(self):
        """Check SwapAntWordNet output on a fixed sentence and on edge cases.

        Although five samples are requested, exactly one is expected. It must
        keep word 0 and everything from word 2 onward, and differ from the
        original at word 1. Empty and symbol-only inputs must produce no
        samples.
        """
        sent1 = 'The fast brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = SwapAntWordNet()
        x = swap_ins.transform(data_sample, n=5)
        self.assertEqual(1, len(x))

        origin = data_sample.get_words('x')
        for sample in x:
            trans = sample.get_words('x')
            self.assertEqual(origin[0], trans[0])
            # assertNotEqual reports the unchanged word on failure.
            self.assertNotEqual(origin[1], trans[1])
            self.assertEqual(origin[2:], trans[2:])

        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
예제 #12
0
    def test_transformation(self):
        """Check SwapNum output on a fixed sentence and on edge cases.

        With the RNG seeded, three samples are requested. Each text must be
        at edit distance 1 from the original and keep the 'negative' label;
        the first sample is pinned to an exact text. Empty and symbol-only
        inputs must produce no samples.
        """
        import random
        random.seed(100)
        sent1 = "Here's 0 bug."
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = SwapNum()
        x = swap_ins.transform(data_sample, n=3)

        self.assertEqual(3, len(x))
        for sample in x:
            # assertEqual reports the actual distance / label on failure.
            self.assertEqual(
                1, editdistance.distance(sample.get_text('x'), sent1))
            self.assertEqual("negative", sample.get_value('y'))

        self.assertEqual("Here's 1 bug.", x[0].get_text('x'))

        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^"\'', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
예제 #13
0
    def test_transformation(self):
        """Check SwapSynWordNet output on a fixed sentence and on edge cases.

        With the RNG seeded, five samples are requested. Comparing each
        sample with the original word by word (pairwise, up to the shorter
        length) must find exactly one differing position. Empty and
        symbol-only inputs must produce no samples.
        """
        import random
        random.seed(1)
        sent1 = 'The quick brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = SwapSynWordNet()
        x = swap_ins.transform(data_sample, n=5)

        self.assertEqual(5, len(x))
        origin = data_sample.get_words('x')
        for sample in x:
            # Number of aligned positions where the word was replaced.
            changed = sum(
                1 for new, old in zip(sample.get_words('x'), origin)
                if new != old)
            self.assertEqual(1, changed)

        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
예제 #14
0
    def test_transformation(self):
        """Check Punctuation output on a fixed sentence and on edge cases.

        Up to ten samples are requested: each must contain the same
        alphabetic characters as the original, in order, while differing as
        a whole text. A second, seeded call is pinned to one exact output.
        Empty and symbol-only inputs must each yield exactly one sample.
        """
        import random

        sent1 = "The quick brown fox jumps over the lazy dog."
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = Punctuation()
        x = swap_ins.transform(data_sample, n=10)

        # test the form
        origin_text = data_sample.get_text('x')
        origin_letters = ''.join(filter(str.isalpha, origin_text))
        for _sample in x:
            trans_text = _sample.get_text('x')
            # Only non-alphabetic characters may have been added or changed.
            self.assertEqual(
                ''.join(filter(str.isalpha, trans_text)), origin_letters)
            self.assertNotEqual(origin_text, trans_text)

        self.assertLessEqual(len(x), 10)

        # Seed only now so that the pinned output below is reproducible.
        random.seed(100)
        x = swap_ins.transform(data_sample, n=1)
        x = x[0]
        # test the data
        self.assertEqual('{ The quick brown fox jumps over the lazy dog; }',
                         x.get_text('x'))

        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual(1, len(swap_ins.transform(special_sample)))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual(1, len(swap_ins.transform(special_sample)))
예제 #15
0
    def test_transformation(self):
        """Check Tense output on a fixed sentence and on edge cases.

        Three samples are requested. Each must keep the first four words and
        everything after word 4, and must differ from the original at word 4.
        Empty and symbol-only inputs must produce no samples.
        """
        sent1 = 'The quick brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = Tense()
        x = swap_ins.transform(data_sample, n=3)

        self.assertEqual(3, len(x))
        origin = data_sample.get_words('x')
        for sample in x:
            trans = sample.get_words('x')
            self.assertEqual(origin[:4], trans[:4])
            self.assertEqual(origin[5:], trans[5:])
            # Only the word at index 4 may change, and it must change.
            self.assertNotEqual(origin[4], trans[4])

        # test special input
        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
예제 #16
0
    def test_transformation(self):
        """Check SwapSynWordEmbedding output on a fixed sentence and edge cases.

        With the RNG seeded, five samples are requested. Each must keep the
        first five words and the last word of the original, and the
        second-to-last words of the five samples must be pairwise distinct.
        Empty and symbol-only inputs must produce no samples.
        """
        import random
        random.seed(1)
        sent1 = "There are no water in bottom."
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = SwapSynWordEmbedding()
        x = swap_ins.transform(data_sample, n=5)
        # assertEqual reports the actual count on failure.
        self.assertEqual(5, len(x))

        origin = data_sample.get_words('x')
        change = []
        for sample in x:
            trans = sample.get_words('x')
            change.append(trans[-2])
            self.assertEqual(origin[:5], trans[:5])
            self.assertEqual(origin[-1], trans[-1])
        self.assertEqual(5, len(set(change)))

        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
예제 #17
0
    def test_transformation(self):
        """Check that the original sentence is wrapped, not altered.

        Uses the module-level ``swap_ins``, ``data_sample``, ``sent1`` and
        ``random``. Five samples are requested; each must contain ``sent1``
        verbatim, and both the text before it and the text after it must be
        distinct across the five samples. A seeded call on the symbol-only
        input is pinned to one exact output text.
        """
        # test the change num
        change_sample = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(change_sample))

        # test if the item change
        begin = []
        end = []
        for sample in change_sample:
            text = sample.get_text('x')
            # assertIn reports both strings on failure.
            self.assertIn(sent1, text)
            # Split around the first occurrence of the original sentence.
            prefix, _, suffix = text.partition(sent1)
            begin.append(prefix)
            end.append(suffix)

        # test if the n change samples not equal
        self.assertEqual(len(set(begin)), 5)
        self.assertEqual(len(set(end)), 5)

        # Seed so that the pinned output below is reproducible.
        random.seed(100)
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        special_sample = swap_ins.transform(special_sample)[0]
        self.assertEqual('I actually wanted to talk to you, ~!@#$%^7890 "\'',
                         special_sample.get_text('x'))
예제 #18
0
    def test_transformation(self):
        """Check SwapNamedEnt entity decomposition, output and edge cases.

        First verifies that ``decompose_entities_info`` returns positions,
        entity strings and labels that are consistent with the sample's word
        list. Then, with the RNG seeded, five transformed samples are
        compared against pinned texts and must keep the 'positive' label.
        Empty and symbol-only inputs must produce no samples.
        """
        import random

        sent1 = 'Lionel Messi is a football player from Argentina. ' \
                'Fudan University is located in Shanghai province, ' \
                'Alibaba with 50000 staff. Wang Xiao is a stuendent. ' \
                'Zhangheng road in Pudong area.'
        data_sample = SASample({'x': sent1, 'y': "positive"})
        swap_ins = SwapNamedEnt()

        # test decompose_entities_info
        words = data_sample.get_words('x')
        a, b, c = swap_ins.decompose_entities_info(data_sample.get_ner('x'))
        for pos, word, label in zip(a, b, c):
            self.assertIn(label, ['LOCATION', 'PERSON', 'ORGANIZATION'])
            # The span must cover exactly the words of the entity string.
            self.assertEqual(words[pos[0]:pos[1]], word.split(' '))

        # test transformation
        random.seed(208)

        trans = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(trans))
        change = [
            "Mr Ross is a football player from Tashkent. Fudan Unive"
            "rsity is located in South Zone province, "
            "Zagreb with 50000 staff. Jean Chrétien is a stuendent. Zhan"
            "gheng road in  Czech Republic area.",

            "Mr Ross is a football player from Tashkent. Fudan Univer"
            "sity is located in South Zone province, "
            "Zagreb with 50000 staff. Jean Chrétien is a stuendent. "
            "Zhangheng road in the valley area.",

            "Mr Ross is a football player from Tashkent. Fudan University"
            " is located in South Zone province, "
            "Zagreb with 50000 staff. Jean Chrétien is a stuendent. "
            "Zhangheng road in Parvan area.",

            "Mr Ross is a football player from Tashkent. Fudan University"
            " is located in South Zone province, "
            "Zagreb with 50000 staff. "
            "Jean Chrétien is a stuendent. Zhangheng road in East-West "
            "area.",

            "Mr Ross is a football player from Tashkent. Fudan "
            "University is located in east Atlantic province, "
            "Prague with 50000 staff. Mr Mayoral is a stuendent. "
            "Zhangheng road in West Midlands area.",
        ]
        for sample, sent in zip(trans, change):
            # Was assertTrue("positive", ...), which always passes because
            # the second argument is only the failure message.
            self.assertEqual("positive", sample.get_value('y'))
            self.assertEqual(sent, sample.get_text('x'))

        # test special sample
        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
예제 #19
0
    def test_transformation(self):
        """Check Prejudice argument validation, output and edge cases.

        Four invalid argument combinations must raise ValueError. With the
        RNG seeded and a 'Name'/'woman' configuration, five samples are
        requested; each must keep the first five words, words 6-7 and the
        last word of the original, and the concatenation of words 5 and 8
        must be unique per sample. Empty and symbol-only inputs must produce
        no samples.
        """
        # test wrong mode
        self.assertRaises(ValueError, Prejudice, 'Loc', 'woman')
        self.assertRaises(ValueError, Prejudice, 'Name', 'Japan')
        self.assertRaises(ValueError, Prejudice, 'Loc', 'Ja')
        self.assertRaises(ValueError, Prejudice, 'Loc', ['Ja'])

        import random
        random.seed(100)

        sent1 = "Interesting and moving performances by Tom Courtenay " \
                "and Peter Finch"
        swap_ins = Prejudice(change_type='Name', prejudice_tendency='woman')

        data_sample = SASample({'x': sent1, 'y': "negative"})
        x = swap_ins.transform(data_sample, n=5)

        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(special_sample))

        origin = data_sample.get_words('x')
        change = []
        for sa_sample in x:
            trans = sa_sample.get_words('x')
            self.assertEqual(origin[:5], trans[:5])
            self.assertEqual(origin[6:8], trans[6:8])
            self.assertEqual(origin[-1], trans[-1])
            # Words 5 and 8 are the positions allowed to change.
            change.append(trans[5] + trans[8])

        # assertEqual reports the actual number of distinct results.
        self.assertEqual(5, len(set(change)))
예제 #20
0
import unittest
import random

from textflint.input_layer.component.sample import SASample
from textflint.generation_layer.transformation.UT.append_irr import *

# Shared fixtures for the test class below: the reference sentence, the sample
# built from it, and the transformation instance under test.
sent1 = 'The quick brown fox jumps over the lazy dog.'
data_sample = SASample({'x': sent1, 'y': "negative"})
swap_ins = AppendIrr()


class TestAppendIrr(unittest.TestCase):
    """Tests driven by the module-level ``swap_ins`` and ``data_sample``."""

    def test_transformation(self):
        """Check that the original sentence is wrapped, not altered.

        Five samples are requested; each must contain ``sent1`` verbatim, and
        both the text before it and the text after it must be distinct
        across the five samples.
        """
        # test the change num
        change_sample = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(change_sample))

        # test if the item change
        begin = []
        end = []
        for sample in change_sample:
            text = sample.get_text('x')
            # assertIn reports both strings on failure.
            self.assertIn(sent1, text)
            # Split around the first occurrence of the original sentence.
            prefix, _, suffix = text.partition(sent1)
            begin.append(prefix)
            end.append(suffix)

        # test if the n change samples not equal
        self.assertEqual(len(set(begin)), 5)
        self.assertEqual(len(set(end)), 5)

        # Reseeds the global RNG. Nothing later in this method consumes it;
        # kept because code running afterwards may depend on the seed.
        random.seed(100)
예제 #21
0
import unittest
import random
import editdistance

from textflint.input_layer.component.sample import SASample
from textflint.generation_layer.transformation.UT.typos import Typos


# Module-level fixtures used by the test class below: the raw input dict, the
# sample built from it, and the transformation instance under test.
sample = {'x': 'Pride and Prejudice is a famous fiction', 'y': 'positive'}
data_sample = SASample(sample)
typos_trans = Typos(mode='random')
# Seed the global RNG once, when this module is executed.
random.seed(100)


class TestTwitterType(unittest.TestCase):
    # NOTE(review): the class name says TwitterType, but the visible test
    # exercises the module-level ``typos_trans`` (a Typos instance) - confirm
    # whether the name is intentional.
    def test_random(self):
        """Check the 'random' mode of the module-level ``typos_trans``."""
        typos_trans.mode = 'random'
        # Edge cases: an empty input yields no samples, a symbol-only input
        # yields exactly one.
        special_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], typos_trans.transform(special_sample))
        special_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        self.assertEqual(1, len(typos_trans.transform(special_sample)))

        x = typos_trans.transform(data_sample, n=3)
        self.assertTrue(3 == len(x))

        # Collect the transformed texts; each must differ from the original
        # and lie within edit distance 4 of it.
        change = []
        for sample in x:
            self.assertTrue(sample.get_text('x') != data_sample.get_text('x'))
            self.assertTrue(editdistance.distance(sample.get_text('x'),
                                                data_sample.get_text('x')) <= 4)
            change.append(sample.get_text('x'))