Example #1
0
    def test_swap(self):
        """Check that repeated swap augmentation only reorders characters.

        Each text is passed through the swap augmenter ten times in a row
        (see the issue linked below). The final text must differ from the
        input while still being made of the same characters, ignoring case.
        """
        texts = ['The quick brown fox jumps over the lazy dog', 'testing']

        aug = RandomCharAug(action="swap", min_char=1)
        for text in texts:
            augmented_text = text

            # https://github.com/makcedward/nlpaug/issues/77
            for _ in range(10):
                augmented_text = aug.augment(augmented_text)

            # The original built per-character frequency tables but never
            # asserted on them; compare the character multisets directly.
            # NOTE(review): case is folded because the swap action may keep
            # upper/lower case tied to the position rather than the letter
            # in some nlpaug versions -- confirm against the installed one.
            self.assertEqual(
                sorted(text.lower()), sorted(augmented_text.lower()))
            self.assertNotEqual(text, augmented_text)

        self.assertTrue(len(texts) > 0)
Example #2
0
    def test_swap_middle(self):
        """Swap with ``swap_mode='middle'`` must alter the text, not its length."""
        original = 'quick brown jumps over lazy'
        swapper = RandomCharAug(action="swap", swap_mode='middle', min_char=4)

        result = swapper.augment(original)

        # The output has to differ from the input while keeping its length.
        self.assertNotEqual(original, result)
        self.assertEqual(len(result), len(original))
Example #3
0
    def test_substitute_single_word(self):
        """Substitute augmentation of a single word must change that word."""
        words = ['Zoology', 'roku123456']
        substituter = RandomCharAug(action='substitute', min_char=1)

        for word in words:
            result = substituter.augment(word)
            self.assertNotEqual(word, result)

        # Guard against the loop silently running over no inputs.
        self.assertTrue(len(words) > 0)
Example #4
0
    def testSwapChar(self):
        """Swap augmentation must change each single-word input."""
        words = ['Zoology', 'roku123456']
        swapper = RandomCharAug(action=Action.SWAP)

        for word in words:
            result = swapper.augment(word)
            self.assertNotEqual(word, result)

        # Guard against the loop silently running over no inputs.
        self.assertTrue(len(words) > 0)
Example #5
0
    def testSubstituteExistChar(self):
        """Substitute augmentation must change each single-word input."""
        words = ['Zoology', 'roku123456']
        substituter = RandomCharAug(action=Action.SUBSTITUTE)

        for word in words:
            result = substituter.augment(word)
            self.assertNotEqual(word, result)

        # Guard against the loop silently running over no inputs.
        self.assertTrue(len(words) > 0)
Example #6
0
    def test_delete(self):
        """Delete augmentation must change each word and make it shorter."""
        words = ['Zoology', 'roku123456']
        deleter = RandomCharAug(action='delete', min_char=1)

        for word in words:
            result = deleter.augment(word)
            self.assertNotEqual(word, result)
            # Deleting characters has to reduce the length.
            self.assertLess(len(result), len(word))

        # Guard against the loop silently running over no inputs.
        self.assertTrue(len(words) > 0)
Example #7
0
    def test_insert_single_word(self):
        """Insert augmentation must change each word and make it longer."""
        words = ['Zoology', 'roku123456']
        inserter = RandomCharAug(action=Action.INSERT, min_char=1)

        for word in words:
            result = inserter.augment(word)
            self.assertNotEqual(word, result)
            # Inserting characters has to increase the length.
            self.assertLess(len(word), len(result))

        # Guard against the loop silently running over no inputs.
        self.assertTrue(len(words) > 0)
Example #8
0
    def testInsertExistChar(self):
        """Insert augmentation with default settings must lengthen each word."""
        words = ['Zoology', 'roku123456']
        inserter = RandomCharAug(action=Action.INSERT)

        for word in words:
            result = inserter.augment(word)
            self.assertNotEqual(word, result)
            # Inserting characters has to increase the length.
            self.assertLess(len(word), len(result))

        # Guard against the loop silently running over no inputs.
        self.assertTrue(len(words) > 0)
Example #9
0
    def test_min_char(self):
        """With ``min_char=20`` the short test words must come back unchanged.

        Runs the check for every action: insert, swap, substitute and delete.
        """
        words = ['Zoology', 'roku123456']
        actions = ['insert', 'swap', 'substitute', 'delete']

        for action_name in actions:
            augmenter = RandomCharAug(action=action_name, min_char=20)
            for word in words:
                result = augmenter.augment(word)
                # Both test words are shorter than 20 characters, so the
                # test expects identical text (and therefore equal length).
                self.assertEqual(word, result)
                self.assertEqual(len(result), len(word))

        # Guard against the loops silently running over no inputs.
        self.assertTrue(len(words) > 0)
Example #10
0
    def testSwapStopwords(self):
        """Words listed as stopwords must be left untouched by swap."""
        words = ['Zoology', 'roku123456']
        # Only the first word is registered as a stopword.
        protected = words[:1]
        swapper = RandomCharAug(action=Action.SWAP, stopwords=protected)

        for word in words:
            result = swapper.augment(word)
            if word not in protected:
                # Ordinary words are expected to be modified.
                self.assertNotEqual(word, result)
            else:
                # Stopwords are expected to pass through unchanged.
                self.assertEqual(word, result)

        # Guard against the loop silently running over no inputs.
        self.assertTrue(len(words) > 0)
Example #11
0
    def test_candidiates(self):
        """Augmented text must contain at least one of the custom candidates."""
        candidiates = ['AAA', '11', '===', '中文']
        original = 'quick brown jumps over lazy'
        # 'candidiates' is the keyword name accepted by the augmenter here.
        augmenter = RandomCharAug(min_char=4, candidiates=candidiates)

        result = augmenter.augment(original)
        self.assertNotEqual(original, result)

        # At least one candidate string has to show up in the output.
        found = any(candidate in result for candidate in candidiates)
        self.assertTrue(found)
Example #12
0
    def test_substitute_multi_words(self):
        """Substitute on a sentence must change some, but not all, tokens."""
        sentences = ['The quick brown fox jumps over the lazy dog']
        substituter = RandomCharAug(action='substitute', min_char=1)

        for sentence in sentences:
            result = substituter.augment(sentence)

            before = substituter.tokenizer(sentence)
            after = substituter.tokenizer(result)

            # Count the positions whose token differs after augmentation.
            changed = sum(1 for old, new in zip(before, after) if old != new)

            self.assertLess(changed, len(before))
            self.assertNotEqual(sentence, result)

        # Guard against the loop silently running over no inputs.
        self.assertTrue(len(sentences) > 0)
Example #13
0
    def test_swap(self):
        """Swap on a sentence must change some, but not all, tokens."""
        sentences = ['The quick brown fox jumps over the lazy dog']
        swapper = RandomCharAug(action=Action.SWAP)

        for sentence in sentences:
            result = swapper.augment(sentence)

            before = swapper.tokenizer(sentence)
            after = swapper.tokenizer(result)

            # Count the positions whose token differs after augmentation.
            changed = sum(1 for old, new in zip(before, after) if old != new)

            self.assertLess(changed, len(before))
            self.assertNotEqual(sentence, result)

        # Guard against the loop silently running over no inputs.
        self.assertTrue(len(sentences) > 0)