예제 #1
0
    def test_swap(self):
        """Check that repeated swap augmentation changes each input text.

        Every sample is passed through a ``RandomCharAug`` configured with
        ``action="swap"`` and ``min_char=1`` ten times in a row, feeding each
        output back in as the next input. The final result must differ from
        the original text.
        """
        texts = ['The quick brown fox jumps over the lazy dog', 'testing']

        aug = RandomCharAug(action="swap", min_char=1)
        for text in texts:
            augmented_text = text

            # Chain the augmenter on its own output; see
            # https://github.com/makcedward/nlpaug/issues/77
            for _ in range(10):
                augmented_text = aug.augment(augmented_text)

            self.assertNotEqual(text, augmented_text)

        # Guard against the loop silently running over an empty sample list.
        self.assertTrue(len(texts) > 0)
예제 #2
0
    def test_substitute_multi_words(self):
        """Check substitute augmentation on a multi-word sentence.

        The augmented sentence must differ from the input, and the number of
        token positions that differ must be strictly smaller than the number
        of tokens in the original sentence.
        """
        samples = ['The quick brown fox jumps over the lazy dog']
        aug = RandomCharAug(action='substitute', min_char=1)
        for original in samples:
            augmented = aug.augment(original)

            original_tokens = aug.tokenizer(original)
            new_tokens = aug.tokenizer(augmented)

            # Count positions whose token differs after augmentation.
            changed = sum(
                1
                for before, after in zip(original_tokens, new_tokens)
                if before != after
            )

            self.assertLess(changed, len(original_tokens))
            self.assertNotEqual(original, augmented)

        # Guard against the loop silently running over an empty sample list.
        self.assertTrue(len(samples) > 0)
예제 #3
0
    def test_swap(self):
        """Check swap augmentation (``Action.SWAP``) on a sample sentence.

        After one augmentation pass the text must have changed, while the
        count of token positions that differ stays below the total number of
        tokens in the original sentence.
        """
        sentences = ['The quick brown fox jumps over the lazy dog']
        aug = RandomCharAug(action=Action.SWAP)
        for sentence in sentences:
            swapped = aug.augment(sentence)

            source_tokens = aug.tokenizer(sentence)
            swapped_tokens = aug.tokenizer(swapped)

            # Collect the aligned token pairs that no longer match.
            differing_pairs = [
                pair
                for pair in zip(source_tokens, swapped_tokens)
                if pair[0] != pair[1]
            ]

            self.assertLess(len(differing_pairs), len(source_tokens))
            self.assertNotEqual(sentence, swapped)

        # Guard against the loop silently running over an empty sample list.
        self.assertTrue(len(sentences) > 0)