def test_swap(self):
    """Check that repeatedly swapping characters changes the input text.

    Each sample is run through a ``RandomCharAug(action="swap",
    min_char=1)`` augmenter ten times in a row, feeding every output back
    in as the next input. The final text must differ from the original.

    NOTE(review): another ``test_swap`` is visible in this file. If both
    belong to the same class, the later definition replaces this one and
    this test never runs -- confirm and rename one of them.
    """
    texts = ['The quick brown fox jumps over the lazy dog', 'testing']
    aug = RandomCharAug(action="swap", min_char=1)

    for text in texts:
        augmented_text = text

        # Chain the augmenter on its own output; see
        # https://github.com/makcedward/nlpaug/issues/77
        for _ in range(10):
            augmented_text = aug.augment(augmented_text)

        # NOTE(review): only inequality is asserted here. Whether swap must
        # also keep per-character counts intact (the linked issue) is not
        # checked -- confirm the intended invariant and assert it explicitly.
        self.assertNotEqual(text, augmented_text)

    self.assertTrue(len(texts) > 0)
def test_substitute_multi_words(self):
    """Check that character substitution alters a sentence only partially.

    The augmented sentence must differ from the input, while the number of
    position-aligned tokens that changed stays below the input token count.
    """
    sentences = ['The quick brown fox jumps over the lazy dog']
    aug = RandomCharAug(action='substitute', min_char=1)

    for sentence in sentences:
        result = aug.augment(sentence)
        source_tokens = aug.tokenizer(sentence)
        result_tokens = aug.tokenizer(result)

        # Tally the aligned token pairs that no longer match.
        changed = sum(
            1
            for before, after in zip(source_tokens, result_tokens)
            if before != after
        )

        self.assertLess(changed, len(source_tokens))
        self.assertNotEqual(sentence, result)

    self.assertTrue(len(sentences) > 0)
def test_swap(self):
    """Check that a single swap augmentation alters a sentence only partially.

    The augmented sentence must differ from the input, while the number of
    position-aligned tokens that changed stays below the input token count.

    NOTE(review): another ``test_swap`` is visible in this file. If both
    belong to the same class, only the later definition is kept -- confirm
    and rename one of them.
    """
    samples = ['The quick brown fox jumps over the lazy dog']
    aug = RandomCharAug(action=Action.SWAP)

    for sample in samples:
        swapped = aug.augment(sample)
        before_tokens = aug.tokenizer(sample)
        after_tokens = aug.tokenizer(swapped)

        # Keep only the aligned pairs whose tokens differ, then count them.
        differing_pairs = [
            (old, new)
            for old, new in zip(before_tokens, after_tokens)
            if old != new
        ]
        changed_total = len(differing_pairs)

        self.assertLess(changed_total, len(before_tokens))
        self.assertNotEqual(sample, swapped)

    self.assertTrue(len(samples) > 0)