def test_swap(self):
    """Feed swap augmentation back into itself ten times and expect a change.

    Each sample text is augmented ten times in a row, with every round's
    output used as the next round's input; the final text must differ from
    the original. See https://github.com/makcedward/nlpaug/issues/77.

    NOTE(review): the previous version of this test also built
    per-character frequency tables (before/after) and a changed-token
    counter, but never asserted on any of them. That dead code is removed
    here. If a frequency-preservation check is wanted for the linked issue,
    add it explicitly after confirming how swap treats letter case.

    NOTE(review): this file defines another ``test_swap``; if both live in
    the same class the later definition replaces the earlier one and only
    one of them runs. Consider renaming one of them.
    """
    texts = ['The quick brown fox jumps over the lazy dog', 'testing']
    aug = RandomCharAug(action="swap", min_char=1)

    for text in texts:
        augmented_text = text
        # Chain the augmentations: round N consumes the output of round N-1.
        for _ in range(10):
            augmented_text = aug.augment(augmented_text)

        self.assertNotEqual(text, augmented_text)

    # Guard against the loop body being skipped because of an empty fixture.
    self.assertTrue(len(texts) > 0)
def test_swap_middle(self):
    """Swap with ``swap_mode='middle'`` must alter the text but keep its length."""
    original = 'quick brown jumps over lazy'
    augmenter = RandomCharAug(action="swap", swap_mode='middle', min_char=4)

    result = augmenter.augment(original)

    self.assertNotEqual(original, result)
    # The output must have exactly as many characters as the input.
    self.assertEqual(len(result), len(original))
def test_substitute_single_word(self):
    """Substitute augmentation (``min_char=1``) must change each single word."""
    samples = ['Zoology', 'roku123456']
    augmenter = RandomCharAug(action='substitute', min_char=1)

    for sample in samples:
        result = augmenter.augment(sample)
        self.assertNotEqual(sample, result)

    # Make sure the loop above had something to iterate over.
    self.assertTrue(len(samples) > 0)
def testSwapChar(self):
    """``Action.SWAP`` with otherwise default settings must change each word."""
    words = ['Zoology', 'roku123456']
    augmenter = RandomCharAug(action=Action.SWAP)

    for word in words:
        result = augmenter.augment(word)
        self.assertNotEqual(word, result)

    # Make sure the loop above had something to iterate over.
    self.assertTrue(len(words) > 0)
def testSubstituteExistChar(self):
    """``Action.SUBSTITUTE`` with otherwise default settings must change each word."""
    words = ['Zoology', 'roku123456']
    augmenter = RandomCharAug(action=Action.SUBSTITUTE)

    for word in words:
        result = augmenter.augment(word)
        self.assertNotEqual(word, result)

    # Make sure the loop above had something to iterate over.
    self.assertTrue(len(words) > 0)
def test_delete(self):
    """Delete augmentation (``min_char=1``) must change and shorten each word."""
    words = ['Zoology', 'roku123456']
    augmenter = RandomCharAug(action='delete', min_char=1)

    for word in words:
        result = augmenter.augment(word)
        self.assertNotEqual(word, result)
        # The output must be strictly shorter than the input.
        self.assertLess(len(result), len(word))

    # Make sure the loop above had something to iterate over.
    self.assertTrue(len(words) > 0)
def test_insert_single_word(self):
    """``Action.INSERT`` (``min_char=1``) must change and lengthen each word."""
    samples = ['Zoology', 'roku123456']
    augmenter = RandomCharAug(action=Action.INSERT, min_char=1)

    for sample in samples:
        result = augmenter.augment(sample)
        self.assertNotEqual(sample, result)
        # The output must be strictly longer than the input.
        self.assertLess(len(sample), len(result))

    # Make sure the loop above had something to iterate over.
    self.assertTrue(len(samples) > 0)
def testInsertExistChar(self):
    """``Action.INSERT`` with otherwise default settings must change and lengthen each word."""
    words = ['Zoology', 'roku123456']
    augmenter = RandomCharAug(action=Action.INSERT)

    for word in words:
        result = augmenter.augment(word)
        self.assertNotEqual(word, result)
        # The output must be strictly longer than the input.
        self.assertLess(len(word), len(result))

    # Make sure the loop above had something to iterate over.
    self.assertTrue(len(words) > 0)
def test_min_char(self):
    """With ``min_char=20`` no action may alter the sample words.

    Both sample words are shorter than 20 characters; the test expects the
    output of every action to be identical to the input.
    """
    words = ['Zoology', 'roku123456']
    action_names = ['insert', 'swap', 'substitute', 'delete']

    for action_name in action_names:
        augmenter = RandomCharAug(action=action_name, min_char=20)
        for word in words:
            result = augmenter.augment(word)
            self.assertEqual(word, result)
            self.assertEqual(len(result), len(word))

    # Make sure the inner loop had something to iterate over.
    self.assertTrue(len(words) > 0)
def testSwapStopwords(self):
    """Words passed as ``stopwords`` must come back unchanged; others must change."""
    words = ['Zoology', 'roku123456']
    # Only the first word is registered as a stopword.
    protected = words[:1]
    augmenter = RandomCharAug(action=Action.SWAP, stopwords=protected)

    for word in words:
        result = augmenter.augment(word)
        # Stopwords must be left alone; every other word must be altered.
        check = self.assertEqual if word in protected else self.assertNotEqual
        check(word, result)

    # Make sure the loop above had something to iterate over.
    self.assertTrue(len(words) > 0)
def test_candidiates(self):
    """The augmented text must contain at least one of the supplied candidates.

    The spelling ``candidiates`` matches the keyword argument passed to
    ``RandomCharAug`` and is therefore kept as is.
    """
    pool = ['AAA', '11', '===', '中文']
    original = 'quick brown jumps over lazy'
    augmenter = RandomCharAug(min_char=4, candidiates=pool)

    result = augmenter.augment(original)
    self.assertNotEqual(original, result)

    # At least one candidate string has to show up in the output.
    found = any(candidate in result for candidate in pool)
    self.assertTrue(found)
def test_substitute_multi_words(self):
    """Substitute augmentation on a sentence must change some, but not all, tokens.

    The original and augmented texts are split with the augmenter's own
    tokenizer and compared position by position.
    """
    sentences = ['The quick brown fox jumps over the lazy dog']
    augmenter = RandomCharAug(action='substitute', min_char=1)

    for sentence in sentences:
        result = augmenter.augment(sentence)

        source_tokens = augmenter.tokenizer(sentence)
        result_tokens = augmenter.tokenizer(result)

        # Count the positions whose token differs after augmentation.
        changed = sum(
            1
            for before, after in zip(source_tokens, result_tokens)
            if before != after
        )

        # Fewer tokens changed than there are tokens in total.
        self.assertLess(changed, len(source_tokens))
        self.assertNotEqual(sentence, result)

    # Make sure the loop above had something to iterate over.
    self.assertTrue(len(sentences) > 0)
def test_swap(self):
    """``Action.SWAP`` on a sentence must change some, but not all, tokens.

    The original and augmented texts are split with the augmenter's own
    tokenizer and compared position by position.

    NOTE(review): this file defines another ``test_swap``; if both live in
    the same class the later definition replaces the earlier one and only
    one of them runs. Consider renaming one of them.
    """
    sentences = ['The quick brown fox jumps over the lazy dog']
    augmenter = RandomCharAug(action=Action.SWAP)

    for sentence in sentences:
        result = augmenter.augment(sentence)

        source_tokens = augmenter.tokenizer(sentence)
        result_tokens = augmenter.tokenizer(result)

        # Count the positions whose token differs after augmentation.
        changed = sum(
            1
            for before, after in zip(source_tokens, result_tokens)
            if before != after
        )

        # Fewer tokens changed than there are tokens in total.
        self.assertLess(changed, len(source_tokens))
        self.assertNotEqual(sentence, result)

    # Make sure the loop above had something to iterate over.
    self.assertTrue(len(sentences) > 0)