def use_embedding_augmenter():
    """Check that EmbeddingAugmenter produces a known variant of a sample sentence.

    Builds an augmenter that is asked for 64 transformations per example,
    augments one fixed sentence, and asserts that the expected rewrite
    ("bad" -> "unfavourable") is among the returned candidates.

    Raises:
        AssertionError: If the expected rewrite is not in the augmenter output.
    """
    from textattack.augmentation import EmbeddingAugmenter

    source_sentence = 'There is nothing either good or bad, but thinking makes it so.'
    expected_variant = (
        'There is nothing either good or unfavourable, but thinking makes it so.'
    )

    candidates = EmbeddingAugmenter(transformations_per_example=64).augment(
        source_sentence
    )
    assert expected_variant in candidates
def test_embedding_augmenter():
    """Verify that EmbeddingAugmenter emits the expected single-word substitution.

    The augmenter is configured with ``pct_words_to_swap=0.01`` and
    ``transformations_per_example=64``; the test passes when the sentence with
    "bad" replaced by "unfavourable" appears among the augmented outputs.
    """
    from textattack.augmentation import EmbeddingAugmenter

    original = "There is nothing either good or bad, but thinking makes it so."
    expected = "There is nothing either good or unfavourable, but thinking makes it so."

    embedding_augmenter = EmbeddingAugmenter(
        pct_words_to_swap=0.01,
        transformations_per_example=64,
    )
    outputs = embedding_augmenter.augment(original)

    assert expected in outputs
from textattack.augmentation import EmbeddingAugmenter

# Configure the augmenter with pct_words_to_swap=0.2 and two transformations
# per input example.
augmenter = EmbeddingAugmenter(pct_words_to_swap=0.2, transformations_per_example=2)

# Copy every record of the input file to the output file, followed by its
# augmented variants, each carrying the same label as the source record.
# NOTE(review): assumes each line is "<text>;<label>" with the label as the
# last field; confirm against the dataset. The split is on the LAST ';' so a
# semicolon inside the text does not truncate the sentence or drop the label.
with open('dataset/test.txt', 'r') as r, open('augmentation_test.txt', 'w') as w:
    for line_number, line in enumerate(r, start=1):
        # Strip the newline here and re-add it on write, so a final line
        # without a trailing newline cannot fuse with the rows written after it.
        record = line.rstrip('\n')
        if not record.strip():
            # Blank lines carry no record; previously they raised IndexError.
            continue

        text, separator, label = record.rpartition(';')
        if not separator:
            raise ValueError(
                f"dataset/test.txt line {line_number}: expected '<text>;<label>', "
                f"got {record!r}"
            )

        w.write(f'{text};{label}\n')
        for aug_text in augmenter.augment(text):
            w.write(f'{aug_text};{label}\n')
def augment_examples(sentences: List[str], num_transformations: int = 1):
    """Return one embedding-based augmentation for each input sentence.

    Args:
        sentences: Sentences to augment.
        num_transformations: Passed to ``EmbeddingAugmenter`` as
            ``transformations_per_example``.

    Returns:
        A list holding the first element of each per-sentence result from
        ``augment_many``. Any further augmentations produced for a sentence
        are discarded.
    """
    # NOTE(review): assumes augment_many yields one non-empty sequence of
    # augmented strings per input sentence; confirm against TextAttack docs.
    per_sentence_variants = EmbeddingAugmenter(
        transformations_per_example=num_transformations
    ).augment_many(sentences, show_progress=True)

    first_variants = []
    for variants in per_sentence_variants:
        first_variants.append(variants[0])
    return first_variants
## WordNetAugmenter Augments the text by replacing words with synonyms from the WordNet thesaurus. WordNet is a lexical database for English. Nouns, verbs, adjectives, and adverbs are grouped into sets of cognitive synonyms (synsets), each of which expresses a different concept. WordNet superficially resembles a thesaurus in that it groups words based on their meaning. """ aug = WordNetAugmenter(); aug.augment(text) """## EmbeddingAugmenter Augments text by replacing words with neighbors in embedding space, with a constraint to ensure that their cosine similarity is at least 0.8. """ aug = EmbeddingAugmenter(); aug.augment(text) """## CharSwapAugmenter Augments text by substituting, deleting, inserting, and swapping adjacent characters. """ aug = CharSwapAugmenter() aug.augment(text) """## EasyDataAugmenter This augments the text with a combination of WordNet synonym replacement, word deletion, word order changes, and synonym insertions. All four of these operations are applied randomly, so we get different results every time we run the code. This returns 4 augmented results, unlike the other methods. """ aug = EasyDataAugmenter() aug.augment(text)