Exemple #1
0
    def test_single_word(self):
        """
            Issue#10: inputs containing one-character words such as 'I a'.

            Runs two phases, each with its own freshly built augmenter:
            the inputs "I a" and "I" must come back changed, while the
            input "a" must come back unchanged.
        :return:
        """

        # Each entry pairs a batch of inputs with the assertion that relates
        # an input to its augmented output.
        phases = (
            (["I a", 'I'], self.assertNotEqual),
            (["a"], self.assertEqual),
        )

        for samples, check in phases:
            augmenter = naw.WordNetAug()

            for sample in samples:
                # Guard against an empty input string.
                self.assertGreater(len(sample), 0)
                check(sample, augmenter.augment(sample))

            # An empty batch would have skipped the loop silently; fail instead.
            self.assertGreater(len(samples), 0)
 def __init__(self):
     """Set up default paths, empty data holders and the WordNet augmenter."""
     # Raw and processed data locations both start as the current directory.
     self.raw_data_path = self.processed_data_path = './'

     # No source frame yet; this is only initialised to None here.
     self.df = None

     # Empty frame with the two columns 'text' and 'label'.
     output_columns = ['text', 'label']
     self.augmented_df = pd.DataFrame(columns=output_columns)

     self.no_of_augmentations = 5
     # aug_p is passed straight to nlpaug; presumably the per-word
     # augmentation probability - confirm against the nlpaug docs.
     self.augmentor = naw.WordNetAug(aug_p=0.5)
Exemple #3
0
    def test_stopwords(self):
        """
            Augment a sentence with a stopword list configured and check
            that the output still differs from the input.
        :return:
        """
        sentence = 'The quick brown fox jumps over the lazy dog'
        # NOTE(review): stopwords are presumably excluded from replacement;
        # this test only checks that the sentence as a whole changes.
        augmenter = naw.WordNetAug(stopwords=['quick', 'brown', 'fox'])

        self.assertGreater(len(sentence), 0)
        result = augmenter.augment(sentence)

        self.assertNotEqual(sentence, result)
Exemple #4
0
    def test_antonyms(self):
        """
            Augment with is_synonym=False (antonym mode, going by the test
            name) and check that every input comes back changed.
        :return:
        """
        samples = ['Good bad']
        augmenter = naw.WordNetAug(is_synonym=False)

        for sample in samples:
            # Guard against an empty input string.
            self.assertGreater(len(sample), 0)
            result = augmenter.augment(sample)

            self.assertNotEqual(sample, result)

        # An empty batch would have skipped the loop silently; fail instead.
        self.assertGreater(len(samples), 0)
Exemple #5
0
    def test_substitute(self):
        """
            Augment a sentence with a default-configured WordNetAug and
            check that the output differs from the input.
        :return:
        """
        samples = ['The quick brown fox jumps over the lazy dog']
        augmenter = naw.WordNetAug()

        for sample in samples:
            # Guard against an empty input string.
            self.assertGreater(len(sample), 0)
            result = augmenter.augment(sample)

            self.assertNotEqual(sample, result)

        # An empty batch would have skipped the loop silently; fail instead.
        self.assertGreater(len(samples), 0)
Exemple #6
0
    def test_no_separator(self):
        """
            Pull#11: Remove separator (underscore/ hyphen)

            The augmented text must contain neither '-' nor '_' and must
            differ from the input.
        :return:
        """

        samples = ["linguistic"]
        augmenter = naw.WordNetAug()
        forbidden = ('-', '_')

        for sample in samples:
            # Guard against an empty input string.
            self.assertGreater(len(sample), 0)
            result = augmenter.augment(sample)
            # Check each separator character in turn, hyphen first.
            for char in forbidden:
                self.assertNotIn(char, result)
            self.assertNotEqual(sample, result)

        # An empty batch would have skipped the loop silently; fail instead.
        self.assertGreater(len(samples), 0)