Beispiel #1
0
    def _validate_augmenter(self, method, action):
        """Ensure ``method`` and ``action`` are among the supported values.

        The method is checked first; the action is only looked at when the
        method is acceptable.

        Raises:
            ValueError: If ``method`` is not in ``Method.getall()`` or
                ``action`` is not in ``Action.getall()``.
        """
        supported_methods = Method.getall()
        if method not in supported_methods:
            message = 'Method must be one of {} while {} is passed'.format(
                supported_methods, method)
            raise ValueError(message)

        supported_actions = Action.getall()
        if action not in supported_actions:
            message = 'Action must be one of {} while {} is passed'.format(
                supported_actions, action)
            raise ValueError(message)
Beispiel #2
0
    def test_augment_detail(self):
        """Check the change details returned by nested augmentation flows.

        Two pipelines are built with ``include_detail=True`` (one rooted at
        ``naf.Sequential``, one at ``naf.Sometimes``). For each, ``augment``
        must return text different from the input together with a non-empty
        list of change records carrying sane positions, a positive
        ``change_seq`` and a known action.
        """
        text = 'The quick brown fox jumps over the lazy dog'

        # First pipeline: a Sequential of an optional insert/delete step
        # followed by a named Sequential holding one character substitution.
        seq_insert_or_delete = naf.Sometimes(
            [
                nac.RandomCharAug(action="insert"),
                nac.RandomCharAug(action="delete"),
            ],
            pipeline_p=0.5)
        seq_substitute = naf.Sequential(
            [
                nac.RandomCharAug(
                    action="substitute",
                    aug_char_min=1,
                    aug_char_p=0.6,
                    aug_word_p=0.6),
            ],
            name='Sub_Seq')
        sequential_flow = naf.Sequential(
            [seq_insert_or_delete, seq_substitute],
            include_detail=True)

        # Second pipeline: a Sometimes (pipeline_p=1) wrapping a nested
        # Sometimes and a Sequential of OCR, keyboard and substitute steps.
        some_insert_or_delete = naf.Sometimes([
            nac.RandomCharAug(action="insert"),
            nac.RandomCharAug(action="delete"),
        ])
        some_char_chain = naf.Sequential([
            nac.OcrAug(),
            nac.KeyboardAug(aug_char_min=1),
            nac.RandomCharAug(
                action="substitute",
                aug_char_min=1,
                aug_char_p=0.6,
                aug_word_p=0.6),
        ])
        sometimes_flow = naf.Sometimes(
            [some_insert_or_delete, some_char_chain],
            pipeline_p=1,
            include_detail=True)

        for pipeline in (sequential_flow, sometimes_flow):
            augmented_text, augment_details = pipeline.augment(text)

            self.assertNotEqual(text, augmented_text)
            self.assertGreater(len(augment_details), 0)
            for detail in augment_details:
                # Both positions must be non-negative.
                for position_key in ('orig_start_pos', 'new_start_pos'):
                    self.assertGreater(detail[position_key], -1)
                self.assertGreater(detail['change_seq'], 0)
                self.assertIn(detail['action'], Action.getall())
Beispiel #3
0
    def test_augment_detail(self):
        """Check the change details returned by the sentence augmenter.

        For every path in ``self.model_paths`` an augmenter is created with
        ``include_detail=True``. Its ``augment`` call must return text that
        differs from ``self.text`` plus a non-empty list of change records
        whose fields satisfy the assertions below.
        """
        for model_path in self.model_paths:
            aug = nas.ContextualWordEmbsForSentenceAug(model_path=model_path,
                                                       include_detail=True)

            augmented_text, augment_details = aug.augment(self.text)

            self.assertNotEqual(self.text, augmented_text)
            self.assertGreater(len(augment_details), 0)
            for augment_detail in augment_details:
                # assertIn reports both operands on failure, unlike
                # assertTrue(x in y) which only says "False is not true".
                self.assertIn(augment_detail['orig_token'], self.text)
                # This augmenter is expected to report -1 as the original
                # start position (presumably because it adds text rather than
                # editing an existing token -- confirm against the augmenter).
                self.assertEqual(augment_detail['orig_start_pos'], -1)
                self.assertGreater(augment_detail['new_start_pos'], -1)
                self.assertGreater(augment_detail['change_seq'], 0)
                self.assertIn(augment_detail['action'], Action.getall())
Beispiel #4
0
    def test_augment_detail(self):
        """Check the change details returned by word-level augmenters.

        Each augmenter is created with ``include_detail=True``; ``augment``
        must return text different from the input and a non-empty list of
        change records with a known original token, non-negative positions,
        a positive ``change_seq`` and a known action.
        """
        text = 'The quick brown fox jumps over the lazy dog'
        augs = [
            naw.RandomWordAug(include_detail=True),  # Delete, use SWAP later
            naw.ContextualWordEmbsAug(model_path='bert-base-uncased',
                                      include_detail=True)  # Substitute
        ]

        for aug in augs:
            augmented_text, augment_details = aug.augment(text)

            self.assertNotEqual(text, augmented_text)
            self.assertGreater(len(augment_details), 0)
            for augment_detail in augment_details:
                # assertIn reports both operands on failure, unlike
                # assertTrue(x in y) which only says "False is not true".
                self.assertIn(augment_detail['orig_token'], text)
                self.assertGreater(augment_detail['orig_start_pos'], -1)
                self.assertGreater(augment_detail['new_start_pos'], -1)
                self.assertGreater(augment_detail['change_seq'], 0)
                self.assertIn(augment_detail['action'], Action.getall())
Beispiel #5
0
    def test_augment_detail(self):
        """Check character-level change details and that they can be undone.

        Each augmenter is created with ``include_detail=True``. Besides
        validating the individual change records, the test replays them in
        reverse to rebuild the input text from the augmented text and asserts
        that the reconstruction equals the original.
        """
        text = 'The quick brown fox jumps over the lazy dog'
        augs = [
            nac.KeyboardAug(min_char=1, include_detail=True),
            nac.OcrAug(min_char=1, include_detail=True),
            nac.RandomCharAug(min_char=2, include_detail=True)
        ]

        for aug in augs:
            augmented_text, augment_details = aug.augment(text)

            self.assertNotEqual(text, augmented_text)
            self.assertGreater(len(augment_details), 0)
            for augment_detail in augment_details:
                # assertIn reports both operands on failure, unlike
                # assertTrue(x in y) which only says "False is not true".
                self.assertIn(augment_detail['orig_token'], text)
                self.assertGreater(augment_detail['orig_start_pos'], -1)
                self.assertGreater(augment_detail['new_start_pos'], -1)
                self.assertGreater(augment_detail['change_seq'], 0)
                self.assertIn(augment_detail['action'], Action.getall())

            # Get back original input by re-engineering.
            # Changes are undone from the highest original position downwards
            # (presumably so earlier offsets stay valid while reverting).
            reengineering_text = augmented_text
            for change_obj in sorted(augment_details, key=lambda item: item['orig_start_pos'], reverse=True):
                action = change_obj['action']
                new_start_pos = change_obj['new_start_pos']

                if action == Action.DELETE:
                    # Re-insert the removed token (plus a separating space)
                    # at the position recorded for the change.
                    text_prefix = reengineering_text[:new_start_pos]
                    text_core = change_obj['orig_token'] + ' '
                    text_suffix = reengineering_text[new_start_pos:]

                elif action in [Action.INSERT, Action.SUBSTITUTE]:
                    # Swap the first occurrence of the new token at or after
                    # the recorded position back to the original token.
                    text_prefix = reengineering_text[:new_start_pos]
                    text_core = reengineering_text[new_start_pos:].replace(
                        change_obj['new_token'], change_obj['orig_token'], 1)
                    text_suffix = ''

                else:
                    # TODO: support reverting Action.SWAP.
                    # Fail explicitly: without this branch the pieces below
                    # would be unbound (NameError) or silently reuse the
                    # values from the previous iteration.
                    self.fail('Cannot revert unsupported action: {}'.format(action))

                reengineering_text = text_prefix + text_core + text_suffix
                reengineering_text = reengineering_text.strip()

            self.assertEqual(text, reengineering_text)