Esempio n. 1
0
 def __init__(self,
              num_masks=1,
              mask_factor=27,
              name="FreqMasking",
              verbose=0):
     """Configure the augmenter and chain ``num_masks`` frequency masks.

     Args:
         num_masks: Number of ``FreqMaskingAugmenter`` stages in the flow.
         mask_factor: Argument given to every ``FreqMaskingAugmenter``.
         name: Name forwarded to the parent initializer.
         verbose: Verbosity value forwarded to the parent initializer.
     """
     super(FreqMasking, self).__init__(
         action=Action.SUBSTITUTE,
         zone=(0.2, 0.8),
         name=name,
         device="cpu",
         verbose=verbose,
         coverage=1.,
         factor=(40, 80),
         silence=False,
         stateless=True,
     )
     # Build a separate augmenter instance for each requested mask.
     mask_stages = []
     for _ in range(num_masks):
         mask_stages.append(FreqMaskingAugmenter(mask_factor))
     self.flow = Sequential(mask_stages)
Esempio n. 2
0
 def __init__(self,
              num_masks=1,
              mask_factor=100,
              p_upperbound=1,
              name="TimeMasking",
              verbose=0):
     """Configure the augmenter and chain ``num_masks`` time masks.

     Args:
         num_masks: Number of ``TimeMaskingAugmenter`` stages in the flow.
         mask_factor: First argument given to every ``TimeMaskingAugmenter``.
         p_upperbound: Second argument given to every
             ``TimeMaskingAugmenter``.
         name: Name forwarded to the parent initializer.
         verbose: Verbosity value forwarded to the parent initializer.
     """
     super(TimeMasking, self).__init__(
         action=Action.SUBSTITUTE,
         zone=(0.2, 0.8),
         name=name,
         device="cpu",
         verbose=verbose,
         coverage=1.,
         silence=False,
         stateless=True,
     )
     # Build a separate augmenter instance for each requested mask.
     mask_stages = []
     for _ in range(num_masks):
         mask_stages.append(TimeMaskingAugmenter(mask_factor, p_upperbound))
     self.flow = Sequential(mask_stages)
Esempio n. 3
0
def build_flow(flow_config):
    """Turn a flow configuration mapping into an augmentation pipeline.

    Every entry except ``"settings"`` is handed to ``build_augmenter`` as
    ``(key, value)``; the ``"settings"`` entry selects the wrapper.

    Args:
        flow_config: Mapping of augmenter keys to their configuration, plus
            a ``"settings"`` entry holding ``is_random`` and, when that is
            truthy, ``aug_p``.

    Returns:
        ``Sometimes(augmenters, aug_p=...)`` when ``is_random`` is truthy,
        otherwise ``Sequential(augmenters)``.

    Raises:
        KeyError: If ``"settings"`` or one of the settings it needs is
            missing.
    """
    augmenters = [
        build_augmenter(aug_key, aug_config)
        for aug_key, aug_config in flow_config.items()
        if aug_key != "settings"
    ]
    # Settings are read only after all augmenters have been built.
    flow_settings = flow_config['settings']
    if flow_settings['is_random']:
        return Sometimes(augmenters, aug_p=flow_settings['aug_p'])
    return Sequential(augmenters)
Esempio n. 4
0
    def test_apply(self):
        # Checks identifiers, slice sizes, membership, interaction history
        # and the transformed text for a synonym-substitution pipeline.

        # Seed both random generators before anything is constructed.
        random.seed(0)
        np.random.seed(0)

        # Transformation under test: one SynonymAug, three outputs per example.
        transformation = NlpAugTransformation(
            pipeline=Sequential(flow=[SynonymAug()]),
            num_transformed=3,
        )

        # Identifiers are numbered from 1 and embed the pipeline description.
        for i, identifier in enumerate(transformation.identifiers):
            expected_identifier = (
                f"NlpAugTransformation-{i + 1}(pipeline=[Synonym_Aug(src=wordnet, "
                f"action=substitute, method=word)])"
            )
            self.assertEqual(str(identifier), expected_identifier)

        # Run the transformation on the "text" column.
        source = self.testbed.dataset
        dataset, slices, slice_membership = transformation(
            source, columns=["text"]
        )

        # The returned dataset and every slice keep the source size.
        self.assertEqual(len(dataset), len(self.testbed.dataset))
        for transformed_slice in slices:
            self.assertEqual(len(transformed_slice), len(self.testbed.dataset))
        self.assertEqual(slice_membership.shape, (6, 3))

        # Every membership entry is truthy.
        self.assertTrue(np.all(slice_membership))

        # The transformation tape of the returned dataset has three entries.
        tape = dataset.fetch_tape(["slicebuilders", "transformation"])
        self.assertEqual(len(tape.history), 3)

        # The first slice holds exactly these transformed sentences.
        expected_text = [
            "The man is walk.",
            "The man be running.",
            "The cleaning lady is sprinting.",
            "The woman personify resting.",
            "The hobbit is fly.",
            "The hobbit is swimming.",
        ]
        self.assertEqual(slices[0]["text"], expected_text)
class FreqMasking(SpectrogramAugmenter):
    """Spectrogram augmenter that runs a chain of frequency masks.

    The chain is a ``Sequential`` flow of ``num_masks``
    ``FreqMaskingAugmenter`` instances, each built with the same
    ``mask_factor``.
    """

    def __init__(self,
                 num_masks: int = 1,
                 mask_factor: float = 27,
                 name: str = "FreqMasking",
                 verbose=0):
        """Initialise the parent augmenter and build the masking flow.

        Args:
            num_masks: Number of ``FreqMaskingAugmenter`` stages in the flow.
            mask_factor: Argument given to every ``FreqMaskingAugmenter``.
            name: Name forwarded to the parent initializer.
            verbose: Verbosity value forwarded to the parent initializer.
        """
        super().__init__(
            action=Action.SUBSTITUTE,
            zone=(0.2, 0.8),
            name=name,
            device="cpu",
            verbose=verbose,
            coverage=1.,
            factor=(40, 80),
            silence=False,
            stateless=True,
        )
        # Build a separate augmenter instance for each requested mask.
        masking_steps = []
        for _ in range(num_masks):
            masking_steps.append(FreqMaskingAugmenter(mask_factor))
        self.flow = Sequential(masking_steps)

    def substitute(self, data):
        """Return the result of running ``data`` through the masking flow."""
        augmented = self.flow.augment(data)
        return augmented
Esempio n. 6
0
    def test_apply(self):
        # Checks identifiers, slice sizes, membership and interaction history
        # for a synonym-substitution pipeline.

        # Seed both random generators before anything is constructed.
        random.seed(0)
        np.random.seed(0)

        # Transformation under test: one SynonymAug, three outputs per example.
        transformation = NlpAugTransformation(
            pipeline=Sequential(flow=[SynonymAug()]),
            num_transformed=3,
        )

        # Identifiers are numbered from 1 and embed the pipeline description.
        for i, identifier in enumerate(transformation.identifiers):
            expected_identifier = (
                f"NlpAugTransformation-{i + 1}(pipeline=[Synonym_Aug(src=wordnet, "
                f"action=substitute, method=word)])"
            )
            self.assertEqual(str(identifier), expected_identifier)

        # Run the transformation on the "text" column. The call is unpacked
        # into slices and membership only, so no returned dataset is
        # size-checked here.
        source = self.testbed.dataset
        slices, slice_membership = transformation(source, columns=["text"])

        # Every slice keeps the source size.
        for transformed_slice in slices:
            self.assertEqual(len(transformed_slice), len(self.testbed.dataset))
        self.assertEqual(slice_membership.shape, (6, 3))

        # Every membership entry is truthy.
        self.assertTrue(np.all(slice_membership))

        # The transformation tape of the testbed dataset has three entries.
        tape = self.testbed.dataset.fetch_tape(
            ["slicebuilders", "transformation"])
        self.assertEqual(len(tape.history), 3)
Esempio n. 7
0
def build_chain_translation_augmenter(language_chain: List[str],
                                      device: str) -> Sequential:
    """Build a pipeline of back-translation augmenters from a language chain.

    Every run of three consecutive languages ``a, b, c`` in
    ``language_chain`` yields one ``BackTranslationAug`` whose
    ``from_model_name`` is the model registered for the pair ``a-b`` and
    whose ``to_model_name`` is the model registered for ``b-c``.

    Args:
        language_chain: Language codes in translation order, e.g.
            ``["en", "de", "en"]``. At least three entries are required.
        device: Passed unchanged to every ``BackTranslationAug``.

    Returns:
        A ``Sequential`` wrapping the augmenters in chain order.

    Raises:
        ValueError: If the chain holds fewer than three languages.
        KeyError: If a consecutive language pair has no registered model.
    """
    pair_to_model = {
        "en-fr": "transformer.wmt14.en-fr",
        "en-de": "transformer.wmt19.en-de",
        "de-en": "transformer.wmt19.de-en",
        "en-ru": "transformer.wmt19.en-ru",
        "ru-en": "transformer.wmt19.ru-en"
    }
    # One augmenter needs two consecutive pairs, i.e. three languages.
    if len(language_chain) < 3:
        raise ValueError(
            "Can't backtranslate with less than three languages in a chain")

    # Resolve every consecutive pair first so that an unsupported pair
    # raises KeyError before any augmenter is constructed.
    model_names = [
        pair_to_model[f"{source}-{target}"]
        for source, target in zip(language_chain, language_chain[1:])
    ]

    # NOTE(review): the windows advance by one language, so in a chain of
    # more than three entries each inner pair's model is the `to` model of
    # one augmenter and the `from` model of the next — confirm this overlap
    # is intended.
    augmenters = [
        BackTranslationAug(from_model_name=from_model_name,
                           to_model_name=to_model_name,
                           device=device)
        for from_model_name, to_model_name in zip(model_names,
                                                  model_names[1:])
    ]
    return Sequential(augmenters)