def __init__(self, num_masks=1, mask_factor=27, name="FreqMasking", verbose=0):
    super(FreqMasking, self).__init__(
        action=Action.SUBSTITUTE,
        zone=(0.2, 0.8),
        name=name,
        device="cpu",
        verbose=verbose,
        coverage=1.,
        factor=(40, 80),
        silence=False,
        stateless=True,
    )
    self.flow = Sequential(
        [FreqMaskingAugmenter(mask_factor) for _ in range(num_masks)])
def __init__(self, num_masks=1, mask_factor=100, p_upperbound=1, name="TimeMasking", verbose=0):
    super(TimeMasking, self).__init__(
        action=Action.SUBSTITUTE,
        zone=(0.2, 0.8),
        name=name,
        device="cpu",
        verbose=verbose,
        coverage=1.,
        silence=False,
        stateless=True,
    )
    self.flow = Sequential(
        [TimeMaskingAugmenter(mask_factor, p_upperbound) for _ in range(num_masks)])
def build_flow(flow_config):
    augmenters = []
    for key, value in flow_config.items():
        if key == "settings":
            continue
        augmenters.append(build_augmenter(key, value))
    settings = flow_config["settings"]
    if not settings["is_random"]:
        return Sequential(augmenters)
    return Sometimes(augmenters, aug_p=settings["aug_p"])
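# Illustrative config for build_flow (a sketch, not from the original source):
# every key except "settings" is resolved through build_augmenter, so the
# augmenter names below ("freq_masking", "time_masking") are hypothetical and
# depend on build_augmenter's registry. "settings" selects between a
# deterministic Sequential flow and a probabilistic Sometimes flow.
example_flow_config = {
    "freq_masking": {"num_masks": 1, "mask_factor": 27},
    "time_masking": {"num_masks": 1, "mask_factor": 100, "p_upperbound": 1},
    "settings": {"is_random": True, "aug_p": 0.5},
}
flow = build_flow(example_flow_config)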
def test_apply(self):
    # Set the seed
    random.seed(0)
    np.random.seed(0)

    # Create the nlpaug transformation
    nlpaug_transformation = NlpAugTransformation(
        pipeline=Sequential(flow=[SynonymAug()]),
        num_transformed=3,
    )

    for i, identifier in enumerate(nlpaug_transformation.identifiers):
        self.assertEqual(
            str(identifier),
            f"NlpAugTransformation-{i + 1}(pipeline=[Synonym_Aug(src=wordnet, "
            f"action=substitute, method=word)])",
        )

    # Apply it
    dataset, slices, slice_membership = nlpaug_transformation(
        self.testbed.dataset, columns=["text"]
    )

    # All the sizes match up
    self.assertEqual(len(dataset), len(self.testbed.dataset))
    for sl in slices:
        self.assertEqual(len(sl), len(self.testbed.dataset))
    self.assertEqual(slice_membership.shape, (6, 3))

    # Everything was transformed
    self.assertTrue(np.all(slice_membership))

    # Dataset interaction history updated correctly
    self.assertEqual(
        len(dataset.fetch_tape(["slicebuilders", "transformation"]).history), 3
    )

    # Checking that the transformed text matches
    self.assertEqual(
        slices[0]["text"],
        [
            "The man is walk.",
            "The man be running.",
            "The cleaning lady is sprinting.",
            "The woman personify resting.",
            "The hobbit is fly.",
            "The hobbit is swimming.",
        ],
    )
class FreqMasking(SpectrogramAugmenter):
    def __init__(self,
                 num_masks: int = 1,
                 mask_factor: float = 27,
                 name: str = "FreqMasking",
                 verbose=0):
        super(FreqMasking, self).__init__(
            action=Action.SUBSTITUTE,
            zone=(0.2, 0.8),
            name=name,
            device="cpu",
            verbose=verbose,
            coverage=1.,
            factor=(40, 80),
            silence=False,
            stateless=True,
        )
        self.flow = Sequential(
            [FreqMaskingAugmenter(mask_factor) for _ in range(num_masks)])

    def substitute(self, data):
        return self.flow.augment(data)
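# Usage sketch (illustrative, not from the original source): this assumes the
# standard nlpaug Augmenter.augment() entry point, which routes
# action=Action.SUBSTITUTE to the substitute() method above, that TimeMasking
# defines substitute() symmetrically, and that the input is a spectrogram
# array shaped [time_steps, num_freq_bins].
import numpy as np

dummy_spectrogram = np.random.rand(300, 80).astype(np.float32)  # fake log-mel features
freq_masked = FreqMasking(num_masks=2, mask_factor=27).augment(dummy_spectrogram)
time_masked = TimeMasking(num_masks=2, mask_factor=100).augment(dummy_spectrogram)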
def test_apply(self):
    # Set the seed
    random.seed(0)
    np.random.seed(0)

    # Create the nlpaug transformation
    nlpaug_transformation = NlpAugTransformation(
        pipeline=Sequential(flow=[SynonymAug()]),
        num_transformed=3,
    )

    for i, identifier in enumerate(nlpaug_transformation.identifiers):
        self.assertEqual(
            str(identifier),
            f"NlpAugTransformation-{i + 1}(pipeline=[Synonym_Aug(src=wordnet, "
            f"action=substitute, method=word)])",
        )

    # Apply it
    slices, slice_membership = nlpaug_transformation(
        self.testbed.dataset, columns=["text"]
    )

    # All the sizes match up
    # self.assertEqual(len(dataset), len(self.testbed.dataset))
    for sl in slices:
        self.assertEqual(len(sl), len(self.testbed.dataset))
    self.assertEqual(slice_membership.shape, (6, 3))

    # Everything was transformed
    self.assertTrue(np.all(slice_membership))

    # Dataset interaction history updated correctly
    self.assertEqual(
        len(
            self.testbed.dataset.fetch_tape(
                ["slicebuilders", "transformation"]
            ).history
        ),
        3,
    )
def build_chain_translation_augmenter(language_chain: List[str],
                                      device: str) -> Sequential:
    pair_to_model = {
        "en-fr": "transformer.wmt14.en-fr",
        "en-de": "transformer.wmt19.en-de",
        "de-en": "transformer.wmt19.de-en",
        "en-ru": "transformer.wmt19.en-ru",
        "ru-en": "transformer.wmt19.ru-en",
    }

    # Each BackTranslationAug consumes two adjacent language pairs, so a chain
    # needs at least three entries (source -> pivot -> ... -> target).
    if len(language_chain) <= 2:
        raise ValueError(
            "Can't backtranslate with fewer than three languages in a chain")

    augmenters = []
    for i in range(len(language_chain) - 2):
        from_key = f"{language_chain[i]}-{language_chain[i + 1]}"
        to_key = f"{language_chain[i + 1]}-{language_chain[i + 2]}"
        from_model_name = pair_to_model[from_key]
        to_model_name = pair_to_model[to_key]
        augmenters.append(
            BackTranslationAug(from_model_name=from_model_name,
                               to_model_name=to_model_name,
                               device=device))
    return Sequential(augmenters)
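# Usage sketch (illustrative, not from the original source): every adjacent
# pair in the chain must be a key in pair_to_model, so with the fairseq WMT
# models above the simplest valid round trip is en -> de -> en. Note that
# each BackTranslationAug loads its fairseq models on first use, which can
# be slow and memory-hungry.
back_translator = build_chain_translation_augmenter(
    language_chain=["en", "de", "en"], device="cpu")
augmented = back_translator.augment(
    "The quick brown fox jumps over the lazy dog.")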