def test_reset_model(self):
    """Check that temperature/top_k passed to the constructor reach the model.

    For every path in ``self.model_paths`` a baseline augmenter is built
    (with ``top_p=0.5``) and its model's ``temperature`` and ``top_k`` are
    read. A second augmenter is then built with both values incremented by
    one, and its model must report exactly those incremented values.
    """
    for model_path in self.model_paths:
        baseline = nas.ContextualWordEmbsForSentenceAug(
            model_path=model_path, top_p=0.5)
        expected_temperature = baseline.model.temperature + 1
        expected_top_k = baseline.model.top_k + 1

        bumped = nas.ContextualWordEmbsForSentenceAug(
            model_path=model_path,
            temperature=expected_temperature,
            top_k=expected_top_k)

        self.assertEqual(expected_temperature, bumped.model.temperature)
        self.assertEqual(expected_top_k, bumped.model.top_k)
def execute_by_device(self, device):
    """Run the empty-input and insert checks for every model on ``device``.

    Each augmenter is created with ``force_reload=True``. The final
    assertion fails when ``self.model_paths`` is empty, i.e. when the loop
    above did not exercise any model.
    """
    for path in self.model_paths:
        augmenter = nas.ContextualWordEmbsForSentenceAug(
            model_path=path, force_reload=True, device=device)
        for check in (self.empty_input, self.insert):
            check(augmenter)

    self.assertLess(0, len(self.model_paths))
def setUpClass(cls):
    """Load the ``.env`` file and create the augmenters shared by the tests.

    The ``.env`` file is looked up two directories above the directory
    containing this file. ``cls.augs`` holds one character-level, one
    word-level and one sentence-level augmenter, all with default arguments.
    """
    # NOTE(review): expected to run as a classmethod; the decorator is not
    # visible in this view - confirm it is present above the definition.
    this_dir = os.path.dirname(__file__)
    dotenv_file = os.path.abspath(os.path.join(this_dir, '..', '..', '.env'))
    load_dotenv(dotenv_file)

    char_aug = nac.RandomCharAug()
    word_aug = naw.ContextualWordEmbsAug()
    sentence_aug = nas.ContextualWordEmbsForSentenceAug()
    cls.augs = [char_aug, word_aug, sentence_aug]
def execute_by_device(self, device):
    """Run the empty-input and insert checks for every model on ``device``.

    The insert check is run twice per model: once with ``self.text`` and
    once with ``self.texts``. The final assertion fails when
    ``self.model_paths`` is empty, i.e. when no model was exercised.
    """
    for path in self.model_paths:
        augmenter = nas.ContextualWordEmbsForSentenceAug(
            model_path=path, device=device)

        self.empty_input(augmenter)
        for sample in (self.text, self.texts):
            self.insert(augmenter, sample)

    self.assertLess(0, len(self.model_paths))
def test_batch_size(self):
    """Check that the output count equals the input count for several batch sizes.

    All scenarios use the ``distilgpt2`` model; each builds a fresh
    augmenter with the given ``batch_size`` and augments the given inputs.
    """
    input_count = len(self.texts)
    scenarios = (
        (1, self.texts),                # 1 per batch
        (input_count, self.texts),      # batch size = input size
        (input_count + 1, self.texts),  # batch size > input size
        (2, self.texts * 2),            # input size > batch size
    )

    for batch_size, inputs in scenarios:
        aug = nas.ContextualWordEmbsForSentenceAug(
            model_path='distilgpt2', batch_size=batch_size)
        outputs = aug.augment(inputs)
        self.assertEqual(len(outputs), len(inputs))
def execute_by_device(self, device):
    """Run every sampling-related check for every model on ``device``.

    For each path in ``self.model_paths`` one augmenter is created and
    handed, in order, to the empty-input, insert, top_k, top_p,
    top_k+top_p and no-top_k/top_p checks defined on this test case. The
    final assertion fails when ``self.model_paths`` is empty.
    """
    checks = (
        self.empty_input,
        self.insert,
        self.top_k,
        self.top_p,
        self.top_k_top_p,
        self.no_top_k_top_p,
    )
    for path in self.model_paths:
        augmenter = nas.ContextualWordEmbsForSentenceAug(
            model_path=path, device=device)
        for check in checks:
            check(augmenter)

    self.assertLess(0, len(self.model_paths))
def test_optimize(self):
    """Compare augmentation time with and without the optimize setting.

    For ``gpt2`` and ``distilgpt2`` on CPU, the augmenter is timed first
    with ``external_memory=1024`` and then, after swapping
    ``aug.model.optimize``, with ``external_memory=0``. Both the total and
    the average duration of the non-optimized run must be strictly greater
    than those of the optimized run.
    """
    model_paths = ['gpt2', 'distilgpt2']
    device = 'cpu'
    enable_optimize = {'external_memory': 1024, 'return_proba': True}
    disable_optimize = {'external_memory': 0, 'return_proba': True}
    epoch = 10

    def measure(augmenter):
        """Time ``epoch`` rounds of ``epoch`` augment calls.

        Returns ``(total, average)`` where each round's duration is rounded
        to two decimals before summing, and the average is rounded again.
        """
        round_durations = []
        for _ in range(epoch):
            started = time.monotonic()
            for _ in range(epoch):
                augmenter.augment(self.text)
            finished = time.monotonic()
            round_durations.append(round(finished - started, 2))
        total = sum(round_durations)
        return total, round(total / len(round_durations), 2)

    for model_path in model_paths:
        aug = nas.ContextualWordEmbsForSentenceAug(
            model_path=model_path, device=device, optimize=enable_optimize,
            force_reload=True)

        # Optimized
        optimized_total, optimized_average = measure(aug)

        # No optimized: same augmenter, only the model's optimize config changes
        aug.model.optimize = disable_optimize
        plain_total, plain_average = measure(aug)

        print('Model:{}, Optimized: {}({}), No Optimized: {}({})'.format(
            model_path, optimized_total, optimized_average,
            plain_total, plain_average))

        self.assertGreater(plain_total, optimized_total)
        self.assertGreater(plain_average, optimized_average)
def test_augment_detail(self):
    """Check the change log returned when ``include_detail=True``.

    For every path in ``self.model_paths`` the augmenter must return a
    ``(augmented_text, augment_details)`` pair where the text differs from
    ``self.text`` and at least one detail record exists. Each record must
    satisfy:

    * ``orig_token`` occurs in ``self.text``
    * ``orig_start_pos`` is ``-1``
    * ``new_start_pos`` is greater than ``-1``
    * ``change_seq`` is greater than ``0``
    * ``action`` is one of ``Action.getall()``
    """
    for model_path in self.model_paths:
        aug = nas.ContextualWordEmbsForSentenceAug(
            model_path=model_path, include_detail=True)

        augmented_text, augment_details = aug.augment(self.text)

        # Checked once: nothing below modifies either operand, so repeating
        # this assertion later would add no coverage.
        self.assertNotEqual(self.text, augmented_text)
        self.assertGreater(len(augment_details), 0)

        for augment_detail in augment_details:
            # assertIn reports both operands on failure, unlike
            # assertTrue(x in y) which only reports "False is not true".
            self.assertIn(augment_detail['orig_token'], self.text)
            self.assertEqual(augment_detail['orig_start_pos'], -1)
            self.assertGreater(augment_detail['new_start_pos'], -1)
            self.assertGreater(augment_detail['change_seq'], 0)
            self.assertIn(augment_detail['action'], Action.getall())
def setUpClass(cls):
    """Load ``.env``, read the sample audio clip and create the augmenters.

    The ``.env`` file is looked up two directories above the directory
    containing this file. The sample WAV file is expected inside the
    directory named by the ``DATA_DIR`` environment variable.

    Sets on the class:
        sample_wav_file: path of the sample WAV file.
        audio, sampling_rate: the two values returned by ``librosa.load``.
        textual_augs: character, word and sentence augmenters (defaults).
        audio_augs: a crop augmenter (using ``sampling_rate``) and a speed
            augmenter.

    Raises:
        RuntimeError: if ``DATA_DIR`` is not set after loading ``.env``.
    """
    # NOTE(review): expected to run as a classmethod; the decorator is not
    # visible in this view - confirm it is present above the definition.
    env_config_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', '..', '.env'))
    load_dotenv(env_config_path)

    data_dir = os.environ.get("DATA_DIR")
    if data_dir is None:
        # Fail with an actionable message instead of the opaque
        # "unsupported operand type(s) for +: 'NoneType' and 'str'".
        raise RuntimeError(
            'DATA_DIR environment variable is not set (expected in {})'.format(
                env_config_path))

    # https://freewavesamples.com/yamaha-v50-rock-beat-120-bpm
    # os.path.join yields the same path as plain concatenation when DATA_DIR
    # ends with a separator, and also works when it does not.
    cls.sample_wav_file = os.path.join(
        data_dir, 'Yamaha-V50-Rock-Beat-120bpm.wav')
    cls.audio, cls.sampling_rate = librosa.load(cls.sample_wav_file)

    cls.textual_augs = [
        nac.RandomCharAug(),
        naw.ContextualWordEmbsAug(),
        nas.ContextualWordEmbsForSentenceAug(),
    ]

    cls.audio_augs = [
        naa.CropAug(sampling_rate=cls.sampling_rate),
        naa.SpeedAug(),
    ]
def test_optimize(self):
    """Check that augmentation changes the text with external memory on and off.

    For ``gpt2`` and ``distilgpt2``, two configs are derived from the
    model's default optimize config: one with ``external_memory=1024`` and
    one with ``external_memory=0``. Each is assigned to
    ``aug.model.optimize`` in turn and the augmented output must differ
    from ``self.text``. The model's original config is put back afterwards.
    """
    for model_path in ('gpt2', 'distilgpt2'):
        aug = nas.ContextualWordEmbsForSentenceAug(model_path=model_path)
        model = aug.model

        # Both configs are built before the original is captured and before
        # anything is assigned to the model.
        with_memory = model.get_default_optimize_config()
        with_memory['external_memory'] = 1024
        without_memory = model.get_default_optimize_config()
        without_memory['external_memory'] = 0

        saved_optimize = model.optimize

        for optimize_config in (with_memory, without_memory):
            aug.model.optimize = optimize_config
            result = aug.augment(self.text)
            self.assertNotEqual(self.text, result)

        aug.model.optimize = saved_optimize
def test_none_device(self):
    """Check that ``device=None`` resolves to either ``'cuda'`` or ``'cpu'``.

    Each model in ``self.model_paths`` is loaded with ``force_reload=True``
    and ``device=None``; the augmenter's ``device`` attribute must then be
    one of the two accepted values.
    """
    for model_path in self.model_paths:
        aug = nas.ContextualWordEmbsForSentenceAug(
            model_path=model_path, force_reload=True, device=None)
        # assertIn shows the actual device on failure, whereas
        # assertTrue(a == x or a == y) only reports "False is not true".
        self.assertIn(aug.device, ('cuda', 'cpu'))
def test_incorrect_model_name(self):
    """Check that an unknown ``model_path`` is rejected with ``ValueError``.

    The exception message must contain
    ``'Model name value is unexpected.'``.
    """
    with self.assertRaises(ValueError) as error:
        nas.ContextualWordEmbsForSentenceAug(model_path='unknown')

    # Must stay outside the ``with`` block: statements placed after the
    # raising call inside the block would never execute. assertIn also
    # prints the actual message on failure, unlike assertTrue(x in y).
    self.assertIn('Model name value is unexpected.', str(error.exception))
import os

import nlpaug.augmenter.char as nac
import nlpaug.augmenter.sentence as nas
import nlpaug.augmenter.word as naw
import nlpaug.flow as nafc
from nlpaug.util import Action

# Demo script: augment one sample text with three model paths and print the
# original next to each result. The first run asks for three outputs (n=3);
# the remaining runs use the default call.
text = "Embarrassment is a common emotion. But as a bot I don't really feel it."

aug = nas.ContextualWordEmbsForSentenceAug(model_path='xlnet-base-cased')
augmented_texts = aug.augment(text, n=3)
print("Original:", text, sep="\n")
print("Augmented Texts:", augmented_texts, sep="\n")

for model_path in ('gpt2', 'distilgpt2'):
    aug = nas.ContextualWordEmbsForSentenceAug(model_path=model_path)
    augmented_text = aug.augment(text)
    print("Original:", text, sep="\n")
    print("Augmented Text:", augmented_text, sep="\n")