def test_hyperparameter_search(self):
    """Run a short hyperparameter search under DeepSpeed ZeRO-3 and verify its console/log output."""
    with mockenv_context(**self.dist_env_1_gpu):
        ds_config_zero3_dict = self.get_config_dict(ZERO3)

        # hyperparameter_search requires model_init() to recreate the model for each trial
        def model_init():
            return RegressionPreTrainedModel(RegressionModelConfig(a=0, b=0, double_output=False))

        trainer = get_regression_trainer(
            local_rank=0,
            fp16=True,
            model_init=model_init,
            deepspeed=ds_config_zero3_dict,
        )

        trial_count = 3
        with CaptureLogger(deepspeed_logger) as logs, CaptureStd() as std:
            trainer.hyperparameter_search(direction="maximize", n_trials=trial_count)

        # DeepSpeed announces itself on its logger; optuna reports trials on stderr
        self.assertIn("DeepSpeed info", logs.out, "expected DeepSpeed logger output but got none")
        self.assertIn(f"Trial {trial_count-1} finished with value", std.err, "expected hyperparameter_search output")
        self.assertIn("Best is trial", std.err, "expected hyperparameter_search output")
def test_fake_notebook_no_launcher(self):
    """Emulate a notebook environment where the distributed launcher has to be faked by hand."""
    # the captured output is intentionally discarded; CaptureStd is only used to
    # keep the fake-launcher console noise out of the test log
    with CaptureStd():
        with mockenv_context(**self.dist_env_1_gpu):
            trainer = get_regression_trainer(local_rank=0, deepspeed=self.ds_config_file)
            trainer.train()
def test_cli_env(self):
    """Smoke-test `transformers-cli env`: run it and check key report lines appear on stdout."""
    import transformers.commands.transformers_cli

    with CaptureStd() as captured:
        transformers.commands.transformers_cli.main()

    expected_fragments = (
        "Python version",
        "Platform",
        "Using distributed or parallel set-up in script?",
    )
    for fragment in expected_fragments:
        assert fragment in captured.out
def test_fake_notebook_no_launcher(self):
    """Emulate a notebook environment (launcher faked by hand) and check DeepSpeed logged something."""
    with CaptureStd() as captured, mockenv_context(**self.dist_env_1_gpu):
        regression_trainer = get_regression_trainer(local_rank=0, deepspeed=self.ds_config_file)
        regression_trainer.train()
    assert "DeepSpeed info" in captured.out, "expected DeepSpeed logger output but got none"
def test_fake_notebook_no_launcher(self):
    """Emulate running in a notebook (no launcher process): train directly and verify DeepSpeed logs.

    `get_regression_trainer` lives in the tests directory, which is not on
    ``sys.path`` here, so the path is extended just long enough for the import.
    """
    sys.path.append(self.tests_dir_str)
    try:
        from test_trainer import get_regression_trainer
    finally:
        # restore sys.path even if the import raises, so a failure here
        # doesn't leak a path entry into every subsequent test
        sys.path.pop()

    ds_config_file = f"{self.test_file_dir_str}/ds_config.json"
    with CaptureStd() as cs:
        trainer = get_regression_trainer(local_rank=0, deepspeed=ds_config_file)
        trainer.train()
    # enough to verify DeepSpeed actually initialized
    assert "DeepSpeed info" in cs.out, "expected DeepSpeed logger output but got none"
def test_load_best_model(self, stage, dtype):
    # this test exercises --load_best_model_at_end - the key is being able to resume after some training
    # `stage` selects the ds_config_{stage}.json DeepSpeed config; `dtype` becomes a --{dtype} CLI flag
    data_dir = self.tests_dir / "fixtures/tests_samples/wmt_en_ro"
    output_dir = self.get_auto_remove_tmp_dir()
    # save/eval/log every step so a "best" checkpoint exists after just a few samples
    args = f"""
        --model_name_or_path {T5_TINY}
        --tokenizer_name {T5_TINY}
        --train_file {data_dir}/train.json
        --validation_file {data_dir}/val.json
        --output_dir {output_dir}
        --overwrite_output_dir
        --source_lang en
        --target_lang ro
        --do_train
        --max_train_samples 3
        --do_eval
        --max_eval_samples 1
        --logging_strategy steps
        --logging_steps 1
        --evaluation_strategy steps
        --eval_steps 1
        --save_strategy steps
        --save_steps 1
        --load_best_model_at_end
        --per_device_train_batch_size 1
        --per_device_eval_batch_size 1
        --num_train_epochs 1
        --report_to none
    """.split()
    # the prefix contains spaces, so it can't go through the whitespace-split block above
    args.extend(["--source_prefix", "translate English to Romanian: "])
    args.extend([f"--{dtype}"])
    ds_args = f"--deepspeed {self.test_file_dir_str}/ds_config_{stage}.json".split()
    script = [f"{self.examples_dir_str}/pytorch/translation/run_translation.py"]
    launcher = get_launcher(distributed=False)

    cmd = launcher + script + args + ds_args
    # keep for quick debug
    # print(" ".join([f"\nPYTHONPATH={self.src_dir_str}"] +cmd)); die

    with CaptureStd() as cs:
        execute_subprocess_async(cmd, env=self.get_env())

    # enough to test it didn't fail
    self.assertIn("DeepSpeed info", cs.out)