Example #1
    def test_hyperparameter_search(self):
        with mockenv_context(**self.dist_env_1_gpu):

            ds_config_zero3_dict = self.get_config_dict(ZERO3)

            # hyperparameter_search requires model_init() to recreate the model for each trial
            def model_init():
                config = RegressionModelConfig(a=0, b=0, double_output=False)
                model = RegressionPreTrainedModel(config)
                return model

            trainer = get_regression_trainer(
                local_rank=0,
                fp16=True,
                model_init=model_init,
                deepspeed=ds_config_zero3_dict,
            )

            n_trials = 3
            with CaptureLogger(deepspeed_logger) as cl:
                with CaptureStd() as cs:
                    trainer.hyperparameter_search(direction="maximize",
                                                  n_trials=n_trials)
            self.assertIn("DeepSpeed info", cl.out,
                          "expected DeepSpeed logger output but got none")
            self.assertIn(f"Trial {n_trials-1} finished with value", cs.err,
                          "expected hyperparameter_search output")
            self.assertIn("Best is trial", cs.err,
                          "expected hyperparameter_search output")
Example #2
    def test_fake_notebook_no_launcher(self):
        # this setup emulates a notebook where a launcher needs to be emulated by hand
        with CaptureStd() as cs:  # noqa
            with mockenv_context(**self.dist_env_1_gpu):
                trainer = get_regression_trainer(local_rank=0,
                                                 deepspeed=self.ds_config_file)
                trainer.train()
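
mockenv_context and dist_env_1_gpu come from the suite's helpers: the point is to set the environment variables torch.distributed expects, so DeepSpeed can initialize inside a single process with no deepspeed launcher. A hypothetical re-implementation, with typical single-GPU values assumed for the env dict:

    import os
    from unittest import mock

    def mockenv_context(**kwargs):
        # temporarily overlay the given variables on os.environ for the with-block
        return mock.patch.dict(os.environ, {k: str(v) for k, v in kwargs.items()})

    # the kind of values dist_env_1_gpu would hold for a one-process, one-GPU run
    dist_env_1_gpu = dict(
        MASTER_ADDR="localhost", MASTER_PORT="10999", RANK="0", LOCAL_RANK="0", WORLD_SIZE="1"
    )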
Example #3
    def test_cli_env(self):
        # test transformers-cli env
        import transformers.commands.transformers_cli

        with CaptureStd() as cs:
            transformers.commands.transformers_cli.main()
        assert "Python version" in cs.out
        assert "Platform" in cs.out
        assert "Using distributed or parallel set-up in script?" in cs.out
Example #4
    def test_fake_notebook_no_launcher(self):
        # this setup emulates a notebook where a launcher needs to be emulated by hand
        with CaptureStd() as cs:
            with mockenv_context(**self.dist_env_1_gpu):
                trainer = get_regression_trainer(local_rank=0,
                                                 deepspeed=self.ds_config_file)
                trainer.train()
        assert "DeepSpeed info" in cs.out, "expected DeepSpeed logger output but got none"
Example #5
    def test_fake_notebook_no_launcher(self):
        sys.path.append(self.tests_dir_str)
        from test_trainer import get_regression_trainer

        del sys.path[-1]  # restore
        ds_config_file = f"{self.test_file_dir_str}/ds_config.json"
        with CaptureStd() as cs:
            trainer = get_regression_trainer(local_rank=0, deepspeed=ds_config_file)
            trainer.train()
        assert "DeepSpeed info" in cs.out, "expected DeepSpeed logger output but got none"
Example #6
    def test_load_best_model(self, stage, dtype):
        # this test exercises --load_best_model_at_end - the key is being able to resume after some training

        data_dir = self.tests_dir / "fixtures/tests_samples/wmt_en_ro"
        output_dir = self.get_auto_remove_tmp_dir()
        args = f"""
            --model_name_or_path {T5_TINY}
            --tokenizer_name {T5_TINY}
            --train_file {data_dir}/train.json
            --validation_file {data_dir}/val.json
            --output_dir {output_dir}
            --overwrite_output_dir
            --source_lang en
            --target_lang ro
            --do_train
            --max_train_samples 3
            --do_eval
            --max_eval_samples 1
            --logging_strategy steps
            --logging_steps 1
            --evaluation_strategy steps
            --eval_steps 1
            --save_strategy steps
            --save_steps 1
            --load_best_model_at_end
            --per_device_train_batch_size 1
            --per_device_eval_batch_size 1
            --num_train_epochs 1
            --report_to none
            """.split()
        args.extend(["--source_prefix", "translate English to Romanian: "])

        args.extend([f"--{dtype}"])

        ds_args = f"--deepspeed {self.test_file_dir_str}/ds_config_{stage}.json".split(
        )
        script = [
            f"{self.examples_dir_str}/pytorch/translation/run_translation.py"
        ]
        launcher = get_launcher(distributed=False)

        cmd = launcher + script + args + ds_args
        # keep for quick debug
        # print(" ".join([f"\nPYTHONPATH={self.src_dir_str}"] +cmd)); die
        with CaptureStd() as cs:
            execute_subprocess_async(cmd, env=self.get_env())
        # enough to test it didn't fail
        self.assertIn("DeepSpeed info", cs.out)