Example 1: Online PPO training on CartPole with an RLTrainer, run through a Tuner so an end-of-run checkpoint is produced.
def train_rl_ppo_online(num_workers: int, use_gpu: bool = False) -> Result:
    print("Starting online training")
    trainer = RLTrainer(
        run_config=RunConfig(stop={"training_iteration": 5}),
        scaling_config={
            "num_workers": num_workers,
            "use_gpu": use_gpu,
        },
        algorithm="PPO",
        config={
            "env": "CartPole-v0",
            "framework": "tf",
            "evaluation_num_workers": 1,
            "evaluation_interval": 1,
            "evaluation_config": {
                "input": "sampler"
            },
        },
    )
    # TODO(krfricke/xwjiang): Enable checkpoint config in RunConfig
    # result = trainer.fit()
    tuner = Tuner(
        trainer,
        _tuner_kwargs={"checkpoint_at_end": True},
    )
    result = tuner.fit()[0]
    return result
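
These snippets are excerpts and leave out their imports. A minimal sketch of what they assume, with module paths that vary across Ray versions and should be treated as assumptions:

# Approximate imports for the examples on this page; exact module paths depend
# on the Ray release in use and are assumptions here.
import ray
from ray import tune
from ray.tune.tuner import Tuner
from ray.tune.tune_config import TuneConfig
from ray.air.config import RunConfig
from ray.train.rl import RLTrainer            # Examples 1, 3, 6, 10
from ray.train.torch import TorchTrainer      # Examples 2, 12
from ray.train.xgboost import XGBoostTrainer  # Examples 8, 9, 14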
Example 2: Tuning a TorchTrainer by grid-searching over num_workers, batch_size, and epochs.
 def test_tuner_with_torch_trainer(self):
     """Test a successful run using torch trainer."""
     shutil.rmtree(
         os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_torch"), ignore_errors=True
     )
     # The following two should be tunable.
     config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": 10}
     scaling_config = {"num_workers": 1, "use_gpu": False}
     trainer = TorchTrainer(
         train_loop_per_worker=linear_train_func,
         train_loop_config=config,
         scaling_config=scaling_config,
     )
     param_space = {
         "scaling_config": {
             "num_workers": tune.grid_search([1, 2]),
         },
         "train_loop_config": {
             "batch_size": tune.grid_search([4, 8]),
             "epochs": tune.grid_search([5, 10]),
         },
     }
     tuner = Tuner(
         trainable=trainer,
         run_config=RunConfig(name="test_tuner"),
         param_space=param_space,
         tune_config=TuneConfig(mode="min", metric="loss"),
     )
     results = tuner.fit()
     assert len(results) == 8
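
The final assertion follows from the grid-search cross product: two values each for num_workers, batch_size, and epochs give 2 × 2 × 2 = 8 trials. A quick, self-contained check of that count:

# Cross product behind `assert len(results) == 8` (illustrative only).
from itertools import product

grid = {"num_workers": [1, 2], "batch_size": [4, 8], "epochs": [5, 10]}
assert len(list(product(*grid.values()))) == 8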
Example 3: Offline behavior-cloning (BC) training from a JSON dataset read with Ray Data.
def train_rl_bc_offline(path: str,
                        num_workers: int,
                        use_gpu: bool = False) -> Result:
    print("Starting offline training")
    dataset = ray.data.read_json(path,
                                 parallelism=num_workers,
                                 ray_remote_args={"num_cpus": 1})

    trainer = RLTrainer(
        run_config=RunConfig(stop={"training_iteration": 5}),
        scaling_config={
            "num_workers": num_workers,
            "use_gpu": use_gpu,
        },
        datasets={"train": dataset},
        algorithm=BCTrainer,
        config={
            "env": "CartPole-v0",
            "framework": "tf",
            "evaluation_num_workers": 1,
            "evaluation_interval": 1,
            "evaluation_config": {
                "input": "sampler"
            },
        },
    )

    # TODO(krfricke/xwjiang): Enable checkpoint config in RunConfig
    # result = trainer.fit()
    tuner = Tuner(
        trainer,
        _tuner_kwargs={"checkpoint_at_end": True},
    )
    result = tuner.fit()[0]
    return result
Example 4: The Tuner constructor, which either restores state from a checkpoint directory or sets up a fresh experiment and pickles the tuner and trainable for later recovery.
    def __init__(
        self,
        restore_path: Optional[str] = None,
        trainable: Optional[Union[str, Callable, Type[Trainable], Trainer]] = None,
        param_space: Optional[Dict[str, Any]] = None,
        tune_config: Optional[TuneConfig] = None,
        run_config: Optional[RunConfig] = None,
    ):
        # Restored from Tuner checkpoint.
        if restore_path:
            trainable_ckpt = os.path.join(restore_path, _TRAINABLE_PKL)
            with open(trainable_ckpt, "rb") as fp:
                trainable = pickle.load(fp)

            tuner_ckpt = os.path.join(restore_path, _TUNER_PKL)
            with open(tuner_ckpt, "rb") as fp:
                tuner = pickle.load(fp)
                self.__dict__.update(tuner.__dict__)

            self._is_restored = True
            self._trainable = trainable
            self._experiment_checkpoint_dir = restore_path
            return

        # Start a fresh run.
        if not trainable:
            raise TuneError("You need to provide a trainable to tune.")

        self._is_restored = False
        self._trainable = trainable
        self._tune_config = tune_config or TuneConfig()
        self._run_config = run_config or RunConfig()
        self._experiment_checkpoint_dir = self._setup_create_experiment_checkpoint_dir(
            self._run_config)

        # Not used for restored Tuner.
        self._param_space = param_space or {}
        self._process_dataset_param()

        # This has to happen before `tune.run()` starts. When running in Ray
        # client mode, Tune currently does not exit gracefully on a crash; it
        # exits immediately, leaving no chance to checkpoint the tuner and
        # trainable. Writing them out up front guarantees there is something
        # to restore from.
        tuner_ckpt = os.path.join(self._experiment_checkpoint_dir, _TUNER_PKL)
        with open(tuner_ckpt, "wb") as fp:
            pickle.dump(self, fp)

        trainable_ckpt = os.path.join(self._experiment_checkpoint_dir,
                                      _TRAINABLE_PKL)
        with open(trainable_ckpt, "wb") as fp:
            pickle.dump(self._trainable, fp)
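
The constructor above supports two entry points: a fresh experiment, or rehydrating a previous one from its checkpoint directory. A minimal usage sketch under those assumptions, where `trainer` and the experiment name are illustrative placeholders:

# Fresh run: the tuner and trainable are pickled up front so that a later
# crash can still be recovered from.
tuner = Tuner(trainable=trainer, run_config=RunConfig(name="my_experiment"))
results = tuner.fit()

# After a driver failure, resume from the experiment checkpoint directory
# (Example 14 below exercises Tuner.restore() in a test).
restored = Tuner.restore(os.path.join(DEFAULT_RESULTS_DIR, "my_experiment"))
results = restored.fit()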
Example 5: A base trainer constructor that stores scaling, run, dataset, preprocessor, and resume-from-checkpoint configuration with defaults.
    def __init__(
        self,
        scaling_config: Optional[ScalingConfig] = None,
        run_config: Optional[RunConfig] = None,
        datasets: Optional[Dict[str, GenDataset]] = None,
        preprocessor: Optional[Preprocessor] = None,
        resume_from_checkpoint: Optional[Checkpoint] = None,
    ):

        self.scaling_config = scaling_config if scaling_config else {}
        self.run_config = run_config if run_config else RunConfig()
        self.datasets = datasets if datasets else {}
        self.preprocessor = preprocessor
        self.resume_from_checkpoint = resume_from_checkpoint
Example 6: Generating offline training data by running PPO with dataset output enabled.
def generate_offline_data(path: str):
    print(f"Generating offline data for training at {path}")
    trainer = RLTrainer(
        algorithm="PPO",
        run_config=RunConfig(stop={"timesteps_total": 5000}),
        config={
            "env": "CartPole-v0",
            "output": "dataset",
            "output_config": {
                "format": "json",
                "path": path,
                "max_num_samples_per_file": 1,
            },
            "batch_mode": "complete_episodes",
        },
    )
    trainer.fit()
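
Examples 3 and 6 combine into an offline RL workflow: first generate experience with PPO, then fit a behavior-cloning policy on it. A minimal driver sketch, where the output path is an illustrative placeholder:

# Generate offline experience with PPO, then train BC on the resulting dataset.
path = "/tmp/cartpole-offline-data"  # illustrative location
generate_offline_data(path)
result = train_rl_bc_offline(path=path, num_workers=2, use_gpu=False)
print(result.metrics)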
Example 7: Verifying that every trial reports an error when the underlying trainer raises.
 def test_tuner_trainer_fail(self):
     trainer = DummyTrainer()
     param_space = {
         "scaling_config": {
             "num_workers": tune.grid_search([1, 2]),
         }
     }
     tuner = Tuner(
         trainable=trainer,
         run_config=RunConfig(name="test_tuner_trainer_fail"),
         param_space=param_space,
         tune_config=TuneConfig(mode="max", metric="iteration"),
     )
     results = tuner.fit()
     assert len(results) == 2
     for i in range(2):
         assert results[i].error
Example 8: Tuning an XGBoostTrainer over scaling and XGBoost hyperparameters; the grid search over num_workers yields 2 trials.
 def test_tuner_with_xgboost_trainer(self):
     """Test a successful run."""
     shutil.rmtree(os.path.join(DEFAULT_RESULTS_DIR, "test_tuner"),
                   ignore_errors=True)
     trainer = XGBoostTrainer(
         label_column="target",
         params={},
         # TODO(xwjiang): change when dataset out-of-band ser/des is landed.
         datasets={"train": gen_dataset_func_eager()},
     )
     # prep_v1 = StandardScaler(["worst radius", "worst area"])
     # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
     param_space = {
         "scaling_config": {
             "num_workers": tune.grid_search([1, 2]),
         },
         # TODO(xwjiang): Add when https://github.com/ray-project/ray/issues/23363
         #  is resolved.
         # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
         # "datasets": {
         #     "train": tune.choice(
         #         [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
         #     ),
         # },
         "params": {
             "objective": "binary:logistic",
             "tree_method": "approx",
             "eval_metric": ["logloss", "error"],
             "eta": tune.loguniform(1e-4, 1e-1),
             "subsample": tune.uniform(0.5, 1.0),
             "max_depth": tune.randint(1, 9),
         },
     }
     tuner = Tuner(
         trainable=trainer,
         run_config=RunConfig(name="test_tuner"),
         param_space=param_space,
         tune_config=TuneConfig(mode="min", metric="train-error"),
     )
     results = tuner.fit()
     assert not isinstance(results.get_best_result().checkpoint,
                           TrialCheckpoint)
     assert len(results) == 2
Example 9: The same XGBoost tuning run, additionally grid-searching over two datasets (2 × 2 = 4 trials) and capping concurrency at one trial.
 def test_tuner_with_xgboost_trainer(self):
     """Test a successful run."""
     shutil.rmtree(
         os.path.join(DEFAULT_RESULTS_DIR, "test_tuner"), ignore_errors=True
     )
     trainer = XGBoostTrainer(
         label_column="target",
         params={},
         datasets={"train": gen_dataset_func_eager()},
     )
     # prep_v1 = StandardScaler(["worst radius", "worst area"])
     # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
     param_space = {
         "scaling_config": {
             "num_workers": tune.grid_search([1, 2]),
         },
         # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
         "datasets": {
             "train": tune.grid_search(
                 [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
             ),
         },
         "params": {
             "objective": "binary:logistic",
             "tree_method": "approx",
             "eval_metric": ["logloss", "error"],
             "eta": tune.loguniform(1e-4, 1e-1),
             "subsample": tune.uniform(0.5, 1.0),
             "max_depth": tune.randint(1, 9),
         },
     }
     tuner = Tuner(
         trainable=trainer,
         run_config=RunConfig(name="test_tuner"),
         param_space=param_space,
         tune_config=TuneConfig(mode="min", metric="train-error"),
         # Limit the number of trials running at one time, since the unit
         # test only has access to 4 CPUs on Buildkite.
         _tuner_kwargs={"max_concurrent_trials": 1},
     )
     results = tuner.fit()
     assert not isinstance(results.get_best_result().checkpoint, TrialCheckpoint)
     assert len(results) == 4
Example 10: Online PPO training on CartPole, calling trainer.fit() directly instead of going through a Tuner.
def train_rl_ppo_online(num_workers: int, use_gpu: bool = False) -> Result:
    print("Starting online training")
    trainer = RLTrainer(
        run_config=RunConfig(stop={"training_iteration": 5}),
        scaling_config={
            "num_workers": num_workers,
            "use_gpu": use_gpu,
        },
        algorithm="PPO",
        config={
            "env": "CartPole-v0",
            "framework": "tf",
            "evaluation_num_workers": 1,
            "evaluation_interval": 1,
            "evaluation_config": {"input": "sampler"},
        },
    )
    result = trainer.fit()

    return result
Example 11: The failing-trainer test in full, including the DummyTrainer definition used in Example 7.
    def test_tuner_trainer_fail(self):
        class DummyTrainer(Trainer):
            def training_loop(self) -> None:
                raise RuntimeError("There is an error in trainer!")

        trainer = DummyTrainer()
        param_space = {
            "scaling_config": {
                "num_workers": tune.grid_search([1, 2]),
            }
        }
        tuner = Tuner(
            trainable=trainer,
            run_config=RunConfig(name="test_tuner_trainer_fail"),
            param_space=param_space,
            tune_config=TuneConfig(mode="max", metric="iteration"),
        )
        results = tuner.fit()
        assert len(results) == 2
        for i in range(2):
            assert results[i].error
Example 12: The TorchTrainer tuning test in full, including the commented-out preprocessor and dataset search spaces.
 def test_tuner_with_torch_trainer(self):
     """Test a successful run using torch trainer."""
     shutil.rmtree(
         os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_torch"), ignore_errors=True
     )
     # The following two should be tunable.
     config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": 10}
     scaling_config = {"num_workers": 1, "use_gpu": False}
     trainer = TorchTrainer(
         train_loop_per_worker=linear_train_func,
         train_loop_config=config,
         scaling_config=scaling_config,
     )
     # prep_v1 = StandardScaler(["worst radius", "worst area"])
     # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
     param_space = {
         "scaling_config": {
             "num_workers": tune.grid_search([1, 2]),
         },
         # TODO(xwjiang): Add when https://github.com/ray-project/ray/issues/23363
         #  is resolved.
         # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
         # "datasets": {
         #     "train": tune.choice(
         #         [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
         #     ),
         # },
         "train_loop_config": {
             "batch_size": tune.grid_search([4, 8]),
             "epochs": tune.grid_search([5, 10]),
         },
     }
     tuner = Tuner(
         trainable=trainer,
         run_config=RunConfig(name="test_tuner"),
         param_space=param_space,
         tune_config=TuneConfig(mode="min", metric="loss"),
     )
     results = tuner.fit()
     assert len(results) == 8
Example 13: Checking that a trainer's own RunConfig is picked up by the Tuner when none is passed explicitly.
    def test_tuner_run_config_override(self):
        trainer = DummyTrainer(run_config=RunConfig(stop={"metric": 4}))
        tuner = Tuner(trainer)

        assert tuner._local_tuner._run_config.stop == {"metric": 4}
Example 14: Injecting a driver failure mid-run and then resuming the experiment with Tuner.restore().
    def test_tuner_with_xgboost_trainer_driver_fail_and_resume(self):
        # So that we have some global checkpointing happening.
        os.environ["TUNE_GLOBAL_CHECKPOINT_S"] = "1"
        shutil.rmtree(
            os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_driver_fail"),
            ignore_errors=True,
        )
        trainer = XGBoostTrainer(
            label_column="target",
            params={},
            # TODO(xwjiang): change when dataset out-of-band ser/des is landed.
            datasets={"train": gen_dataset_func_eager()},
        )
        # prep_v1 = StandardScaler(["worst radius", "worst area"])
        # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
        param_space = {
            "scaling_config": {
                "num_workers": tune.grid_search([1, 2]),
            },
            # TODO(xwjiang): Add when https://github.com/ray-project/ray/issues/23363
            #  is resolved.
            # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
            # "datasets": {
            #     "train": tune.choice(
            #         [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
            #     ),
            # },
            "params": {
                "objective": "binary:logistic",
                "tree_method": "approx",
                "eval_metric": ["logloss", "error"],
                "eta": tune.loguniform(1e-4, 1e-1),
                "subsample": tune.uniform(0.5, 1.0),
                "max_depth": tune.randint(1, 9),
            },
        }

        class FailureInjectionCallback(Callback):
            """Inject failure at the configured iteration number."""
            def __init__(self, num_iters=10):
                self.num_iters = num_iters

            def on_step_end(self, iteration, trials, **kwargs):
                if iteration == self.num_iters:
                    print(f"Failing after {self.num_iters} iters.")
                    raise RuntimeError

        tuner = Tuner(
            trainable=trainer,
            run_config=RunConfig(name="test_tuner_driver_fail",
                                 callbacks=[FailureInjectionCallback()]),
            param_space=param_space,
            tune_config=TuneConfig(mode="min", metric="train-error"),
        )
        with self.assertRaises(TuneError):
            tuner.fit()

        # Test resume
        restore_path = os.path.join(DEFAULT_RESULTS_DIR,
                                    "test_tuner_driver_fail")
        tuner = Tuner.restore(restore_path)
        # A hack before we figure out RunConfig semantics across resumes.
        tuner._local_tuner._run_config.callbacks = None
        results = tuner.fit()
        assert len(results) == 2