def test_preprocessor_in_checkpoint(ray_start_4_cpus, tmpdir):
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)

    class DummyPreprocessor(Preprocessor):
        def __init__(self):
            super().__init__()
            self.is_same = True

        def fit(self, dataset):
            self.fitted_ = True

        def _transform_pandas(self, df: "pd.DataFrame") -> "pd.DataFrame":
            return df

    trainer = XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params=params,
        datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},
        preprocessor=DummyPreprocessor(),
    )
    result = trainer.fit()

    # Move checkpoint to a different directory.
    checkpoint_dict = result.checkpoint.to_dict()
    checkpoint = Checkpoint.from_dict(checkpoint_dict)
    checkpoint_path = checkpoint.to_directory(tmpdir)
    resume_from = Checkpoint.from_directory(checkpoint_path)

    model, preprocessor = load_checkpoint(resume_from)
    assert get_num_trees(model) == 10
    assert preprocessor.is_same
    assert preprocessor.fitted_
def test_tune(ray_start_4_cpus):
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)
    trainer = XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params={**params, **{"max_depth": 1}},
        datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},
    )

    tune.run(
        trainer.as_trainable(),
        config={"params": {"max_depth": tune.randint(2, 4)}},
        num_samples=2,
    )

    # Make sure the original Trainer is not affected.
    assert trainer.params["max_depth"] == 1
def train_xgboost(num_workers: int, use_gpu: bool = False) -> Result:
    train_dataset, valid_dataset, _ = prepare_data()

    # Scale some random columns.
    columns_to_scale = ["mean radius", "mean texture"]
    preprocessor = StandardScaler(columns=columns_to_scale)

    # XGBoost-specific params.
    params = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
    }

    trainer = XGBoostTrainer(
        scaling_config={
            "num_workers": num_workers,
            "use_gpu": use_gpu,
        },
        label_column="target",
        params=params,
        datasets={"train": train_dataset, "valid": valid_dataset},
        preprocessor=preprocessor,
        num_boost_round=100,
    )
    result = trainer.fit()
    print(result.metrics)

    return result
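# A minimal usage sketch of the function above, under the assumption that
# ``prepare_data()`` returns (train, valid, test) Ray Datasets. Metric names
# follow XGBoost's "<dataset>-<metric>" convention, so "valid-logloss" is a
# best guess here rather than a guaranteed key; ``.get`` keeps it safe.
result = train_xgboost(num_workers=2, use_gpu=False)
print(f"Final validation logloss: {result.metrics.get('valid-logloss')}")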
def test_fit(ray_start_4_cpus):
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)
    trainer = XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params=params,
        datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},
    )
    trainer.fit()
def test_validation(ray_start_4_cpus):
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)
    with pytest.raises(KeyError, match=TRAIN_DATASET_KEY):
        XGBoostTrainer(
            scaling_config={"num_workers": 2},
            label_column="target",
            params=params,
            datasets={"valid": valid_dataset},
        )
    with pytest.raises(KeyError, match="dmatrix_params"):
        XGBoostTrainer(
            scaling_config={"num_workers": 2},
            label_column="target",
            params=params,
            dmatrix_params={"data": {}},
            datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},
        )
def train_model(train_dataset: ray.data.Dataset, wandb_project: str) -> Result:
    """Train a simple XGBoost model and return the result."""
    trainer = XGBoostTrainer(
        scaling_config={"num_workers": 2},
        params={"tree_method": "auto"},
        label_column="target",
        datasets={"train": train_dataset},
        num_boost_round=10,
        run_config=RunConfig(
            callbacks=[
                # This is the part needed to enable logging to Weights & Biases.
                # It assumes you've logged in before, e.g. with `wandb login`.
                WandbLoggerCallback(
                    project=wandb_project,
                    save_checkpoints=True,
                )
            ]
        ),
    )
    result = trainer.fit()
    return result
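# A minimal usage sketch (assumed names): ``train_dataset`` would come from
# e.g. ``ray.data.from_pandas(...)``, and the project name is arbitrary.
# As noted above, this assumes you've already run ``wandb login``.
result = train_model(train_dataset=train_dataset, wandb_project="my-wandb-project")
print(result.metrics)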
def train_model(train_dataset: ray.data.Dataset, comet_project: str) -> Result:
    """Train a simple XGBoost model and return the result."""
    trainer = XGBoostTrainer(
        scaling_config={"num_workers": 2},
        params={"tree_method": "auto"},
        label_column="target",
        datasets={"train": train_dataset},
        num_boost_round=10,
        run_config=RunConfig(
            callbacks=[
                # This is the part needed to enable logging to Comet ML.
                # It assumes Comet ML can find a valid API key (e.g. via
                # the ``COMET_API_KEY`` environment variable).
                CometLoggerCallback(
                    project_name=comet_project,
                    save_checkpoints=True,
                )
            ]
        ),
    )
    result = trainer.fit()
    return result
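# A minimal usage sketch (assumed names): export the Comet API key before
# calling ``train_model``, since the callback above reads it from the
# environment. The placeholder key and project name are illustrative only.
import os

os.environ.setdefault("COMET_API_KEY", "<your-comet-api-key>")
result = train_model(train_dataset=train_dataset, comet_project="my-comet-project")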
def test_resume_from_checkpoint(ray_start_4_cpus, tmpdir):
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)
    trainer = XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params=params,
        num_boost_round=5,
        datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},
    )
    result = trainer.fit()
    checkpoint = result.checkpoint
    xgb_model, _ = load_checkpoint(checkpoint)
    assert get_num_trees(xgb_model) == 5

    # Move checkpoint to a different directory.
    checkpoint_dict = result.checkpoint.to_dict()
    checkpoint = Checkpoint.from_dict(checkpoint_dict)
    checkpoint_path = checkpoint.to_directory(tmpdir)
    resume_from = Checkpoint.from_directory(checkpoint_path)

    trainer = XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params=params,
        num_boost_round=5,
        datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},
        resume_from_checkpoint=resume_from,
    )
    result = trainer.fit()
    checkpoint = result.checkpoint
    model, _ = load_checkpoint(checkpoint)
    assert get_num_trees(model) == 10
def test_tuner_with_xgboost_trainer(self):
    """Test a successful run."""
    shutil.rmtree(
        os.path.join(DEFAULT_RESULTS_DIR, "test_tuner"), ignore_errors=True
    )
    trainer = XGBoostTrainer(
        label_column="target",
        params={},
        # TODO(xwjiang): change when dataset out-of-band ser/des is landed.
        datasets={"train": gen_dataset_func_eager()},
    )
    # prep_v1 = StandardScaler(["worst radius", "worst area"])
    # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
    param_space = {
        "scaling_config": {
            "num_workers": tune.grid_search([1, 2]),
        },
        # TODO(xwjiang): Add when https://github.com/ray-project/ray/issues/23363
        # is resolved.
        # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        # "datasets": {
        #     "train": tune.choice(
        #         [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
        #     ),
        # },
        "params": {
            "objective": "binary:logistic",
            "tree_method": "approx",
            "eval_metric": ["logloss", "error"],
            "eta": tune.loguniform(1e-4, 1e-1),
            "subsample": tune.uniform(0.5, 1.0),
            "max_depth": tune.randint(1, 9),
        },
    }
    tuner = Tuner(
        trainable=trainer,
        run_config=RunConfig(name="test_tuner"),
        param_space=param_space,
        tune_config=TuneConfig(mode="min", metric="train-error"),
    )
    results = tuner.fit()
    assert not isinstance(results.get_best_result().checkpoint, TrialCheckpoint)
    assert len(results) == 2
def test_tuner_with_xgboost_trainer(self):
    """Test a successful run."""
    shutil.rmtree(
        os.path.join(DEFAULT_RESULTS_DIR, "test_tuner"), ignore_errors=True
    )
    trainer = XGBoostTrainer(
        label_column="target",
        params={},
        datasets={"train": gen_dataset_func_eager()},
    )
    # prep_v1 = StandardScaler(["worst radius", "worst area"])
    # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
    param_space = {
        "scaling_config": {
            "num_workers": tune.grid_search([1, 2]),
        },
        # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        "datasets": {
            "train": tune.grid_search(
                [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
            ),
        },
        "params": {
            "objective": "binary:logistic",
            "tree_method": "approx",
            "eval_metric": ["logloss", "error"],
            "eta": tune.loguniform(1e-4, 1e-1),
            "subsample": tune.uniform(0.5, 1.0),
            "max_depth": tune.randint(1, 9),
        },
    }
    tuner = Tuner(
        trainable=trainer,
        run_config=RunConfig(name="test_tuner"),
        param_space=param_space,
        tune_config=TuneConfig(mode="min", metric="train-error"),
        # Limit the number of trials running at one time, since the unit
        # test only has access to 4 CPUs on Buildkite.
        _tuner_kwargs={"max_concurrent_trials": 1},
    )
    results = tuner.fit()
    assert not isinstance(results.get_best_result().checkpoint, TrialCheckpoint)
    assert len(results) == 4
params = {
    "tree_method": "approx",
    "objective": "binary:logistic",
    "eval_metric": ["logloss", "error"],
}

num_workers = 2
# Set to True to train on GPUs.
use_gpu = False

trainer = XGBoostTrainer(
    scaling_config={
        "num_workers": num_workers,
        "use_gpu": use_gpu,
    },
    label_column="target",
    params=params,
    datasets={"train": train_dataset, "valid": valid_dataset},
    preprocessor=preprocessor,
    num_boost_round=20,
)
result = trainer.fit()
print(result.metrics)
# __air_xgb_train_end__

# __air_xgb_batchpred_start__
from ray.ml.batch_predictor import BatchPredictor
from ray.ml.predictors.integrations.xgboost import XGBoostPredictor

batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint, XGBoostPredictor)
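# A hedged continuation of the snippet above: score a dataset with the batch
# predictor. ``test_dataset`` is an assumed name for a Ray Dataset with the
# same feature columns the model was trained on (minus the label column).
predicted_probabilities = batch_predictor.predict(test_dataset)
predicted_probabilities.show()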
def test_tuner_with_xgboost_trainer_driver_fail_and_resume(self):
    # So that we have some global checkpointing happening.
    os.environ["TUNE_GLOBAL_CHECKPOINT_S"] = "1"
    shutil.rmtree(
        os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_driver_fail"),
        ignore_errors=True,
    )
    trainer = XGBoostTrainer(
        label_column="target",
        params={},
        # TODO(xwjiang): change when dataset out-of-band ser/des is landed.
        datasets={"train": gen_dataset_func_eager()},
    )
    # prep_v1 = StandardScaler(["worst radius", "worst area"])
    # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
    param_space = {
        "scaling_config": {
            "num_workers": tune.grid_search([1, 2]),
        },
        # TODO(xwjiang): Add when https://github.com/ray-project/ray/issues/23363
        # is resolved.
        # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        # "datasets": {
        #     "train": tune.choice(
        #         [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
        #     ),
        # },
        "params": {
            "objective": "binary:logistic",
            "tree_method": "approx",
            "eval_metric": ["logloss", "error"],
            "eta": tune.loguniform(1e-4, 1e-1),
            "subsample": tune.uniform(0.5, 1.0),
            "max_depth": tune.randint(1, 9),
        },
    }

    class FailureInjectionCallback(Callback):
        """Inject failure at the configured iteration number."""

        def __init__(self, num_iters=10):
            self.num_iters = num_iters

        def on_step_end(self, iteration, trials, **kwargs):
            if iteration == self.num_iters:
                print(f"Failing after {self.num_iters} iters.")
                raise RuntimeError

    tuner = Tuner(
        trainable=trainer,
        run_config=RunConfig(
            name="test_tuner_driver_fail", callbacks=[FailureInjectionCallback()]
        ),
        param_space=param_space,
        tune_config=TuneConfig(mode="min", metric="train-error"),
    )
    with self.assertRaises(TuneError):
        tuner.fit()

    # Test resume.
    restore_path = os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_driver_fail")
    tuner = Tuner.restore(restore_path)
    # A hack before we figure out RunConfig semantics across resumes.
    tuner._local_tuner._run_config.callbacks = None
    results = tuner.fit()
    assert len(results) == 2