Beispiel #1
0
def test_checkpoint_freq(ray_start_4_cpus, freq_end_expected):
    """Check how many checkpoints are kept for a frequency/at-end setting.

    ``freq_end_expected`` unpacks into the checkpoint frequency, the
    checkpoint-at-end flag, and the number of checkpoints expected in the
    result after training with ``num_boost_round=25``.
    """
    checkpoint_frequency, checkpoint_at_end, expected_count = freq_end_expected

    datasets = {
        TRAIN_DATASET_KEY: ray.data.from_pandas(train_df),
        "valid": ray.data.from_pandas(test_df),
    }
    checkpoint_config = ray.air.CheckpointConfig(
        checkpoint_frequency=checkpoint_frequency,
        checkpoint_at_end=checkpoint_at_end,
    )
    trainer = XGBoostTrainer(
        run_config=ray.air.RunConfig(checkpoint_config=checkpoint_config),
        scaling_config=scale_config,
        label_column="target",
        params=params,
        num_boost_round=25,
        datasets=datasets,
    )
    best_checkpoints = trainer.fit().best_checkpoints

    # On failure, report (training_iteration, local path) for every
    # checkpoint that was kept so the mismatch is easy to diagnose.
    assert len(best_checkpoints) == expected_count, str(
        [
            (metrics["training_iteration"], checkpoint._local_path)
            for checkpoint, metrics in best_checkpoints
        ]
    )

    # The checkpoint paths must already be in ascending order.
    local_paths = [checkpoint._local_path for checkpoint, _ in best_checkpoints]
    assert local_paths == sorted(local_paths), str(local_paths)
Beispiel #2
0
def test_tune(ray_start_4_cpus):
    """Run the trainer through ``tune.run`` and check it is left untouched.

    The trainer is built with ``max_depth`` fixed to 1 while the Tune search
    space samples ``max_depth`` from ``tune.randint(2, 4)``; afterwards the
    original trainer must still report ``max_depth == 1``.
    """
    datasets = {
        TRAIN_DATASET_KEY: ray.data.from_pandas(train_df),
        "valid": ray.data.from_pandas(test_df),
    }
    trainer = XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params={**params, "max_depth": 1},
        datasets=datasets,
    )

    trainable = trainer.as_trainable()
    search_space = {"params": {"max_depth": tune.randint(2, 4)}}
    tune.run(trainable, config=search_space, num_samples=2)

    # Tuning must not write the sampled value back into the source trainer.
    assert trainer.params["max_depth"] == 1
def test_preprocessor_in_checkpoint(ray_start_4_cpus, tmpdir):
    """Check that the preprocessor survives a checkpoint round trip.

    After training, the result checkpoint is converted to a dict, rebuilt,
    written into ``tmpdir`` and reloaded from that directory. The reloaded
    model must have 10 trees, and the reloaded preprocessor must carry both
    the attribute set in ``__init__`` and the one set in ``fit``.
    """

    class DummyPreprocessor(Preprocessor):
        """Pass-through preprocessor that only records that it was fitted."""

        def __init__(self):
            super().__init__()
            # Set at construction time; checked after reloading.
            self.is_same = True

        def fit(self, dataset):
            # Set at fit time; checked after reloading.
            self.fitted_ = True

        def _transform_pandas(self, df: "pd.DataFrame") -> "pd.DataFrame":
            return df

    datasets = {
        TRAIN_DATASET_KEY: ray.data.from_pandas(train_df),
        "valid": ray.data.from_pandas(test_df),
    }
    result = XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params=params,
        datasets=datasets,
        preprocessor=DummyPreprocessor(),
    ).fit()

    # Relocate the checkpoint: dict -> Checkpoint -> directory -> Checkpoint.
    rebuilt = Checkpoint.from_dict(result.checkpoint.to_dict())
    relocated_path = rebuilt.to_directory(tmpdir)
    relocated = Checkpoint.from_directory(relocated_path)

    model, restored_preprocessor = load_checkpoint(relocated)
    assert get_num_trees(model) == 10
    assert restored_preprocessor.is_same
    assert restored_preprocessor.fitted_
def test_fit(ray_start_4_cpus):
    """Smoke test: fitting with train and validation datasets must not raise."""
    datasets = {
        TRAIN_DATASET_KEY: ray.data.from_pandas(train_df),
        "valid": ray.data.from_pandas(test_df),
    }
    XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params=params,
        datasets=datasets,
    ).fit()
def test_validation(ray_start_4_cpus):
    """Check that invalid constructor arguments raise ``KeyError``.

    Two cases are covered: a ``datasets`` mapping without the training key,
    and a ``dmatrix_params`` mapping whose key is not one of the datasets.
    """
    training_data = ray.data.from_pandas(train_df)
    validation_data = ray.data.from_pandas(test_df)

    def build_trainer(**overrides):
        # Arguments shared by both cases; each case supplies the rest.
        return XGBoostTrainer(
            scaling_config={"num_workers": 2},
            label_column="target",
            params=params,
            **overrides,
        )

    # The training dataset is missing.
    with pytest.raises(KeyError, match=TRAIN_DATASET_KEY):
        build_trainer(datasets={"valid": validation_data})

    # "data" is not the name of any supplied dataset.
    with pytest.raises(KeyError, match="dmatrix_params"):
        build_trainer(
            dmatrix_params={"data": {}},
            datasets={
                TRAIN_DATASET_KEY: training_data,
                "valid": validation_data,
            },
        )
Beispiel #6
0
def run_xgboost_training(data_path: str, num_workers: int):
    """Train an XGBoost binary classifier on parquet data and save the model.

    Args:
        data_path: Location handed to ``data.read_parquet``.
        num_workers: Number of training workers; each one requests 12 CPUs.

    The trained booster is written to ``_XGB_MODEL_PATH`` and Ray is shut
    down once the model has been saved.
    """
    train_ds = data.read_parquet(data_path)
    scaling = ScalingConfig(
        num_workers=num_workers,
        resources_per_worker={"CPU": 12},
    )
    trainer = XGBoostTrainer(
        scaling_config=scaling,
        label_column="labels",
        params={
            "objective": "binary:logistic",
            "eval_metric": ["logloss", "error"],
        },
        datasets={"train": train_ds},
    )

    fit_result = trainer.fit()
    booster = XGBoostCheckpoint.from_checkpoint(fit_result.checkpoint).get_model()
    booster.save_model(_XGB_MODEL_PATH)
    ray.shutdown()
def test_resume_from_checkpoint(ray_start_4_cpus, tmpdir):
    """Check that training can continue from a relocated checkpoint.

    A first run with 5 boosting rounds must yield a 5-tree model. Its
    checkpoint is moved into ``tmpdir`` and used as ``resume_from_checkpoint``
    for a second 5-round run, which must yield a 10-tree model.
    """
    training_data = ray.data.from_pandas(train_df)
    validation_data = ray.data.from_pandas(test_df)

    def build_trainer(**extra):
        # Both runs share everything except the optional resume checkpoint.
        return XGBoostTrainer(
            scaling_config=scale_config,
            label_column="target",
            params=params,
            num_boost_round=5,
            datasets={
                TRAIN_DATASET_KEY: training_data,
                "valid": validation_data,
            },
            **extra,
        )

    first_result = build_trainer().fit()
    first_model, _ = load_checkpoint(first_result.checkpoint)
    assert get_num_trees(first_model) == 5

    # Relocate the checkpoint: dict -> Checkpoint -> directory -> Checkpoint.
    rebuilt = Checkpoint.from_dict(first_result.checkpoint.to_dict())
    relocated_path = rebuilt.to_directory(tmpdir)
    relocated = Checkpoint.from_directory(relocated_path)

    second_result = build_trainer(resume_from_checkpoint=relocated).fit()
    resumed_model, _ = load_checkpoint(second_result.checkpoint)
    assert get_num_trees(resumed_model) == 10
Beispiel #8
0
 def test_tuner_with_xgboost_trainer(self):
     """Run a Tuner over an XGBoostTrainer and expect all four trials.

     The search space grid-searches two worker counts and two training
     datasets (2 x 2 = 4 grid points), and samples ``eta``, ``subsample``
     and ``max_depth`` for each.
     """
     # Start from a clean results directory for this experiment name.
     stale_results = os.path.join(DEFAULT_RESULTS_DIR, "test_tuner")
     shutil.rmtree(stale_results, ignore_errors=True)

     trainer = XGBoostTrainer(
         label_column="target",
         params={},
         datasets={"train": gen_dataset_func_eager()},
     )

     # A preprocessor grid search is sketched below but currently disabled:
     # prep_v1 = StandardScaler(["worst radius", "worst area"])
     # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
     worker_grid = tune.grid_search([1, 2])
     dataset_grid = tune.grid_search(
         [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
     )
     xgb_space = {
         "objective": "binary:logistic",
         "tree_method": "approx",
         "eval_metric": ["logloss", "error"],
         "eta": tune.loguniform(1e-4, 1e-1),
         "subsample": tune.uniform(0.5, 1.0),
         "max_depth": tune.randint(1, 9),
     }
     param_space = {
         "scaling_config": {"num_workers": worker_grid},
         # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
         "datasets": {"train": dataset_grid},
         "params": xgb_space,
     }

     tuner = Tuner(
         trainable=trainer,
         run_config=RunConfig(name="test_tuner"),
         param_space=param_space,
         tune_config=TuneConfig(mode="min", metric="train-error"),
         # Run one trial at a time: the unit test only has access to
         # 4 CPUs on Buildkite.
         _tuner_kwargs={"max_concurrent_trials": 1},
     )
     results = tuner.fit()

     best_checkpoint = results.get_best_result().checkpoint
     assert not isinstance(best_checkpoint, TrialCheckpoint)
     assert len(results) == 4
Beispiel #9
0
# XGBoost parameters for binary classification, evaluated with both
# log-loss and error rate.
params = {
    "tree_method": "approx",
    "objective": "binary:logistic",
    "eval_metric": ["logloss", "error"],
}

num_workers = 2
use_gpu = False  # GPU training is off; forwarded as "use_gpu" below.

# `train_dataset`, `valid_dataset` and `preprocessor` are defined earlier,
# outside this snippet.
trainer = XGBoostTrainer(
    scaling_config={
        "num_workers": num_workers,
        "use_gpu": use_gpu,
    },
    label_column="target",
    params=params,
    datasets={
        "train": train_dataset,
        "valid": valid_dataset
    },
    preprocessor=preprocessor,
    num_boost_round=20,
)
# Run training and print the metrics reported on the result.
result = trainer.fit()
print(result.metrics)
# __air_xgb_train_end__

# __air_xgb_batchpred_start__
from ray.train.batch_predictor import BatchPredictor
from ray.train.xgboost import XGBoostPredictor

batch_predictor = BatchPredictor.from_checkpoint(result.checkpoint,
Beispiel #10
0
# Toy regression data where y = 2 * x. Training rows use x = 0, 3, 6, ...
# and validation rows use x = 1, 4, 7, ..., so the two sets do not overlap.
train_dataset = ray.data.from_items([{
    "x": x,
    "y": 2 * x
} for x in range(0, 32, 3)])
valid_dataset = ray.data.from_items([{
    "x": x,
    "y": 2 * x
} for x in range(1, 32, 3)])

# Preprocessor configured for the "x" feature column.
preprocessor = MinMaxScaler(["x"])

# Squared-error regression on label column "y", with two workers.
trainer = XGBoostTrainer(
    label_column="y",
    params={"objective": "reg:squarederror"},
    scaling_config=ScalingConfig(num_workers=2),
    datasets={
        "train": train_dataset,
        "valid": valid_dataset
    },
    preprocessor=preprocessor,
)
result = trainer.fit()
# __trainer_end__

# __checkpoint_start__
import os
import ray.cloudpickle as cpickle
from ray.air.constants import PREPROCESSOR_KEY

checkpoint = result.checkpoint
with checkpoint.as_directory() as checkpoint_path:
    path = os.path.join(checkpoint_path, PREPROCESSOR_KEY)
Beispiel #11
0
    def test_tuner_with_xgboost_trainer_driver_fail_and_resume(self):
        """Fail the driver mid-experiment, then restore and finish the run.

        A callback raises ``RuntimeError`` at a fixed iteration so the first
        ``tuner.fit()`` raises ``TuneError``. The experiment is then restored
        from its results directory and must finish with all four trials.

        ``TUNE_GLOBAL_CHECKPOINT_S`` is set for the duration of the test and
        restored to its previous state afterwards, so the setting does not
        leak into other tests running in the same process.
        """
        env_key = "TUNE_GLOBAL_CHECKPOINT_S"
        previous_value = os.environ.get(env_key)
        # So that we have some global checkpointing happening.
        os.environ[env_key] = "1"
        try:
            # Start from a clean results directory for this experiment name.
            shutil.rmtree(
                os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_driver_fail"),
                ignore_errors=True,
            )
            trainer = XGBoostTrainer(
                label_column="target",
                params={},
                datasets={"train": gen_dataset_func_eager()},
            )
            # A preprocessor grid search is sketched but currently disabled:
            # prep_v1 = StandardScaler(["worst radius", "worst area"])
            # prep_v2 = StandardScaler(
            #     ["worst concavity", "worst smoothness"])
            param_space = {
                "scaling_config": {
                    "num_workers": tune.grid_search([1, 2]),
                },
                # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
                "datasets": {
                    "train": tune.grid_search(
                        [gen_dataset_func(),
                         gen_dataset_func(do_shuffle=True)]),
                },
                "params": {
                    "objective": "binary:logistic",
                    "tree_method": "approx",
                    "eval_metric": ["logloss", "error"],
                    "eta": tune.loguniform(1e-4, 1e-1),
                    "subsample": tune.uniform(0.5, 1.0),
                    "max_depth": tune.randint(1, 9),
                },
            }

            class FailureInjectionCallback(Callback):
                """Inject failure at the configured iteration number."""

                def __init__(self, num_iters=10):
                    self.num_iters = num_iters

                def on_step_end(self, iteration, trials, **kwargs):
                    if iteration == self.num_iters:
                        print(f"Failing after {self.num_iters} iters.")
                        raise RuntimeError

            tuner = Tuner(
                trainable=trainer,
                run_config=RunConfig(
                    name="test_tuner_driver_fail",
                    callbacks=[FailureInjectionCallback()],
                ),
                param_space=param_space,
                tune_config=TuneConfig(mode="min", metric="train-error"),
                # Run one trial at a time: the unit test only has access
                # to 4 CPUs on Buildkite.
                _tuner_kwargs={"max_concurrent_trials": 1},
            )
            with self.assertRaises(TuneError):
                tuner.fit()

            # Test resume
            restore_path = os.path.join(DEFAULT_RESULTS_DIR,
                                        "test_tuner_driver_fail")
            tuner = Tuner.restore(restore_path)
            # A hack before we figure out RunConfig semantics across resumes:
            # drop the callbacks so the failure is not injected again.
            tuner._local_tuner._run_config.callbacks = None
            results = tuner.fit()
            assert len(results) == 4
        finally:
            # Put the environment back exactly as it was before the test.
            if previous_value is None:
                os.environ.pop(env_key, None)
            else:
                os.environ[env_key] = previous_value
Beispiel #12
0
# Load the breast cancer CSV and split it into train and validation sets,
# with 30% of the rows going to validation.
dataset = ray.data.read_csv(
    "s3://anonymous@air-example-data/breast_cancer.csv")
train_dataset, valid_dataset = train_test_split(dataset, test_size=0.3)
# The test set is the validation set without its "target" label column.
test_dataset = valid_dataset.drop_columns(["target"])

# Only these two feature columns are handed to the scaler.
columns_to_scale = ["mean radius", "mean texture"]
preprocessor = StandardScaler(columns=columns_to_scale)

# Binary classification with 20 boosting rounds on two workers. Only a
# training dataset is supplied here; no validation set is passed.
trainer = XGBoostTrainer(
    label_column="target",
    num_boost_round=20,
    scaling_config=ScalingConfig(num_workers=2),
    params={
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
    },
    datasets={"train": train_dataset},
    preprocessor=preprocessor,
)
result = trainer.fit()

# You can also create a checkpoint from a trained model using
# `XGBoostCheckpoint.from_model`.

# import xgboost as xgb
# from ray.train.xgboost import XGBoostCheckpoint
# model = xgb.Booster()
# model.load_model(...)
# checkpoint = XGBoostCheckpoint.from_model(model, path=".")
Beispiel #13
0
# __air_generic_preprocess_end__

# __air_xgb_train_start__
from ray.train.xgboost import XGBoostTrainer
from ray.air.config import ScalingConfig

# `train_dataset`, `valid_dataset` and `preprocessor` are defined earlier,
# outside this snippet.
trainer = XGBoostTrainer(
    scaling_config=ScalingConfig(
        # Number of workers to use for data parallelism.
        num_workers=2,
        # Whether to use GPU acceleration.
        use_gpu=False,
    ),
    # Name of the column holding the label.
    label_column="target",
    num_boost_round=20,
    params={
        # XGBoost specific params
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
    },
    datasets={
        "train": train_dataset,
        "valid": valid_dataset
    },
    preprocessor=preprocessor,
)
# Run training and print the metrics reported on the result.
result = trainer.fit()
print(result.metrics)
# __air_xgb_train_end__

# __air_xgb_tuner_start__
from ray import tune
Beispiel #14
0
# GPU training is off; the flag is forwarded to ScalingConfig below.
use_gpu = False
# XGBoost specific params
params = {
    "tree_method": "approx",
    "objective": "binary:logistic",
    "eval_metric": ["logloss", "error"],
    "max_depth": 2,
}

# `num_workers`, `train_dataset`, `valid_dataset` and `preprocessor` are
# defined earlier, outside this snippet.
trainer = XGBoostTrainer(
    scaling_config=ScalingConfig(
        num_workers=num_workers,
        use_gpu=use_gpu,
    ),
    label_column="target",
    params=params,
    datasets={
        "train": train_dataset,
        "valid": valid_dataset
    },
    preprocessor=preprocessor,
    num_boost_round=5,
)

result = trainer.fit()
# __air_trainer_end__

# __air_trainer_output_start__
# Show the reported metrics and the checkpoint attached to the result.
print(result.metrics)
print(result.checkpoint)
# __air_trainer_output_end__
# __air_trainer_output_end__