Esempio n. 1
0
def train_wrapper(config, ray_params):
    """Run ``train_ray`` on the classification parquet data set.

    Args:
        config: Forwarded to ``train_ray`` as ``xgboost_params``.
        ray_params: Forwarded to ``train_ray`` as ``ray_params``.
    """
    # Settings that are fixed for this run; only the two arguments vary.
    fixed_settings = {
        "path": "/data/classification.parquet",
        "num_files": 64,
        "num_workers": 4,
        "num_boost_rounds": 100,
        "regression": False,
        "use_gpu": False,
    }
    train_ray(ray_params=ray_params, xgboost_params=config, **fixed_settings)
Esempio n. 2
0
 def train():
     """Set TEST_OUTPUT_JSON to ``output`` and start the training run.

     ``output`` and ``ray_params`` are not defined here; they are resolved
     from the surrounding scope.
     """
     os.environ["TEST_OUTPUT_JSON"] = output

     fixed_settings = dict(
         path="/data/classification.parquet",
         num_files=25,
         num_boost_rounds=100,
         regression=False,
         use_gpu=False,
     )
     # Worker count and XGBoost parameters are passed as None.
     train_ray(
         num_workers=None,
         xgboost_params=None,
         ray_params=ray_params,
         **fixed_settings
     )
Esempio n. 3
0
    def train():
        """Set RXGB_PLACEMENT_GROUP_TIMEOUT_S to "1200" and train on GPU.

        ``ray_params`` is not defined here; it is resolved from the
        surrounding scope.
        """
        os.environ["RXGB_PLACEMENT_GROUP_TIMEOUT_S"] = "1200"

        fixed_settings = dict(
            path="/data/classification.parquet",
            num_files=25,
            num_workers=4,
            num_boost_rounds=100,
            regression=False,
            use_gpu=True,
        )
        # XGBoost parameters are passed as None.
        train_ray(
            xgboost_params=None,
            ray_params=ray_params,
            **fixed_settings
        )
Esempio n. 4
0
if __name__ == "__main__":
    # Connect to an existing Ray cluster (address="auto").
    ray.init(address="auto")

    # Four actors, each with 4 CPUs and 1 GPU; elastic training is off and
    # an actor may be restarted at most twice.
    ray_params = RayParams(
        num_actors=4,
        cpus_per_actor=4,
        gpus_per_actor=1,
        max_actor_restarts=2,
        elastic_training=False,
    )

    # Only the train_ray call itself is timed.
    start = time.time()
    train_ray(
        path="/data/classification.parquet",
        num_files=25,
        num_workers=4,
        num_boost_rounds=100,
        regression=False,
        use_gpu=True,
        xgboost_params=None,
        ray_params=ray_params,
    )
    taken = time.time() - start

    result = {"time_taken": taken}

    # TEST_OUTPUT_JSON overrides the default location of the result file.
    test_output_json = os.getenv("TEST_OUTPUT_JSON", "/tmp/train_gpu.json")
    with open(test_output_json, "wt") as f:
        json.dump(result, f)

    print("PASSED.")
Esempio n. 5
0
        gpus_per_actor=1,
    )

    @ray.remote
    def ray_get_parquet_files():
        """Call ``get_parquet_files`` as a Ray remote task and return its result.

        The lookup is fixed to ``/data/classification.parquet`` with
        ``num_files=25``.
        """
        parquet_files = get_parquet_files(
            num_files=25,
            path="/data/classification.parquet",
        )
        return parquet_files

    # Time the whole training call, including the remote file lookup that
    # is evaluated while building the ``path`` argument.
    start = time.time()
    train_ray(
        # The parquet file list is produced by the remote task and fetched
        # here with ray.get before train_ray is invoked.
        path=ray.get(ray_get_parquet_files.remote()),
        num_workers=4,
        num_boost_rounds=100,
        regression=False,
        use_gpu=True,
        ray_params=ray_params,
        xgboost_params=None,
    )
    taken = time.time() - start

    # The elapsed wall-clock time is the only value that is reported.
    result = {
        "time_taken": taken,
    }
    # TEST_OUTPUT_JSON overrides the default location of the result file.
    test_output_json = os.environ.get("TEST_OUTPUT_JSON",
                                      "/tmp/train_gpu_connect.json")
    with open(test_output_json, "wt") as f:
        json.dump(result, f)

    print("PASSED.")
Esempio n. 6
0
        start_actors.append(len(_training_state.failed_actor_ranks))

        return unmocked_train(*args, _training_state=_training_state, **kwargs)

    # While the patch is active, xgboost_ray.main._train is replaced by a
    # mock whose calls are handled by _mock_train.
    with patch("xgboost_ray.main._train") as mocked:
        mocked.side_effect = _mock_train
        # train_ray's return value is unpacked as a 3-tuple; only the middle
        # element is kept.
        _, additional_results, _ = train_ray(
            path="/data/classification.parquet",
            num_workers=4,
            num_boost_rounds=100,
            num_files=200,
            regression=False,
            use_gpu=False,
            ray_params=ray_params,
            xgboost_params=None,
            callbacks=[
                TrackingCallback(),
                # Two failure injections sharing failure_state: ranks=[2] at
                # iteration 14, then ranks=[0] at iteration 34.
                # NOTE(review): presumably each one fails the listed actor
                # rank at that boosting iteration - confirm against
                # FailureInjection.
                FailureInjection(id="first_fail",
                                 state=failure_state,
                                 ranks=[2],
                                 iteration=14),
                FailureInjection(id="second_fail",
                                 state=failure_state,
                                 ranks=[0],
                                 iteration=34)
            ])

    # Distinct values found at index 1 of the "callback_returns" entry.
    # NOTE(review): presumably the world sizes recorded by TrackingCallback
    # for actor rank 1 - confirm.
    actor_1_world_size = set(additional_results["callback_returns"][1])

    if 3 not in actor_1_world_size and 3 not in world_sizes and \
       1 not in world_sizes:
        warnings.warn(