def train_wrapper(config, ray_params):
    """Run ``train_ray`` once, using ``config`` as the XGBoost parameters.

    Args:
        config: Forwarded to ``train_ray`` as ``xgboost_params``.
        ray_params: Forwarded to ``train_ray`` as ``ray_params``.

    Every other setting is fixed below. Whatever ``train_ray`` returns is
    discarded, so this function always returns ``None``.
    """
    settings = {
        "path": "/data/classification.parquet",
        "num_workers": 4,
        "num_boost_rounds": 100,
        "num_files": 64,
        "regression": False,
        "use_gpu": False,
        "ray_params": ray_params,
        "xgboost_params": config,
    }
    train_ray(**settings)
def train():
    """Export the result-file location, then run ``train_ray`` on CPU.

    ``output`` and ``ray_params`` are not parameters; they are looked up in
    the surrounding scope when the function is called.
    """
    # Set the environment variable first so it is in place before training
    # starts.
    os.environ["TEST_OUTPUT_JSON"] = output

    settings = {
        "path": "/data/classification.parquet",
        "num_workers": None,
        "num_boost_rounds": 100,
        "num_files": 25,
        "regression": False,
        "use_gpu": False,
        "ray_params": ray_params,
        "xgboost_params": None,
    }
    train_ray(**settings)
def train():
    """Set the placement-group timeout variable, then run ``train_ray`` on GPU.

    ``ray_params`` is not a parameter; it is looked up in the surrounding
    scope when the function is called.
    """
    # NOTE(review): presumably read by xgboost_ray as a timeout in seconds
    # for placement group creation -- confirm against the library.
    os.environ["RXGB_PLACEMENT_GROUP_TIMEOUT_S"] = "1200"

    settings = {
        "path": "/data/classification.parquet",
        "num_workers": 4,
        "num_boost_rounds": 100,
        "num_files": 25,
        "regression": False,
        "use_gpu": True,
        "ray_params": ray_params,
        "xgboost_params": None,
    }
    train_ray(**settings)
if __name__ == "__main__":
    # Script entry point: time a single GPU training run and record the
    # wall-clock duration as JSON.
    ray.init(address="auto")

    ray_params = RayParams(
        elastic_training=False,
        max_actor_restarts=2,
        num_actors=4,
        cpus_per_actor=4,
        gpus_per_actor=1,
    )

    started_at = time.time()
    train_ray(
        path="/data/classification.parquet",
        num_workers=4,
        num_boost_rounds=100,
        num_files=25,
        regression=False,
        use_gpu=True,
        ray_params=ray_params,
        xgboost_params=None,
    )
    elapsed = time.time() - started_at

    result = {"time_taken": elapsed}

    # The output location can be overridden through TEST_OUTPUT_JSON;
    # otherwise the default path under /tmp is used.
    output_path = os.environ.get("TEST_OUTPUT_JSON", "/tmp/train_gpu.json")
    with open(output_path, "wt") as out_file:
        json.dump(result, out_file)

    print("PASSED.")
gpus_per_actor=1, ) @ray.remote def ray_get_parquet_files(): return get_parquet_files( path="/data/classification.parquet", num_files=25, ) start = time.time() train_ray( path=ray.get(ray_get_parquet_files.remote()), num_workers=4, num_boost_rounds=100, regression=False, use_gpu=True, ray_params=ray_params, xgboost_params=None, ) taken = time.time() - start result = { "time_taken": taken, } test_output_json = os.environ.get("TEST_OUTPUT_JSON", "/tmp/train_gpu_connect.json") with open(test_output_json, "wt") as f: json.dump(result, f) print("PASSED.")
start_actors.append(len(_training_state.failed_actor_ranks)) return unmocked_train(*args, _training_state=_training_state, **kwargs) with patch("xgboost_ray.main._train") as mocked: mocked.side_effect = _mock_train _, additional_results, _ = train_ray( path="/data/classification.parquet", num_workers=4, num_boost_rounds=100, num_files=200, regression=False, use_gpu=False, ray_params=ray_params, xgboost_params=None, callbacks=[ TrackingCallback(), FailureInjection(id="first_fail", state=failure_state, ranks=[2], iteration=14), FailureInjection(id="second_fail", state=failure_state, ranks=[0], iteration=34) ]) actor_1_world_size = set(additional_results["callback_returns"][1]) if 3 not in actor_1_world_size and 3 not in world_sizes and \ 1 not in world_sizes: warnings.warn(