def run_hyperopt_executor(
    sampler,
    executor,
    csv_filename,
    ray_mock_dir,
    validate_output_feature=False,
    validation_metric=None,
):
    """End-to-end hyperopt test: build a config, generate a synthetic Parquet
    dataset, preprocess it with a Ray backend, and run a mocked Ray Tune
    hyperopt executor over it.

    :param sampler: dict describing the hyperopt sampler (``type``, optional
        ``search_alg``) used to build the search strategy.
    :param executor: dict of kwargs forwarded to ``MockRayTuneExecutor``.
    :param csv_filename: unused — immediately shadowed by a path inside
        ``ray_mock_dir`` below. NOTE(review): consider dropping or honoring
        this parameter; confirm against callers.
    :param ray_mock_dir: scratch directory for the dataset, the mocked
        remote "bucket", and hyperopt output.
    :param validate_output_feature: if True, pin hyperopt to the first
        output feature by name.
    :param validation_metric: optional metric name to override in the
        hyperopt config.
    """
    with ray_start_4_cpus():
        config = _get_config(sampler, executor)
        # Shadows the csv_filename parameter so the dataset lands in ray_mock_dir.
        csv_filename = os.path.join(ray_mock_dir, "dataset.csv")
        dataset_csv = generate_data(config["input_features"], config["output_features"], csv_filename, num_examples=100)
        dataset_parquet = create_data_set_to_use("parquet", dataset_csv)

        config = merge_with_defaults(config)

        hyperopt_config = config["hyperopt"]

        if validate_output_feature:
            hyperopt_config["output_feature"] = config["output_features"][0]["name"]
        if validation_metric:
            hyperopt_config["validation_metric"] = validation_metric

        # Must run after the overrides above so defaults don't clobber them.
        update_hyperopt_params_with_defaults(hyperopt_config)

        parameters = hyperopt_config["parameters"]
        if sampler.get("search_alg", {}).get("type", "") == "bohb":
            # bohb does not support grid_search search space
            del parameters["combiner.num_steps"]

        split = hyperopt_config["split"]
        output_feature = hyperopt_config["output_feature"]
        metric = hyperopt_config["metric"]
        goal = hyperopt_config["goal"]

        hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal, parameters, **sampler)

        # preprocess
        backend = RayBackend(**RAY_BACKEND_KWARGS)
        model = LudwigModel(config=config, backend=backend)
        training_set, validation_set, test_set, training_set_metadata = model.preprocess(
            dataset=dataset_parquet,
        )

        # hyperopt: mock_path redirects the executor's remote storage to a
        # local directory standing in for a cloud bucket.
        hyperopt_executor = MockRayTuneExecutor(hyperopt_sampler, output_feature, metric, split, **executor)
        hyperopt_executor.mock_path = os.path.join(ray_mock_dir, "bucket")

        hyperopt_executor.execute(
            config,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            backend=backend,
            output_directory=ray_mock_dir,
            skip_save_processed_input=True,
            skip_save_unprocessed_output=True,
        )
def run_api_experiment(config, data_parquet):
    """Verify the Horovod cluster shape (1 host, 2 slots), then train the
    given config on a Parquet dataset via the Ray/Dask backend."""
    # Sanity check: a single host exposing exactly two training slots.
    horovod_kwargs = get_horovod_kwargs()
    assert horovod_kwargs.get('num_hosts') == 1
    assert horovod_kwargs.get('num_slots') == 2

    # Run training on the Parquet dataset with the Ray backend.
    backend = RayBackend()
    train_with_backend(backend, config, dataset=data_parquet)
def run_api_experiment(config, data_parquet):
    """Verify the expected Ray worker count, then train the given config on
    a Parquet dataset (training only, no evaluation) and assert success."""
    # Sanity check: the cluster should expose exactly two workers.
    horovod_kwargs = get_horovod_kwargs()
    assert horovod_kwargs.get("num_workers") == 2

    # Train on the Parquet dataset; the run must report success.
    backend = RayBackend()
    assert train_with_backend(backend, config, dataset=data_parquet, evaluate=False)
def run_hyperopt_executor(
    sampler,
    executor,
    csv_filename,
    ray_mock_dir,
    validate_output_feature=False,
    validation_metric=None,
):
    """End-to-end hyperopt test: build a config, generate a synthetic Parquet
    dataset, and run a mocked Ray Tune hyperopt executor over it with a
    Ray backend (parallelism fixed at 4).

    :param sampler: dict describing the hyperopt sampler (``type``, optional
        ``search_alg``) used to build the search strategy.
    :param executor: dict of kwargs forwarded to ``MockRayTuneExecutor``.
    :param csv_filename: unused — immediately shadowed by a path inside
        ``ray_mock_dir`` below. NOTE(review): consider dropping or honoring
        this parameter; confirm against callers.
    :param ray_mock_dir: scratch directory for the dataset, the mocked
        remote "bucket", and hyperopt output.
    :param validate_output_feature: if True, pin hyperopt to the first
        output feature by name.
    :param validation_metric: optional metric name to override in the
        hyperopt config.
    """
    config = _get_config(sampler, executor)
    # Shadows the csv_filename parameter so the dataset lands in ray_mock_dir.
    csv_filename = os.path.join(ray_mock_dir, 'dataset.csv')
    dataset_csv = generate_data(
        config['input_features'], config['output_features'],
        csv_filename, num_examples=100)
    dataset_parquet = create_data_set_to_use('parquet', dataset_csv)

    config = merge_with_defaults(config)

    hyperopt_config = config["hyperopt"]

    if validate_output_feature:
        hyperopt_config['output_feature'] = config['output_features'][0]['name']
    if validation_metric:
        hyperopt_config['validation_metric'] = validation_metric

    # Must run after the overrides above so defaults don't clobber them.
    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    if sampler.get("search_alg", {}).get("type", "") == 'bohb':
        # bohb does not support grid_search search space
        del parameters['combiner.num_steps']

    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(
        sampler["type"])(goal, parameters, **sampler)

    # mock_path redirects the executor's remote storage to a local
    # directory standing in for a cloud bucket.
    hyperopt_executor = MockRayTuneExecutor(
        hyperopt_sampler, output_feature, metric, split, **executor)
    hyperopt_executor.mock_path = os.path.join(ray_mock_dir, "bucket")

    hyperopt_executor.execute(
        config,
        dataset=dataset_parquet,
        backend=RayBackend(processor={'parallelism': 4,}),
        output_directory=ray_mock_dir,
        skip_save_processed_input=True,
        skip_save_unprocessed_output=True
    )
def create_ray_backend(**kwargs):
    """Factory for a Ray backend.

    The import is deferred to call time so that merely importing this
    module does not require ray to be installed.
    """
    from ludwig.backend.ray import RayBackend as ray_backend_cls

    return ray_backend_cls(**kwargs)