コード例 #1
0
def run_hyperopt_executor(
    sampler,
    executor,
    csv_filename,
    ray_mock_dir,
    validate_output_feature=False,
    validation_metric=None,
):
    """Drive a ``MockRayTuneExecutor`` over a freshly generated dataset.

    Everything runs inside the ``ray_start_4_cpus()`` context. The data is
    preprocessed up front with a ``LudwigModel`` and the resulting splits
    are passed to the executor's ``execute`` method.

    Args:
        sampler: Mapping describing the sampler. ``sampler["type"]`` picks
            the sampler builder, and the whole mapping is also forwarded to
            that builder as keyword arguments.
        executor: Mapping forwarded as keyword arguments to
            ``MockRayTuneExecutor``.
        csv_filename: Never read; the CSV path is always
            ``<ray_mock_dir>/dataset.csv``.
        ray_mock_dir: Directory that receives the generated CSV, holds the
            mock ``bucket`` path and is used as the output directory.
        validate_output_feature: When truthy, the hyperopt
            ``output_feature`` is set to the first output feature's name.
        validation_metric: When truthy, stored under the hyperopt
            ``validation_metric`` key.
    """
    with ray_start_4_cpus():
        config = _get_config(sampler, executor)

        # Build a 100-example CSV under ray_mock_dir, then hand it to
        # create_data_set_to_use with the "parquet" format.
        csv_path = os.path.join(ray_mock_dir, "dataset.csv")
        csv_dataset = generate_data(
            config["input_features"],
            config["output_features"],
            csv_path,
            num_examples=100,
        )
        parquet_dataset = create_data_set_to_use("parquet", csv_dataset)

        config = merge_with_defaults(config)
        hyperopt_cfg = config["hyperopt"]

        # Optional overrides are applied before the hyperopt defaults are
        # filled in.
        if validate_output_feature:
            first_output = config["output_features"][0]
            hyperopt_cfg["output_feature"] = first_output["name"]
        if validation_metric:
            hyperopt_cfg["validation_metric"] = validation_metric

        update_hyperopt_params_with_defaults(hyperopt_cfg)

        search_space = hyperopt_cfg["parameters"]
        search_alg_type = sampler.get("search_alg", {}).get("type", "")
        if search_alg_type == "bohb":
            # bohb does not support grid_search search space
            del search_space["combiner.num_steps"]

        data_split = hyperopt_cfg["split"]
        target_feature = hyperopt_cfg["output_feature"]
        target_metric = hyperopt_cfg["metric"]
        objective = hyperopt_cfg["goal"]

        build_sampler = get_build_hyperopt_sampler(sampler["type"])
        hyperopt_sampler = build_sampler(objective, search_space, **sampler)

        # Preprocess once, on the Ray backend, so the executor receives
        # ready-made splits plus their metadata.
        backend = RayBackend(**RAY_BACKEND_KWARGS)
        model = LudwigModel(config=config, backend=backend)
        preprocessed = model.preprocess(dataset=parquet_dataset)
        train_split, val_split, test_split, split_metadata = preprocessed

        # Run hyperopt through the mock executor.
        tune_executor = MockRayTuneExecutor(
            hyperopt_sampler,
            target_feature,
            target_metric,
            data_split,
            **executor,
        )
        tune_executor.mock_path = os.path.join(ray_mock_dir, "bucket")

        tune_executor.execute(
            config,
            training_set=train_split,
            validation_set=val_split,
            test_set=test_split,
            training_set_metadata=split_metadata,
            backend=backend,
            output_directory=ray_mock_dir,
            skip_save_processed_input=True,
            skip_save_unprocessed_output=True,
        )
コード例 #2
0
def run_api_experiment(config, data_parquet):
    """Train ``config`` on ``data_parquet`` using a default ``RayBackend``.

    Before training, asserts that ``get_horovod_kwargs()`` reports exactly
    one host and two slots.
    """
    horovod_kwargs = get_horovod_kwargs()

    # Sanity check the reported layout: 1 host, 2 slots.
    assert horovod_kwargs.get('num_hosts') == 1
    assert horovod_kwargs.get('num_slots') == 2

    # Train on Parquet
    ray_backend = RayBackend()
    train_with_backend(ray_backend, config, dataset=data_parquet)
コード例 #3
0
ファイル: test_ludwig.py プロジェクト: vishalbelsare/ray
def run_api_experiment(config, data_parquet):
    """Train ``config`` on ``data_parquet`` with a default ``RayBackend``.

    Asserts that ``get_horovod_kwargs()`` reports two workers, runs
    ``train_with_backend`` with ``evaluate=False`` and asserts that its
    return value is truthy.
    """
    # Sanity check that two workers are reported.
    horovod_kwargs = get_horovod_kwargs()
    assert horovod_kwargs.get("num_workers") == 2

    # Train on Parquet. The call is made outside the ``assert`` so that
    # training still runs when assertions are stripped (``python -O``).
    ray_backend = RayBackend()
    trained = train_with_backend(
        ray_backend,
        config,
        dataset=data_parquet,
        evaluate=False,
    )
    assert trained
コード例 #4
0
def run_hyperopt_executor(
    sampler,
    executor,
    csv_filename,
    ray_mock_dir,
    validate_output_feature=False,
    validation_metric=None,
):
    """Drive a ``MockRayTuneExecutor`` over a freshly generated dataset.

    The dataset is passed to the executor's ``execute`` method as
    ``dataset=``, together with a ``RayBackend`` whose processor
    parallelism is 4.

    Args:
        sampler: Mapping describing the sampler. ``sampler["type"]`` picks
            the sampler builder, and the whole mapping is also forwarded to
            that builder as keyword arguments.
        executor: Mapping forwarded as keyword arguments to
            ``MockRayTuneExecutor``.
        csv_filename: Never read; the CSV path is always
            ``<ray_mock_dir>/dataset.csv``.
        ray_mock_dir: Directory that receives the generated CSV, holds the
            mock ``bucket`` path and is used as the output directory.
        validate_output_feature: When truthy, the hyperopt
            ``output_feature`` is set to the first output feature's name.
        validation_metric: When truthy, stored under the hyperopt
            ``validation_metric`` key.
    """
    config = _get_config(sampler, executor)

    # Build a 100-example CSV under ray_mock_dir, then hand it to
    # create_data_set_to_use with the "parquet" format.
    csv_path = os.path.join(ray_mock_dir, "dataset.csv")
    csv_dataset = generate_data(
        config["input_features"],
        config["output_features"],
        csv_path,
        num_examples=100,
    )
    parquet_dataset = create_data_set_to_use("parquet", csv_dataset)

    config = merge_with_defaults(config)
    hyperopt_cfg = config["hyperopt"]

    # Optional overrides are applied before the hyperopt defaults are
    # filled in.
    if validate_output_feature:
        first_output = config["output_features"][0]
        hyperopt_cfg["output_feature"] = first_output["name"]
    if validation_metric:
        hyperopt_cfg["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_cfg)

    search_space = hyperopt_cfg["parameters"]
    search_alg_type = sampler.get("search_alg", {}).get("type", "")
    if search_alg_type == "bohb":
        # bohb does not support grid_search search space
        del search_space["combiner.num_steps"]

    data_split = hyperopt_cfg["split"]
    target_feature = hyperopt_cfg["output_feature"]
    target_metric = hyperopt_cfg["metric"]
    objective = hyperopt_cfg["goal"]

    build_sampler = get_build_hyperopt_sampler(sampler["type"])
    hyperopt_sampler = build_sampler(objective, search_space, **sampler)

    tune_executor = MockRayTuneExecutor(
        hyperopt_sampler,
        target_feature,
        target_metric,
        data_split,
        **executor,
    )
    tune_executor.mock_path = os.path.join(ray_mock_dir, "bucket")

    tune_executor.execute(
        config,
        dataset=parquet_dataset,
        backend=RayBackend(processor={"parallelism": 4}),
        output_directory=ray_mock_dir,
        skip_save_processed_input=True,
        skip_save_unprocessed_output=True,
    )
コード例 #5
0
ファイル: __init__.py プロジェクト: zhisbug/ludwig
def create_ray_backend(**kwargs):
    """Construct a ``RayBackend``, forwarding all keyword arguments to it.

    ``RayBackend`` is imported at call time, inside the function, rather
    than when this module is imported.
    """
    from ludwig.backend.ray import RayBackend

    ray_backend = RayBackend(**kwargs)
    return ray_backend