def run_hyperopt_executor(
    sampler,
    executor,
    csv_filename,
    ray_mock_dir,
    validate_output_feature=False,
    validation_metric=None,
):
    with ray_start_4_cpus():
        config = _get_config(sampler, executor)

        csv_filename = os.path.join(ray_mock_dir, "dataset.csv")
        dataset_csv = generate_data(config["input_features"], config["output_features"], csv_filename, num_examples=100)
        dataset_parquet = create_data_set_to_use("parquet", dataset_csv)

        config = merge_with_defaults(config)

        hyperopt_config = config["hyperopt"]

        if validate_output_feature:
            hyperopt_config["output_feature"] = config["output_features"][0]["name"]
        if validation_metric:
            hyperopt_config["validation_metric"] = validation_metric

        update_hyperopt_params_with_defaults(hyperopt_config)

        parameters = hyperopt_config["parameters"]
        if sampler.get("search_alg", {}).get("type", "") == "bohb":
            # bohb does not support grid_search search space
            del parameters["combiner.num_steps"]

        split = hyperopt_config["split"]
        output_feature = hyperopt_config["output_feature"]
        metric = hyperopt_config["metric"]
        goal = hyperopt_config["goal"]

        hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal, parameters, **sampler)

        # preprocess
        backend = RayBackend(**RAY_BACKEND_KWARGS)
        model = LudwigModel(config=config, backend=backend)
        training_set, validation_set, test_set, training_set_metadata = model.preprocess(
            dataset=dataset_parquet,
        )

        # hyperopt
        hyperopt_executor = MockRayTuneExecutor(hyperopt_sampler, output_feature, metric, split, **executor)
        hyperopt_executor.mock_path = os.path.join(ray_mock_dir, "bucket")

        hyperopt_executor.execute(
            config,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            backend=backend,
            output_directory=ray_mock_dir,
            skip_save_processed_input=True,
            skip_save_unprocessed_output=True,
        )
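# --- Illustrative usage (not from the original file) ---------------------------------
# A helper like run_hyperopt_executor above is normally driven by a parametrized
# pytest test. This is a minimal sketch only: the ray_mock_dir fixture and the
# specific sampler/executor dictionaries below are assumptions for illustration,
# not the scenarios defined by the real test module.
import pytest


@pytest.fixture
def ray_mock_dir(tmpdir):
    # Hypothetical fixture: a temporary directory standing in for the mocked
    # remote bucket used by MockRayTuneExecutor.
    return str(tmpdir)


@pytest.mark.parametrize(
    "sampler,executor",
    [
        # plain Ray Tune sampler (assumed values)
        ({"type": "ray", "num_samples": 2}, {"type": "ray"}),
        # sampler with a nested search_alg, exercising the bohb branch above (assumed values)
        ({"type": "ray", "num_samples": 2, "search_alg": {"type": "bohb"}}, {"type": "ray"}),
    ],
)
def test_hyperopt_executor_with_ray_mock(sampler, executor, csv_filename, ray_mock_dir):
    run_hyperopt_executor(sampler, executor, csv_filename, ray_mock_dir)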
def test_hyperopt_executor(sampler, executor, csv_filename, validate_output_feature=False, validation_metric=None):
    if executor['type'] == 'fiber' and sampler['type'] == 'grid':
        # This test is very slow and doesn't give us additional coverage
        pytest.skip('Skipping Fiber grid search')

    input_features = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum")
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {
            "type": "concat",
            "num_fc_layers": 2
        },
        "training": {
            "epochs": 2,
            "learning_rate": 0.001
        }
    }

    config = merge_with_defaults(config)

    hyperopt_config = HYPEROPT_CONFIG.copy()

    if validate_output_feature:
        hyperopt_config['output_feature'] = output_features[0]['name']
    if validation_metric:
        hyperopt_config['validation_metric'] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal, parameters, **sampler)

    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        hyperopt_sampler, output_feature, metric, split, **executor)

    hyperopt_executor.execute(config, dataset=rel_path, gpus=get_available_gpus_cuda_string())
def test_hyperopt_executor(sampler, executor, csv_filename, validate_output_feature=False, validation_metric=None):
    input_features = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum"),
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {
            "type": "concat",
            "num_fc_layers": 2
        },
        TRAINER: {
            "epochs": 2,
            "learning_rate": 0.001
        },
    }

    config = merge_with_defaults(config)

    hyperopt_config = HYPEROPT_CONFIG.copy()

    if validate_output_feature:
        hyperopt_config["output_feature"] = output_features[0]["name"]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal, parameters, **sampler)

    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        hyperopt_sampler, output_feature, metric, split, **executor)

    gpus = [i for i in range(torch.cuda.device_count())]
    hyperopt_executor.execute(config, dataset=rel_path, gpus=gpus)
def run_hyperopt_executor(sampler, executor, csv_filename,
                          validate_output_feature=False,
                          validation_metric=None):
    input_features = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum")]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "num_fc_layers": 2},
        "training": {"epochs": 2, "learning_rate": 0.001},
        "hyperopt": {
            **HYPEROPT_CONFIG,
            "executor": executor,
            "sampler": sampler,
        },
    }

    config = merge_with_defaults(config)

    hyperopt_config = config["hyperopt"]

    if validate_output_feature:
        hyperopt_config['output_feature'] = output_features[0]['name']
    if validation_metric:
        hyperopt_config['validation_metric'] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    if sampler.get("search_alg", {}).get("type", "") == 'bohb':
        # bohb does not support grid_search search space
        del parameters['utterance.cell_type']

    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(
        sampler["type"])(goal, parameters, **sampler)
    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        hyperopt_sampler, output_feature, metric, split, **executor)

    hyperopt_executor.execute(config, dataset=rel_path)
def run_hyperopt_executor(
    search_alg,
    executor,
    csv_filename,
    tmpdir,
    validate_output_feature=False,
    validation_metric=None,
    use_split=True,
):
    config = _get_config(search_alg, executor)
    rel_path = generate_data(config["input_features"], config["output_features"], csv_filename)

    if not use_split:
        df = pd.read_csv(rel_path)
        df["split"] = 0
        df.to_csv(rel_path)

    config = merge_with_defaults(config)

    hyperopt_config = config["hyperopt"]

    if validate_output_feature:
        hyperopt_config["output_feature"] = config["output_features"][0]["name"]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    if search_alg.get("type", "") == "bohb":
        # bohb does not support grid_search search space
        del parameters["utterance.cell_type"]
        hyperopt_config["parameters"] = parameters

    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]
    search_alg = hyperopt_config["search_alg"]

    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor
    )

    hyperopt_executor.execute(
        config,
        dataset=rel_path,
        output_directory=tmpdir,
        backend="local",
    )
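# --- Illustrative usage (not from the original file) ---------------------------------
# A minimal sketch of how the run_hyperopt_executor variant above might be invoked
# from a parametrized test (pytest assumed imported as in the earlier sketch). The
# search_alg and executor values here are assumptions; the real test module defines
# its own scenarios.
@pytest.mark.parametrize(
    "search_alg,executor",
    [
        ({"type": "variant_generator"}, {"type": "ray", "num_samples": 2}),
        ({"type": "hyperopt"}, {"type": "ray", "num_samples": 2}),
    ],
)
def test_hyperopt_executor_variants(search_alg, executor, csv_filename, tmpdir):
    run_hyperopt_executor(search_alg, executor, csv_filename, tmpdir)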
def test_hyperopt_search_alg(
    search_alg, csv_filename, tmpdir, ray_cluster, validate_output_feature=False, validation_metric=None
):
    config, rel_path = _setup_ludwig_config(csv_filename)

    hyperopt_config = HYPEROPT_CONFIG.copy()

    # finalize hyperopt config settings
    if search_alg == "dragonfly":
        hyperopt_config["search_alg"] = {
            "type": search_alg,
            "domain": "euclidean",
            "optimizer": "random",
        }
    elif search_alg is None:
        hyperopt_config["search_alg"] = {}
    else:
        hyperopt_config["search_alg"] = {
            "type": search_alg,
        }

    if validate_output_feature:
        hyperopt_config["output_feature"] = config[OUTPUT_FEATURES][0][NAME]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]
    executor = hyperopt_config["executor"]
    search_alg = hyperopt_config["search_alg"]

    hyperopt_executor = get_build_hyperopt_executor(RAY)(
        parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor
    )
    raytune_results = hyperopt_executor.execute(config, dataset=rel_path, output_directory=tmpdir)

    assert isinstance(raytune_results, RayTuneResults)
def run_hyperopt_executor(
        sampler, executor, csv_filename, ray_mock_dir,
        validate_output_feature=False,
        validation_metric=None,
):
    config = _get_config(sampler, executor)

    csv_filename = os.path.join(ray_mock_dir, 'dataset.csv')
    dataset_csv = generate_data(
        config['input_features'], config['output_features'], csv_filename, num_examples=100)
    dataset_parquet = create_data_set_to_use('parquet', dataset_csv)

    config = merge_with_defaults(config)

    hyperopt_config = config["hyperopt"]

    if validate_output_feature:
        hyperopt_config['output_feature'] = config['output_features'][0]['name']
    if validation_metric:
        hyperopt_config['validation_metric'] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    if sampler.get("search_alg", {}).get("type", "") == 'bohb':
        # bohb does not support grid_search search space
        del parameters['combiner.num_steps']

    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(
        sampler["type"])(goal, parameters, **sampler)
    hyperopt_executor = MockRayTuneExecutor(
        hyperopt_sampler, output_feature, metric, split, **executor)
    hyperopt_executor.mock_path = os.path.join(ray_mock_dir, "bucket")

    hyperopt_executor.execute(
        config,
        dataset=dataset_parquet,
        backend=RayBackend(processor={'parallelism': 4}),
        output_directory=ray_mock_dir,
        skip_save_processed_input=True,
        skip_save_unprocessed_output=True
    )
def run_hyperopt_executor(
    sampler,
    executor,
    csv_filename,
    validate_output_feature=False,
    validation_metric=None,
):
    config = _get_config(sampler, executor)
    rel_path = generate_data(config["input_features"], config["output_features"], csv_filename)

    config = merge_with_defaults(config)

    hyperopt_config = config["hyperopt"]

    if validate_output_feature:
        hyperopt_config["output_feature"] = config["output_features"][0]["name"]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    if sampler.get("search_alg", {}).get("type", "") == "bohb":
        # bohb does not support grid_search search space
        del parameters["utterance.cell_type"]

    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal, parameters, **sampler)

    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        hyperopt_sampler, output_feature, metric, split, **executor)

    hyperopt_executor.execute(
        config,
        dataset=rel_path,
        backend="local",
    )
def test_hyperopt_scheduler(
    scheduler, csv_filename, tmpdir, ray_cluster, validate_output_feature=False, validation_metric=None
):
    config, rel_path = _setup_ludwig_config(csv_filename)

    hyperopt_config = HYPEROPT_CONFIG.copy()

    # finalize hyperopt config settings
    if scheduler == "pb2":
        # setup scheduler hyperparam_bounds parameter
        min_lr = hyperopt_config["parameters"]["trainer.learning_rate"]["lower"]
        max_lr = hyperopt_config["parameters"]["trainer.learning_rate"]["upper"]
        hyperparam_bounds = {
            "trainer.learning_rate": [min_lr, max_lr],
        }
        hyperopt_config["executor"]["scheduler"] = {
            "type": scheduler,
            "hyperparam_bounds": hyperparam_bounds,
        }
    else:
        hyperopt_config["executor"]["scheduler"] = {
            "type": scheduler,
        }

    if validate_output_feature:
        hyperopt_config["output_feature"] = config[OUTPUT_FEATURES][0][NAME]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]
    executor = hyperopt_config["executor"]
    search_alg = hyperopt_config["search_alg"]

    # TODO: Determine if we still need this if-then-else construct
    if search_alg["type"] in {""}:
        with pytest.raises(ImportError):
            get_build_hyperopt_executor(RAY)(
                parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor
            )
    else:
        hyperopt_executor = get_build_hyperopt_executor(RAY)(
            parameters, output_feature, metric, goal, split, search_alg=search_alg, **executor
        )
        raytune_results = hyperopt_executor.execute(config, dataset=rel_path, output_directory=tmpdir)

        assert isinstance(raytune_results, RayTuneResults)
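# --- Illustrative search-space shape (not from the original file) ---------------------
# The pb2 branch above reads "lower" and "upper" bounds from the learning-rate entry
# of HYPEROPT_CONFIG["parameters"]. A search space of roughly the shape below is
# assumed; the real constant in the test module may use different bounds or spaces.
EXAMPLE_HYPEROPT_PARAMETERS = {
    "trainer.learning_rate": {
        "space": "loguniform",
        "lower": 0.0001,
        "upper": 0.01,
    },
}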