def hyperopt_results():
    """Run a small serial hyperopt search and return its output directory.

    Synthetic data is generated for one text and one category input feature
    plus one category output feature. ``hyperopt`` is then invoked with a
    serial executor and a random sampler configured for two samples, writing
    into the ``results`` directory.

    # Return

    :return: (str) absolute path of the ``results`` output directory.
    """
    features_in = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum"),
    ]
    features_out = [category_feature(vocab_size=2, reduce_input="sum")]

    # Dataset file name: first 10 hex digits of a fresh UUID, upper-cased.
    dataset_file = uuid.uuid4().hex[:10].upper() + ".csv"
    dataset_path = generate_data(features_in, features_out, dataset_file)

    target = features_out[0]["name"]
    search_space = {
        "training.learning_rate": {
            "type": "float",
            "low": 0.0001,
            "high": 0.01,
            "space": "log",
            "steps": 3,
        },
        f"{target}.fc_size": {"type": "int", "low": 32, "high": 256, "steps": 5},
        f"{target}.num_fc_layers": {
            "type": "int",
            "low": 1,
            "high": 5,
            "space": "linear",
            "steps": 4,
        },
    }

    # The hyperopt section is embedded directly as the last key of the config.
    config = {
        "input_features": features_in,
        "output_features": features_out,
        "combiner": {"type": "concat", "num_fc_layers": 2},
        "training": {"epochs": 2, "learning_rate": 0.001},
        "hyperopt": {
            "parameters": search_space,
            "goal": "minimize",
            "output_feature": target,
            "validation_metrics": "loss",
            "executor": {"type": "serial"},
            "sampler": {"type": "random", "num_samples": 2},
        },
    }

    hyperopt(config, dataset=dataset_path, output_directory="results")
    return os.path.abspath("results")
def _train(
    config: Dict,
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame],
    output_directory: str,
    model_name: str,
    **kwargs
):
    """Run ``hyperopt`` for the given config and dataset on the 'local' backend.

    :param config: (dict) configuration handed to ``hyperopt`` unchanged.
    :param dataset: (Union[str, pd.DataFrame, dd.core.DataFrame]) dataset
        forwarded as the ``dataset`` argument.
    :param output_directory: (str) forwarded as ``output_directory``.
    :param model_name: (str) forwarded as ``model_name``.
    :param kwargs: any further keyword arguments, forwarded verbatim.

    :return: whatever ``hyperopt`` returns.
    """
    # The backend is pinned here; it is not taken from the caller.
    return hyperopt(
        config,
        dataset=dataset,
        output_directory=output_directory,
        model_name=model_name,
        backend="local",
        **kwargs
    )
def run_hyperopt(config, rel_path):
    """Run ``hyperopt`` on ``rel_path`` and sanity-check what it produced.

    :param config: configuration passed to ``hyperopt``.
    :param rel_path: dataset location passed as ``dataset``.

    Asserts that the returned value is a ``list`` and that the statistics
    file exists inside the ``results_hyperopt`` output directory.
    """
    output_dir = 'results_hyperopt'
    results = hyperopt(config, dataset=rel_path, output_directory=output_dir)

    # The call must hand back a list of results.
    assert isinstance(results, list)

    # The statistics file must have been written to the output directory.
    stats_file = os.path.join(output_dir, 'hyperopt_statistics.json')
    assert os.path.isfile(stats_file)
def test_hyperopt_with_shared_params(csv_filename, tmpdir):
    """Run hyperopt with shared parameters and validate the resulting trials.

    The config, dataset path and the two search spaces come from
    ``_setup_ludwig_config_with_shared_params``. After the run, the results
    dataframe is handed to the trial-table check and then to the
    written-config check, in that order.
    """
    (
        config,
        dataset_path,
        num_filters_space,
        embedding_size_space,
    ) = _setup_ludwig_config_with_shared_params(csv_filename)

    run = hyperopt(
        config,
        dataset=dataset_path,
        output_directory=tmpdir,
        experiment_name="test_hyperopt",
    )
    trials_df = run.experiment_analysis.results_df

    # Both checkers take the same arguments; run them in the original order.
    checkers = (
        _test_hyperopt_with_shared_params_trial_table,
        _test_hyperopt_with_shared_params_written_config,
    )
    for check in checkers:
        check(trials_df, num_filters_space, embedding_size_space)
def _train(
    config: Dict,
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame],
    output_directory: str,
    model_name: str,
    random_seed: int,
    **kwargs,
):
    """Run ``hyperopt`` for the given config and dataset.

    :param config: (dict) configuration handed to ``hyperopt`` unchanged.
    :param dataset: (Union[str, pd.DataFrame, dd.core.DataFrame]) dataset
        forwarded as the ``dataset`` argument.
    :param output_directory: (str) forwarded as ``output_directory``.
    :param model_name: (str) forwarded as ``model_name``.
    :param random_seed: (int) forwarded as ``random_seed``.
    :param kwargs: any further keyword arguments, forwarded verbatim.
        ``skip_save_log`` defaults to ``True`` here (to avoid per-step log
        overhead) but an explicit value supplied by the caller wins.

    :return: whatever ``hyperopt`` returns.
    """
    # Passing skip_save_log=True next to **kwargs would raise
    # "got multiple values for keyword argument" as soon as a caller sets it,
    # so the default is injected into kwargs instead.
    kwargs.setdefault("skip_save_log", True)
    return hyperopt(
        config,
        dataset=dataset,
        output_directory=output_directory,
        model_name=model_name,
        random_seed=random_seed,
        **kwargs,
    )
def run_hyperopt(config, rel_path, experiment_name="ray_hyperopt", callbacks=None):
    """Run ``hyperopt`` on ``rel_path`` and sanity-check what it produced.

    :param config: configuration passed to ``hyperopt``.
    :param rel_path: dataset location passed as ``dataset``.
    :param experiment_name: forwarded as ``experiment_name``.
    :param callbacks: forwarded as ``callbacks``.

    Asserts that the returned value is a ``RayTuneResults`` and that the
    statistics file exists inside the ``results_hyperopt`` directory.
    """
    output_dir = "results_hyperopt"
    results = hyperopt(
        config,
        dataset=rel_path,
        output_directory=output_dir,
        experiment_name=experiment_name,
        callbacks=callbacks,
    )

    # The call must hand back a RayTuneResults object.
    assert isinstance(results, RayTuneResults)

    # The statistics file must have been written to the output directory.
    stats_file = os.path.join(output_dir, "hyperopt_statistics.json")
    assert os.path.isfile(stats_file)
def run_hyperopt(config, rel_path, tmpdir, experiment_name="ray_hyperopt", callbacks=None):
    """Run ``hyperopt`` into ``tmpdir`` and sanity-check what it produced.

    :param config: configuration passed to ``hyperopt``.
    :param rel_path: dataset location passed as ``dataset``.
    :param tmpdir: directory used as ``output_directory``.
    :param experiment_name: forwarded as ``experiment_name``; also the
        sub-directory in which the statistics file is looked up.
    :param callbacks: forwarded as ``callbacks``.

    Asserts that the returned value is a ``RayTuneResults`` and that
    ``HYPEROPT_STATISTICS_FILE_NAME`` exists under ``tmpdir/experiment_name``.
    """
    results = hyperopt(
        config,
        dataset=rel_path,
        output_directory=tmpdir,
        experiment_name=experiment_name,
        callbacks=callbacks,
    )

    # The call must hand back a RayTuneResults object.
    assert isinstance(results, RayTuneResults)

    # The statistics file is expected inside the experiment's own folder.
    stats_file = os.path.join(tmpdir, experiment_name, HYPEROPT_STATISTICS_FILE_NAME)
    assert os.path.isfile(stats_file)
def run_hyperopt(config, rel_path, out_dir, experiment_name="ray_hyperopt"):
    """Run ``hyperopt`` inside ``ray_start_4_cpus`` and check its outputs.

    :param config: configuration passed to ``hyperopt``.
    :param rel_path: dataset location passed as ``dataset``.
    :param out_dir: directory used as ``output_directory``.
    :param experiment_name: forwarded as ``experiment_name``.

    A single ``TestCallback`` instance is registered for the run. Asserts
    that the returned value is a ``RayTuneResults`` and that
    ``hyperopt_statistics.json`` exists directly under ``out_dir``.
    """
    with ray_start_4_cpus():
        results = hyperopt(
            config,
            dataset=rel_path,
            output_directory=out_dir,
            experiment_name=experiment_name,
            callbacks=[TestCallback()],
        )

        # The call must hand back a RayTuneResults object.
        assert isinstance(results, RayTuneResults)

        # The statistics file must have been written to the output directory.
        stats_file = os.path.join(out_dir, "hyperopt_statistics.json")
        assert os.path.isfile(stats_file)
def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster):
    """Run hyperopt with a random or grid search space and check the trial count.

    :param csv_filename: file name handed to ``generate_data``.
    :param search_space: ``"random"`` selects the sampled search space; any
        other value selects the grid search space.
    :param tmpdir: directory used as ``output_directory``.
    :param ray_cluster: not referenced in the body (presumably a fixture
        requested for its side effects; confirm against the test module).

    The number of rows in the results dataframe must equal
    ``RANDOM_SEARCH_SIZE`` for random search, or the product of the number of
    grid values per parameter for grid search.
    """
    input_features = [
        text_feature(name="utterance", reduce_output="sum"),
        category_feature(vocab_size=3),
    ]
    output_features = [category_feature(vocab_size=3)]

    rel_path = generate_data(input_features, output_features, csv_filename)

    config = {
        INPUT_FEATURES: input_features,
        OUTPUT_FEATURES: output_features,
        COMBINER: {TYPE: "concat", "num_fc_layers": 2},
        TRAINER: {"epochs": 2, "learning_rate": 0.001},
    }

    output_feature_name = output_features[0][NAME]

    # One flag drives the search space, the sample count and the expected
    # trial count, so the three can never disagree.
    is_random_search = search_space == "random"

    if is_random_search:
        # random search will be size of num_samples
        search_parameters = {
            "trainer.learning_rate": {
                "lower": 0.0001,
                "upper": 0.01,
                "space": "loguniform",
            },
            output_feature_name + ".fc_layers": {
                "space": "choice",
                "categories": [
                    [{"output_size": 64}, {"output_size": 32}],
                    [{"output_size": 64}],
                    [{"output_size": 32}],
                ],
            },
            output_feature_name + ".output_size": {"space": "choice", "categories": [16, 21, 26, 31, 36]},
            output_feature_name + ".num_fc_layers": {"space": "randint", "lower": 1, "upper": 6},
        }
        num_samples = RANDOM_SEARCH_SIZE
        expected_trials = RANDOM_SEARCH_SIZE
    else:
        # grid search space will be product each parameter size
        search_parameters = {
            "trainer.learning_rate": {"space": "grid_search", "values": [0.001, 0.005, 0.01]},
            output_feature_name + ".output_size": {"space": "grid_search", "values": [16, 21, 36]},
            output_feature_name + ".num_fc_layers": {"space": "grid_search", "values": [1, 3, 6]},
        }
        num_samples = 1
        # compute size of search space for grid search
        expected_trials = 1
        for parameter_space in search_parameters.values():
            expected_trials *= len(parameter_space["values"])

    hyperopt_configs = {
        "parameters": search_parameters,
        "goal": "minimize",
        "output_feature": output_feature_name,
        "validation_metrics": "loss",
        "executor": {"type": "ray", "num_samples": num_samples},
        "search_alg": {"type": "variant_generator"},
    }

    # add hyperopt parameter space to the config
    config["hyperopt"] = hyperopt_configs

    hyperopt_results = hyperopt(config, dataset=rel_path, output_directory=tmpdir, experiment_name="test_hyperopt")

    # one results row is expected per trial
    assert hyperopt_results.experiment_analysis.results_df.shape[0] == expected_trials

    # check for return results
    assert isinstance(hyperopt_results, HyperoptResults)

    # check for existence of the hyperopt statistics file
    assert os.path.isfile(os.path.join(tmpdir, "test_hyperopt", HYPEROPT_STATISTICS_FILE_NAME))
def hyperopt_cli(
        model_definition: dict,
        model_definition_file: str = None,
        dataset: str = None,
        training_set: str = None,
        validation_set: str = None,
        test_set: str = None,
        training_set_metadata: str = None,
        data_format: str = None,
        experiment_name: str = 'experiment',
        model_name: str = 'run',
        # model_load_path=None,
        # model_resume_path=None,
        skip_save_training_description: bool = False,
        skip_save_training_statistics: bool = False,
        skip_save_model: bool = False,
        skip_save_progress: bool = False,
        skip_save_log: bool = False,
        skip_save_processed_input: bool = False,
        skip_save_unprocessed_output: bool = False,
        skip_save_predictions: bool = False,
        skip_save_eval_stats: bool = False,
        skip_save_hyperopt_statistics: bool = False,
        output_directory: str = 'results',
        gpus: Union[str, int, List[int]] = None,
        gpu_memory_limit: int = None,
        allow_parallel_threads: bool = True,
        use_horovod: bool = None,
        random_seed: int = default_random_seed,
        debug: bool = False,
        **kwargs,
):
    """
    Searches for optimal hyperparameters.

    The model definition is first resolved from `model_definition` and
    `model_definition_file` via `check_which_model_definition`; every other
    argument is forwarded unchanged to `hyperopt`.

    # Inputs

    :param model_definition: (dict) model definition which defines
        the different parameters of the model, features, preprocessing and
        training.
    :param model_definition_file: (str, default: `None`) the filepath string
        that specifies the model definition. It is a yaml file.
    :param dataset: (Union[str, dict, pandas.DataFrame], default: `None`)
        source containing the entire dataset to be used for training.
        If it has a split column, it will be used for splitting (0 for train,
        1 for validation, 2 for test), otherwise the dataset will be
        randomly split.
    :param training_set: (Union[str, dict, pandas.DataFrame], default: `None`)
        source containing training data.
    :param validation_set: (Union[str, dict, pandas.DataFrame], default: `None`)
        source containing validation data.
    :param test_set: (Union[str, dict, pandas.DataFrame], default: `None`)
        source containing test data.
    :param training_set_metadata: (Union[str, dict], default: `None`)
        metadata JSON file or loaded metadata. Intermediate preprocessed
        structure containing the mappings of the input
        dataset created the first time an input file is used in the same
        directory with the same name and a '.meta.json' extension.
    :param data_format: (str, default: `None`) format to interpret data
        sources. Will be inferred automatically if not specified. Valid
        formats are `'auto'`, `'csv'`, `'excel'`, `'feather'`,
        `'fwf'`, `'hdf5'` (cache file produced during previous training),
        `'html'` (file containing a single HTML `<table>`), `'json'`, `'jsonl'`,
        `'parquet'`, `'pickle'` (pickled Pandas DataFrame), `'sas'`, `'spss'`,
        `'stata'`, `'tsv'`.
    :param experiment_name: (str, default: `'experiment'`) name for
        the experiment.
    :param model_name: (str, default: `'run'`) name of the model that is
        being used.
    :param skip_save_training_description: (bool, default: `False`) disables
        saving the description JSON file.
    :param skip_save_training_statistics: (bool, default: `False`) disables
        saving training statistics JSON file.
    :param skip_save_model: (bool, default: `False`) disables
        saving model weights and hyperparameters each time the model
        improves. By default Ludwig saves model weights after each epoch
        the validation metric improves, but if the model is really big
        that can be time consuming. If you do not want to keep
        the weights and just find out what performance a model can get
        with a set of hyperparameters, use this parameter to skip it,
        but the model will not be loadable later on and the returned model
        will have the weights obtained at the end of training, instead of
        the weights of the epoch with the best validation performance.
    :param skip_save_progress: (bool, default: `False`) disables saving
        progress each epoch. By default Ludwig saves weights and stats
        after each epoch for enabling resuming of training, but if
        the model is really big that can be time consuming and will use
        twice as much space, use this parameter to skip it, but training
        cannot be resumed later on.
    :param skip_save_log: (bool, default: `False`) disables saving
        TensorBoard logs. By default Ludwig saves logs for the TensorBoard,
        but if it is not needed turning it off can slightly increase the
        overall speed.
    :param skip_save_processed_input: (bool, default: `False`) if input
        dataset is provided it is preprocessed and cached by saving an HDF5
        and JSON files to avoid running the preprocessing again. If this
        parameter is `True`, the HDF5 and JSON file are not saved.
    :param skip_save_unprocessed_output: (bool, default: `False`) by default
        predictions and their probabilities are saved in both raw
        unprocessed numpy files containing tensors and as postprocessed
        CSV files (one for each output feature). If this parameter is `True`,
        only the CSV ones are saved and the numpy ones are skipped.
    :param skip_save_predictions: (bool, default: `False`) skips saving test
        predictions CSV files.
    :param skip_save_eval_stats: (bool, default: `False`) skips saving test
        statistics JSON file.
    :param skip_save_hyperopt_statistics: (bool, default: `False`) skips saving
        hyperopt stats file.
    :param output_directory: (str, default: `'results'`) the directory that
        will contain the training statistics, TensorBoard logs, the saved
        model and the training progress files.
    :param gpus: (Union[str, int, List[int]], default: `None`) GPUs that are
        available for training.
    :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
        allocate per GPU device.
    :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
        to use multithreading parallelism to improve performance at
        the cost of determinism.
    :param use_horovod: (bool, default: `None`) flag for using horovod.
    :param random_seed: (int, default: `default_random_seed`) random seed
        used for weights initialization, splits and any other random function.
    :param debug: (bool, default: `False`) if `True` turns on `tfdbg` with
        `inf_or_nan` checks.
    :param kwargs: additional keyword arguments, forwarded to `hyperopt`
        unchanged.

    # Return

    :return: the value returned by `hyperopt`.
    """
    # Settle on a single model definition (in-memory dict vs. file) before
    # handing everything over to hyperopt.
    model_definition = check_which_model_definition(model_definition,
                                                    model_definition_file)

    return hyperopt(
        model_definition=model_definition,
        dataset=dataset,
        training_set=training_set,
        validation_set=validation_set,
        test_set=test_set,
        training_set_metadata=training_set_metadata,
        data_format=data_format,
        experiment_name=experiment_name,
        model_name=model_name,
        # model_load_path=model_load_path,
        # model_resume_path=model_resume_path,
        skip_save_training_description=skip_save_training_description,
        skip_save_training_statistics=skip_save_training_statistics,
        skip_save_model=skip_save_model,
        skip_save_progress=skip_save_progress,
        skip_save_log=skip_save_log,
        skip_save_processed_input=skip_save_processed_input,
        skip_save_unprocessed_output=skip_save_unprocessed_output,
        skip_save_predictions=skip_save_predictions,
        skip_save_eval_stats=skip_save_eval_stats,
        skip_save_hyperopt_statistics=skip_save_hyperopt_statistics,
        output_directory=output_directory,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads,
        use_horovod=use_horovod,
        random_seed=random_seed,
        debug=debug,
        **kwargs,
    )
def hyperopt_cli(
    config: Union[str, dict],
    dataset: str = None,
    training_set: str = None,
    validation_set: str = None,
    test_set: str = None,
    training_set_metadata: str = None,
    data_format: str = None,
    experiment_name: str = "experiment",
    model_name: str = "run",
    # model_load_path=None,
    # model_resume_path=None,
    skip_save_training_description: bool = False,
    skip_save_training_statistics: bool = False,
    skip_save_model: bool = False,
    skip_save_progress: bool = False,
    skip_save_log: bool = False,
    skip_save_processed_input: bool = False,
    skip_save_unprocessed_output: bool = False,
    skip_save_predictions: bool = False,
    skip_save_eval_stats: bool = False,
    skip_save_hyperopt_statistics: bool = False,
    output_directory: str = "results",
    gpus: Union[str, int, List[int]] = None,
    gpu_memory_limit: int = None,
    allow_parallel_threads: bool = True,
    callbacks: List[Callback] = None,
    backend: Union[Backend, str] = None,
    random_seed: int = default_random_seed,
    hyperopt_log_verbosity: int = 3,
    **kwargs,
):
    """Searches for optimal hyperparameters.

    Every argument is forwarded unchanged to `hyperopt`.

    # Inputs

    :param config: (Union[str, dict]) in-memory representation of
        config or string path to a YAML config file.
    :param dataset: (Union[str, dict, pandas.DataFrame], default: `None`)
        source containing the entire dataset to be used for training.
        If it has a split column, it will be used for splitting (0 for train,
        1 for validation, 2 for test), otherwise the dataset will be
        randomly split.
    :param training_set: (Union[str, dict, pandas.DataFrame], default: `None`)
        source containing training data.
    :param validation_set: (Union[str, dict, pandas.DataFrame], default: `None`)
        source containing validation data.
    :param test_set: (Union[str, dict, pandas.DataFrame], default: `None`)
        source containing test data.
    :param training_set_metadata: (Union[str, dict], default: `None`)
        metadata JSON file or loaded metadata. Intermediate preprocessed
        structure containing the mappings of the input
        dataset created the first time an input file is used in the same
        directory with the same name and a '.meta.json' extension.
    :param data_format: (str, default: `None`) format to interpret data
        sources. Will be inferred automatically if not specified. Valid
        formats are `'auto'`, `'csv'`, `'excel'`, `'feather'`,
        `'fwf'`, `'hdf5'` (cache file produced during previous training),
        `'html'` (file containing a single HTML `<table>`), `'json'`, `'jsonl'`,
        `'parquet'`, `'pickle'` (pickled Pandas DataFrame), `'sas'`, `'spss'`,
        `'stata'`, `'tsv'`.
    :param experiment_name: (str, default: `'experiment'`) name for
        the experiment.
    :param model_name: (str, default: `'run'`) name of the model that is
        being used.
    :param skip_save_training_description: (bool, default: `False`) disables
        saving the description JSON file.
    :param skip_save_training_statistics: (bool, default: `False`) disables
        saving training statistics JSON file.
    :param skip_save_model: (bool, default: `False`) disables
        saving model weights and hyperparameters each time the model
        improves. By default Ludwig saves model weights after each epoch
        the validation metric improves, but if the model is really big
        that can be time consuming. If you do not want to keep
        the weights and just find out what performance a model can get
        with a set of hyperparameters, use this parameter to skip it,
        but the model will not be loadable later on and the returned model
        will have the weights obtained at the end of training, instead of
        the weights of the epoch with the best validation performance.
    :param skip_save_progress: (bool, default: `False`) disables saving
        progress each epoch. By default Ludwig saves weights and stats
        after each epoch for enabling resuming of training, but if
        the model is really big that can be time consuming and will use
        twice as much space, use this parameter to skip it, but training
        cannot be resumed later on.
    :param skip_save_log: (bool, default: `False`) disables saving
        TensorBoard logs. By default Ludwig saves logs for the TensorBoard,
        but if it is not needed turning it off can slightly increase the
        overall speed.
    :param skip_save_processed_input: (bool, default: `False`) if input
        dataset is provided it is preprocessed and cached by saving an HDF5
        and JSON files to avoid running the preprocessing again. If this
        parameter is `True`, the HDF5 and JSON file are not saved.
    :param skip_save_unprocessed_output: (bool, default: `False`) by default
        predictions and their probabilities are saved in both raw
        unprocessed numpy files containing tensors and as postprocessed
        CSV files (one for each output feature). If this parameter is `True`,
        only the CSV ones are saved and the numpy ones are skipped.
    :param skip_save_predictions: (bool, default: `False`) skips saving test
        predictions CSV files.
    :param skip_save_eval_stats: (bool, default: `False`) skips saving test
        statistics JSON file.
    :param skip_save_hyperopt_statistics: (bool, default: `False`) skips saving
        hyperopt stats file.
    :param output_directory: (str, default: `'results'`) the directory that
        will contain the training statistics, TensorBoard logs, the saved
        model and the training progress files.
    :param gpus: (Union[str, int, List[int]], default: `None`) GPUs that are
        available for training.
    :param gpu_memory_limit: (int, default: `None`) maximum memory in MB to
        allocate per GPU device.
    :param allow_parallel_threads: (bool, default: `True`) allow TensorFlow
        to use multithreading parallelism to improve performance at
        the cost of determinism.
    :param callbacks: (list, default: `None`) a list of
        `ludwig.callbacks.Callback` objects that provide hooks into the
        Ludwig pipeline.
    :param backend: (Union[Backend, str], default: `None`) `Backend` or string
        name of backend to use to execute preprocessing / training steps.
    :param random_seed: (int, default: `default_random_seed`) random seed
        used for weights initialization, splits and any other random function.
    :param hyperopt_log_verbosity: (int, default: `3`) Controls verbosity of
        ray tune log messages. Valid values: 0 = silent, 1 = only status
        updates, 2 = status and brief trial results, 3 = status and detailed
        trial results.
    :param kwargs: additional keyword arguments, forwarded to `hyperopt`
        unchanged.

    # Return

    :return: the value returned by `hyperopt`.
    """
    return hyperopt(
        config=config,
        dataset=dataset,
        training_set=training_set,
        validation_set=validation_set,
        test_set=test_set,
        training_set_metadata=training_set_metadata,
        data_format=data_format,
        experiment_name=experiment_name,
        model_name=model_name,
        # model_load_path=model_load_path,
        # model_resume_path=model_resume_path,
        skip_save_training_description=skip_save_training_description,
        skip_save_training_statistics=skip_save_training_statistics,
        skip_save_model=skip_save_model,
        skip_save_progress=skip_save_progress,
        skip_save_log=skip_save_log,
        skip_save_processed_input=skip_save_processed_input,
        skip_save_unprocessed_output=skip_save_unprocessed_output,
        skip_save_predictions=skip_save_predictions,
        skip_save_eval_stats=skip_save_eval_stats,
        skip_save_hyperopt_statistics=skip_save_hyperopt_statistics,
        output_directory=output_directory,
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads,
        callbacks=callbacks,
        backend=backend,
        random_seed=random_seed,
        hyperopt_log_verbosity=hyperopt_log_verbosity,
        **kwargs,
    )
def hyperopt_cli(
    model_definition=None,
    model_definition_file=None,
    dataset=None,
    training_set=None,
    validation_set=None,
    test_set=None,
    training_set_metadata=None,
    data_format=None,
    experiment_name="hyperopt",
    model_name="run",
    # model_load_path=None,
    # model_resume_path=None,
    skip_save_training_description=True,
    skip_save_training_statistics=True,
    skip_save_model=True,
    skip_save_progress=True,
    skip_save_log=True,
    skip_save_processed_input=True,
    skip_save_unprocessed_output=True,
    skip_save_predictions=True,
    skip_save_eval_stats=True,
    skip_save_hyperopt_statistics=False,
    output_directory="results",
    gpus=None,
    gpu_memory_limit=None,
    allow_parallel_threads=True,
    use_horovod=None,
    random_seed=default_random_seed,
    debug=False,
    **kwargs,
):
    """Resolve the model definition and run `hyperopt` with the given options.

    `model_definition` and `model_definition_file` are passed through
    `check_which_model_definition`; its result is what `hyperopt` receives as
    `model_definition`. All remaining arguments, including `**kwargs`, are
    forwarded to `hyperopt` under the same names.

    Note that in this wrapper every `skip_save_*` flag except
    `skip_save_hyperopt_statistics` defaults to `True`.

    :return: the value returned by `hyperopt`.
    """
    resolved_definition = check_which_model_definition(model_definition, model_definition_file)

    return hyperopt(
        model_definition=resolved_definition,
        # data sources
        dataset=dataset,
        training_set=training_set,
        validation_set=validation_set,
        test_set=test_set,
        training_set_metadata=training_set_metadata,
        data_format=data_format,
        # naming
        experiment_name=experiment_name,
        model_name=model_name,
        # model_load_path=model_load_path,
        # model_resume_path=model_resume_path,
        # what to persist
        skip_save_training_description=skip_save_training_description,
        skip_save_training_statistics=skip_save_training_statistics,
        skip_save_model=skip_save_model,
        skip_save_progress=skip_save_progress,
        skip_save_log=skip_save_log,
        skip_save_processed_input=skip_save_processed_input,
        skip_save_unprocessed_output=skip_save_unprocessed_output,
        skip_save_predictions=skip_save_predictions,
        skip_save_eval_stats=skip_save_eval_stats,
        skip_save_hyperopt_statistics=skip_save_hyperopt_statistics,
        output_directory=output_directory,
        # execution environment
        gpus=gpus,
        gpu_memory_limit=gpu_memory_limit,
        allow_parallel_threads=allow_parallel_threads,
        use_horovod=use_horovod,
        random_seed=random_seed,
        debug=debug,
        **kwargs,
    )
def hyperopt_results():
    """Run a small Ray-executed hyperopt search and return its experiment directory.

    Synthetic data is generated for one text and one category input feature
    plus one category output feature. ``hyperopt`` is then invoked with the
    ``ray`` executor (two samples) and the ``variant_generator`` search
    algorithm, writing into ``results`` under the experiment name
    ``hyperopt_test``.

    # Return

    :return: (str) absolute path of ``results`` joined with ``hyperopt_test``.
    """
    features_in = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum"),
    ]
    features_out = [category_feature(vocab_size=2, reduce_input="sum")]

    # Dataset file name: first 10 hex digits of a fresh UUID, upper-cased.
    dataset_file = uuid.uuid4().hex[:10].upper() + ".csv"
    dataset_path = generate_data(features_in, features_out, dataset_file)

    target = features_out[0]["name"]
    search_space = {
        "trainer.learning_rate": {
            "space": "loguniform",
            "lower": 0.0001,
            "upper": 0.01,
        },
        f"{target}.output_size": {"space": "choice", "categories": [32, 64, 128, 256]},
        f"{target}.num_fc_layers": {"space": "randint", "lower": 1, "upper": 6},
    }

    # The hyperopt section is embedded directly as the last key of the config.
    config = {
        "input_features": features_in,
        "output_features": features_out,
        "combiner": {"type": "concat", "num_fc_layers": 2},
        TRAINER: {"epochs": 2, "learning_rate": 0.001},
        "hyperopt": {
            "parameters": search_space,
            "goal": "minimize",
            "output_feature": target,
            "validation_metrics": "loss",
            "executor": {"type": "ray", "num_samples": 2},
            "search_alg": {"type": "variant_generator"},
        },
    }

    hyperopt(config, dataset=dataset_path, output_directory="results", experiment_name="hyperopt_test")

    results_root = os.path.abspath("results")
    return os.path.join(results_root, "hyperopt_test")
def hyperopt_results():
    """Run a small serial hyperopt search and return its output directory.

    Synthetic data is generated for one text and one category input feature
    plus one category output feature. ``hyperopt`` is then invoked with a
    serial executor and a random sampler configured for two samples, writing
    into the ``results`` directory.

    # Return

    :return: (str) absolute path of the ``results`` output directory.
    """
    features_in = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum"),
    ]
    features_out = [category_feature(vocab_size=2, reduce_input="sum")]

    # Dataset file name: first 10 hex digits of a fresh UUID, upper-cased.
    dataset_file = uuid.uuid4().hex[:10].upper() + '.csv'
    dataset_path = generate_data(features_in, features_out, dataset_file)

    target = features_out[0]['name']
    learning_rate_space = {
        "type": "float",
        "low": 0.0001,
        "high": 0.01,
        "space": "log",
        "steps": 3,
    }
    fc_size_space = {"type": "int", "low": 32, "high": 256, "steps": 5}
    num_fc_layers_space = {
        'type': 'int',
        'low': 1,
        'high': 5,
        'space': 'linear',
        'steps': 4,
    }

    # The hyperopt section is embedded directly as the last key of the config.
    config = {
        "input_features": features_in,
        "output_features": features_out,
        "combiner": {"type": "concat", "num_fc_layers": 2},
        "training": {"epochs": 2, "learning_rate": 0.001},
        'hyperopt': {
            "parameters": {
                "training.learning_rate": learning_rate_space,
                target + ".fc_size": fc_size_space,
                target + ".num_fc_layers": num_fc_layers_space,
            },
            "goal": "minimize",
            'output_feature': target,
            'validation_metrics': 'loss',
            'executor': {'type': 'serial'},
            'sampler': {'type': 'random', 'num_samples': 2},
        },
    }

    output_dir = 'results'
    hyperopt(config, dataset=dataset_path, output_directory=output_dir)
    return os.path.abspath(output_dir)
def test_hyperopt_run_hyperopt(csv_filename, samplers):
    """Run a serial hyperopt search with the given sampler and check its outputs.

    :param csv_filename: file name handed to ``generate_data``.
    :param samplers: mapping whose ``"type"`` entry selects the sampler type.

    Asserts that ``hyperopt`` returns a ``list`` and that
    ``hyperopt_statistics.json`` exists in ``results_hyperopt``. The
    statistics file is removed afterwards whether or not the checks pass.
    """
    input_features = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum")
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "num_fc_layers": 2},
        "training": {"epochs": 2, "learning_rate": 0.001}
    }

    output_feature_name = output_features[0]['name']

    hyperopt_configs = {
        "parameters": {
            "training.learning_rate": {
                "type": "float",
                "low": 0.0001,
                "high": 0.01,
                "space": "log",
                "steps": 3,
            },
            output_feature_name + ".fc_layers": {
                'type': 'category',
                'values': [
                    [{'fc_size': 512}, {'fc_size': 256}],
                    [{'fc_size': 512}],
                    [{'fc_size': 256}]
                ]
            },
            output_feature_name + ".fc_size": {
                "type": "int",
                "low": 32,
                "high": 256,
                "steps": 5
            },
            output_feature_name + ".num_fc_layers": {
                'type': 'int',
                'low': 1,
                'high': 5,
                'space': 'linear',
                'steps': 4
            }
        },
        "goal": "minimize",
        'output_feature': output_feature_name,
        'validation_metrics': 'loss',
        'executor': {'type': 'serial'},
        'sampler': {'type': samplers["type"], 'num_samples': 2}
    }

    # add hyperopt parameter space to the config
    config['hyperopt'] = hyperopt_configs

    output_dir = 'results_hyperopt'
    stats_path = os.path.join(output_dir, 'hyperopt_statistics.json')

    try:
        hyperopt_results = hyperopt(config, dataset=rel_path,
                                    output_directory=output_dir)

        # check for return results
        assert isinstance(hyperopt_results, list)

        # check for existence of the hyperopt statistics file
        assert os.path.isfile(stats_path)
    finally:
        # Always clean up: a statistics file left behind by a failed run
        # would let the existence check of a later run pass spuriously.
        if os.path.isfile(stats_path):
            os.remove(stats_path)
def test_hyperopt_run_hyperopt(csv_filename, ray_start_4_cpus):
    """Run a Ray-executed hyperopt search and check its outputs.

    :param csv_filename: file name handed to ``generate_data``.
    :param ray_start_4_cpus: not referenced in the body (presumably a fixture
        requested for its side effects; confirm against the test module).

    Asserts that ``hyperopt`` returns a ``list`` and that
    ``hyperopt_statistics.json`` exists in ``results_hyperopt``.
    """
    features_in = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum"),
    ]
    features_out = [category_feature(vocab_size=2, reduce_input="sum")]

    dataset_path = generate_data(features_in, features_out, csv_filename)

    target = features_out[0]['name']
    search_space = {
        "training.learning_rate": {
            "space": "loguniform",
            "lower": 0.001,
            "upper": 0.1,
        },
        target + ".fc_size": {"space": "randint", "lower": 32, "upper": 256},
        target + ".num_fc_layers": {"space": "randint", "lower": 2, "upper": 6},
    }

    # The hyperopt section is embedded directly as the last key of the config.
    config = {
        "input_features": features_in,
        "output_features": features_out,
        "combiner": {"type": "concat", "num_fc_layers": 2},
        "training": {"epochs": 2, "learning_rate": 0.001},
        'hyperopt': {
            "parameters": search_space,
            "goal": "minimize",
            'output_feature': target,
            'validation_metrics': 'loss',
            'executor': {'type': 'ray'},
            'sampler': {'type': 'ray', 'num_samples': 2},
        },
    }

    output_dir = 'results_hyperopt'
    results = hyperopt(config, dataset=dataset_path, output_directory=output_dir)

    # The call must hand back a list of results.
    assert isinstance(results, list)

    # The statistics file must have been written to the output directory.
    stats_file = os.path.join(output_dir, 'hyperopt_statistics.json')
    assert os.path.isfile(stats_file)
def test_hyperopt_run_hyperopt(csv_filename, samplers):
    """Run a serial hyperopt search with the given sampler and check its outputs.

    :param csv_filename: file name handed to ``generate_data``.
    :param samplers: mapping whose ``"type"`` entry selects the sampler type.

    Asserts that ``hyperopt`` returns a ``HyperoptResults`` and that
    ``hyperopt_statistics.json`` exists in ``results_hyperopt``. The
    statistics file is removed afterwards whether or not the checks pass.
    """
    input_features = [
        text_feature(name="utterance", cell_type="lstm", reduce_output="sum"),
        category_feature(vocab_size=2, reduce_input="sum"),
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "num_fc_layers": 2},
        TRAINER: {"epochs": 2, "learning_rate": 0.001},
    }

    output_feature_name = output_features[0]["name"]

    hyperopt_configs = {
        "parameters": {
            "trainer.learning_rate": {
                "type": "float",
                "low": 0.0001,
                "high": 0.01,
                "space": "log",
                "steps": 3,
            },
            output_feature_name + ".fc_layers": {
                "type": "category",
                "values": [
                    [{"output_size": 64}, {"output_size": 32}],
                    [{"output_size": 64}],
                    [{"output_size": 32}],
                ],
            },
            output_feature_name + ".output_size": {"type": "int", "low": 16, "high": 36, "steps": 5},
            output_feature_name + ".num_fc_layers": {
                "type": "int",
                "low": 1,
                "high": 5,
                "space": "linear",
                "steps": 4,
            },
        },
        "goal": "minimize",
        "output_feature": output_feature_name,
        "validation_metrics": "loss",
        "executor": {"type": "serial"},
        "sampler": {"type": samplers["type"], "num_samples": 2},
    }

    # add hyperopt parameter space to the config
    config["hyperopt"] = hyperopt_configs

    output_dir = "results_hyperopt"
    stats_path = os.path.join(output_dir, "hyperopt_statistics.json")

    try:
        hyperopt_results = hyperopt(config, dataset=rel_path, output_directory=output_dir)

        # check for return results
        assert isinstance(hyperopt_results, HyperoptResults)

        # check for existence of the hyperopt statistics file
        assert os.path.isfile(stats_path)
    finally:
        # Always clean up: a statistics file left behind by a failed run
        # would let the existence check of a later run pass spuriously.
        if os.path.isfile(stats_path):
            os.remove(stats_path)