import warnings
from typing import List, Union

import numpy as np
import pandas as pd

# dask is needed here only for the dd.core.DataFrame type annotations below.
import dask.dataframe as dd

# NOTE: the remaining helpers used below (_ray_init, _train, get_model_type,
# AutoTrainResults, OUTPUT_DIR, default_random_seed, DatasetInfo,
# get_dataset_info, get_features_config, get_available_resources,
# allocate_experiment_resources, load_yaml, BASE_AUTOML_CONFIG,
# encoder_defaults, combiner_defaults) are defined in sibling modules of this
# package and are assumed to be imported alongside these standard imports.
def train_with_config(
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame],
    config: dict,
    output_directory: str = OUTPUT_DIR,
    random_seed: int = default_random_seed,
    **kwargs,
) -> AutoTrainResults:
    """Performs hyperparameter optimization with respect to the given config and selects the best model.

    # Inputs
    :param dataset: (str) filepath to dataset.
    :param config: (dict) optional Ludwig configuration to use for training,
        defaults to `create_auto_config`.
    :param output_directory: (str) directory into which to write results,
        defaults to current working directory.
    :param random_seed: (int, default: `42`) a random seed that will be used
        anywhere there is a call to a random number generator, including
        hyperparameter search sampling, as well as data splitting, parameter
        initialization and training set shuffling

    # Returns
    :return: (AutoTrainResults) results containing hyperopt experiments and best model
    """
    _ray_init()
    model_type = get_model_type(config)
    hyperopt_results = _train(
        config,
        dataset,
        output_directory=output_directory,
        model_name=model_type,
        random_seed=random_seed,
        **kwargs,
    )
    # Catch the edge case where metric_score is NaN.
    # TODO (ASN): decide how we want to proceed if at least one trial has completed.
    for trial in hyperopt_results.ordered_trials:
        if np.isnan(trial.metric_score):
            warnings.warn(
                "There was an error running the experiment. "
                "A trial failed to start. "
                "Consider increasing the time budget for the experiment."
            )

    experiment_analysis = hyperopt_results.experiment_analysis
    return AutoTrainResults(experiment_analysis)
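# Example usage (a minimal sketch): the parameter names of `create_auto_config`
# and the `best_model` attribute on AutoTrainResults are assumptions based on
# this module's docstrings, not guarantees.
#
#     config = create_auto_config(
#         dataset="train.csv",        # hypothetical local file
#         target="price",             # hypothetical target column
#         time_limit_s=3600,
#     )
#     results = train_with_config("train.csv", config, output_directory="results")
#     best_model = results.best_model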
def _create_default_config(
    dataset: Union[str, dd.core.DataFrame, pd.DataFrame, DatasetInfo],
    target_name: Union[str, List[str]] = None,
    time_limit_s: Union[int, float] = None,
) -> dict:
    """Returns auto_train configs for three available combiner models.

    Coordinates the following tasks:
    - extracts fields and generates a list of FieldInfo objects
    - gets field metadata (i.e., avg. words, total non-null entries)
    - builds the input_features and output_features sections of the config
    - for each combiner, adds default training and hyperopt parameters
    - infers resource constraints and adds GPU and CPU resource allocation per trial

    # Inputs
    :param dataset: (str) filepath to dataset.
    :param target_name: (str, List[str]) name of target feature
    :param time_limit_s: (int, float) total time allocated to auto_train;
        acts as the stopping parameter

    # Returns
    :return: (dict) dictionaries containing the auto_train configs for all
        available combiner types
    """
    _ray_init()
    resources = get_available_resources()
    experiment_resources = allocate_experiment_resources(resources)

    dataset_info = dataset
    if not isinstance(dataset, DatasetInfo):
        dataset_info = get_dataset_info(dataset)

    input_and_output_feature_config = get_features_config(
        dataset_info.fields, dataset_info.row_count, resources, target_name
    )

    model_configs = {}

    # Read in the base config and update it with experiment resources.
    base_automl_config = load_yaml(BASE_AUTOML_CONFIG)
    base_automl_config["hyperopt"]["executor"].update(experiment_resources)
    base_automl_config["hyperopt"]["executor"]["time_budget_s"] = time_limit_s
    if time_limit_s is not None:
        base_automl_config["hyperopt"]["sampler"]["scheduler"]["max_t"] = time_limit_s
    base_automl_config.update(input_and_output_feature_config)
    model_configs["base_config"] = base_automl_config

    # Read in all encoder configs. Note: the inner loop must run even when the
    # feat_type entry was just created, otherwise no encoder config is loaded.
    for feat_type, default_configs in encoder_defaults.items():
        if feat_type not in model_configs.keys():
            model_configs[feat_type] = {}
        for encoder_name, encoder_config_path in default_configs.items():
            model_configs[feat_type][encoder_name] = load_yaml(encoder_config_path)

    # Read in all combiner configs.
    model_configs["combiner"] = {}
    for combiner_type, default_config in combiner_defaults.items():
        combiner_config = load_yaml(default_config)
        model_configs["combiner"][combiner_type] = combiner_config

    return model_configs
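# Sketch of the structure returned by _create_default_config. The concrete
# feature types and combiner names shown here are illustrative assumptions,
# since they depend on the contents of `encoder_defaults` and
# `combiner_defaults`:
#
#     configs = _create_default_config("train.csv", "price", time_limit_s=3600)
#     configs["base_config"]          # full Ludwig config with hyperopt section
#     configs["combiner"]["tabnet"]   # default config for one combiner (illustrative)
#     configs["text"]["bert"]        # default config for one encoder (illustrative)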