Example #1
def create_auto_config(
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame, DatasetInfo],
    target: Union[str, List[str]],
    time_limit_s: Union[int, float],
    tune_for_memory: bool,
    user_config: Dict = None,
) -> dict:
    """Returns an auto-generated Ludwig config with the intent of training the best model on given given dataset /
    target in the given time limit.

    # Inputs
    :param dataset: (str, pd.DataFrame, dd.core.DataFrame, DatasetInfo) data source to train over.
    :param target: (str, List[str]) name of target feature
    :param time_limit_s: (int, float) total time allocated to auto_train. acts
                         as the stopping parameter
    :param tune_for_memory: (bool) refine hyperopt search space for available
                            host / GPU memory
    :param user_config: (dict) override automatic selection of specified config items

    # Return
    :return: (dict) selected model configuration
    """
    default_configs = _create_default_config(dataset, target, time_limit_s)
    model_config = _model_select(dataset, default_configs, user_config)
    if tune_for_memory:
        if ray.is_initialized():
            model_config, _ = ray.get(
                ray.remote(num_cpus=1)(memory_tune_config).remote(
                    model_config, dataset))
        else:
            model_config, _ = memory_tune_config(model_config, dataset)
    return model_config
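
A minimal usage sketch for this variant, assuming Ludwig is installed and exposes create_auto_config from ludwig.automl; the DataFrame contents, column names, and budget values are illustrative assumptions, not part of the snippet above.

# Usage sketch (illustrative): ask AutoML for a config on an in-memory DataFrame.
import pandas as pd
from ludwig.automl import create_auto_config  # assumed public import path

df = pd.DataFrame({
    "review": ["great product", "terrible product", "okay product"],
    "label": [1, 0, 1],
})

config = create_auto_config(
    dataset=df,
    target="label",
    time_limit_s=3600,       # one-hour budget passed on to auto_train
    tune_for_memory=False,   # skip memory-based search-space refinement
)
print(config)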
Example #2
def create_auto_config(
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame, DatasetInfo],
    target: str,
    time_limit_s: Union[int, float],
    tune_for_memory: bool,
) -> dict:
    """
    Returns an auto-generated Ludwig config with the intent of training
    the best model on the given dataset / target in the given time
    limit.

    # Inputs
    :param dataset: (str, pd.DataFrame, dd.core.DataFrame, DatasetInfo) data source to train over.
    :param target: (str) name of target feature
    :param time_limit_s: (int, float) total time allocated to auto_train. acts
                         as the stopping parameter
    :param tune_for_memory: (bool) refine hyperopt search space for available
                            host / GPU memory

    # Return
    :return: (dict) selected model configuration
    """
    default_configs = _create_default_config(dataset, target, time_limit_s)
    model_config = _model_select(default_configs)
    if tune_for_memory:
        if ray.is_initialized():
            model_config, _ = ray.get(
                ray.remote(num_cpus=1)(memory_tune_config).remote(
                    model_config, dataset))
        else:
            model_config, _ = memory_tune_config(model_config, dataset)
    return model_config
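
This variant documents passing the dataset as a filepath; a hedged sketch of that form follows (the CSV path is hypothetical, and initializing Ray is optional, used here only so memory tuning runs as a Ray task).

# Usage sketch (illustrative): filepath dataset with memory tuning enabled.
import ray
from ludwig.automl import create_auto_config  # assumed public import path

ray.init(ignore_reinit_error=True)  # optional; without it tuning runs in-process

config = create_auto_config(
    dataset="train.csv",    # hypothetical local CSV file
    target="label",
    time_limit_s=1800,
    tune_for_memory=True,   # refine hyperopt search space for available memory
)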
Example #3
def create_auto_config(
    dataset: Union[str, pd.DataFrame, dd.core.DataFrame, DatasetInfo],
    target: Union[str, List[str]],
    time_limit_s: Union[int, float],
    tune_for_memory: bool,
    user_config: Dict = None,
    random_seed: int = default_random_seed,
    use_reference_config: bool = False,
) -> dict:
    """Returns an auto-generated Ludwig config with the intent of training the best model on given given dataset /
    target in the given time limit.

    # Inputs
    :param dataset: (str, pd.DataFrame, dd.core.DataFrame, DatasetInfo) data source to train over.
    :param target: (str, List[str]) name of target feature
    :param time_limit_s: (int, float) total time allocated to auto_train. acts
                         as the stopping parameter
    :param tune_for_memory: (bool) refine hyperopt search space for available
                            host / GPU memory
    :param user_config: (dict) override automatic selection of specified config items
    :param random_seed: (int, default: `42`) a random seed that will be used anywhere
                        there is a call to a random number generator, including
                        hyperparameter search sampling, as well as data splitting,
                        parameter initialization and training set shuffling
    :param use_reference_config: (bool) refine hyperopt search space by setting first
                                 search point from reference model config, if any

    # Return
    :return: (dict) selected model configuration
    """
    default_configs = _create_default_config(dataset, target, time_limit_s,
                                             random_seed)
    model_config, model_category, row_count = _model_select(
        dataset, default_configs, user_config, use_reference_config)
    if tune_for_memory:
        if ray.is_initialized():
            resources = get_available_resources()  # check if cluster has GPUS
            if resources["gpu"] > 0:
                model_config, fits_in_memory = ray.get(
                    ray.remote(num_gpus=1, num_cpus=1,
                               max_calls=1)(memory_tune_config).remote(
                                   model_config, dataset, model_category,
                                   row_count))
            else:
                model_config, fits_in_memory = ray.get(
                    ray.remote(num_cpus=1)(memory_tune_config).remote(
                        model_config, dataset, model_category, row_count))
        else:
            model_config, fits_in_memory = memory_tune_config(
                model_config, dataset, model_category, row_count)
        if not fits_in_memory:
            warnings.warn(
                "AutoML with tune_for_memory enabled did not return estimation that model will fit in memory. "
                "If out-of-memory occurs, consider setting AutoML user_config to reduce model memory footprint. "
            )
    return model_config
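
A sketch of how the additional knobs in this variant might be used; the dataset path, the user_config feature override, and the warning capture are illustrative assumptions rather than behavior documented in the snippet.

# Usage sketch (illustrative): extended signature with reproducibility and
# a user override; capture the fits-in-memory warning if one is raised.
import warnings
import pandas as pd
from ludwig.automl import create_auto_config  # assumed public import path

df = pd.read_csv("train.csv")  # hypothetical dataset

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    config = create_auto_config(
        dataset=df,
        target="label",
        time_limit_s=7200,
        tune_for_memory=True,
        user_config={"input_features": [{"name": "review", "type": "text"}]},  # illustrative override
        random_seed=13,             # reproducible sampling, splits, and shuffling
        use_reference_config=True,  # seed hyperopt from a reference model config
    )

for w in caught:
    print(w.message)  # e.g. the tune_for_memory warning shown above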