Example 1
def run(training_function,
        init_config: dict,
        config: Optional[dict] = None,
        cat_hp_cost: Optional[dict] = None,
        metric: Optional[str] = None,
        mode: Optional[str] = None,
        time_budget_s: Optional[Union[int, float, datetime.timedelta]] = None,
        prune_attr: Optional[str] = None,
        min_resource: Optional[float] = None,
        max_resource: Optional[float] = None,
        reduction_factor: Optional[float] = None,
        report_intermediate_result: Optional[bool] = False,
        search_alg = None,
        verbose: Optional[int] = 2, 
        local_dir: Optional[str] = None,
        num_samples: Optional[int] = 1,
        resources_per_trial: Optional[dict] = None,
        mem_size = None,
        use_ray: Optional[bool] = False,
        ):
    '''The trigger for HPO.

    Example:

    .. code-block:: python

        import time
        from flaml import tune

        def compute_with_config(config):
            current_time = time.time()
            metric2minimize = (round(config['x'])-95000)**2
            time2eval = time.time() - current_time
            tune.report(metric2minimize=metric2minimize, time2eval=time2eval)

        analysis = tune.run(
            compute_with_config,
            init_config={},
            config={
                'x': tune.qloguniform(lower=1, upper=1000000, q=1),
                'y': tune.randint(lower=1, upper=1000000)
            },
            metric='metric2minimize', mode='min',
            num_samples=-1, time_budget_s=60, use_ray=False)
            
        print(analysis.trials[-1].last_result)

    Args:
        training_function: A user-defined training function. 
        init_config: A dictionary from a subset of controlled dimensions
            to the initial low-cost values. e.g.,            

            .. code-block:: python

                {'epochs': 1}

            If no such dimension, pass an empty dict {}.
        config: A dictionary to specify the search space.
        cat_hp_cost: A dictionary from a subset of categorical dimensions
            to the relative cost of each choice. 
            e.g.,
            
            .. code-block:: python

                {'tree_method': [1, 1, 2]}
            
            i.e., the relative costs of the
            three choices of 'tree_method' are 1, 1, and 2, respectively.
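
            For instance, a sketch (the choice values are illustrative, not
            from the source) pairing cat_hp_cost with a categorical dimension:

            .. code-block:: python

                config = {
                    'tree_method': tune.choice(['approx', 'hist', 'gpu_hist'])
                }
                # costs align positionally with the choices above
                cat_hp_cost = {'tree_method': [1, 1, 2]}
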
        metric: A string of the metric name to optimize for.
        mode: A string in ['min', 'max'] to specify the objective as
            minimization or maximization.
        time_budget_s: An int or float of the time budget in seconds,
            or a datetime.timedelta.
        prune_attr: A string of the attribute used for pruning. 
            Not necessarily in space.
            When prune_attr is in space, it is a hyperparameter, e.g., 
                'n_iters', and the best value is unknown.
            When prune_attr is not in space, it is a resource dimension, 
                e.g., 'sample_size', and the peak performance is assumed
                to be at the max_resource.
        min_resource: A float of the minimal resource to use for the 
            prune_attr; only valid if prune_attr is not in space.
        max_resource: A float of the maximal resource to use for the 
            prune_attr; only valid if prune_attr is not in space.
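
            For instance, a sketch (hypothetical resource values, not from
            the source) using prune_attr as a resource dimension together
            with min_resource and max_resource:

            .. code-block:: python

                analysis = tune.run(
                    compute_with_config, init_config={},
                    config={'x': tune.qloguniform(lower=1, upper=1000000, q=1)},
                    metric='metric2minimize', mode='min',
                    prune_attr='sample_size',
                    min_resource=1000, max_resource=100000,
                    time_budget_s=60)
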
        reduction_factor: A float of the reduction factor used for incremental
            pruning.
        report_intermediate_result: A boolean of whether intermediate results
            are reported. If so, early stopping and pruning can be used.
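
            For instance, a sketch (hypothetical training loop) of a training
            function reporting intermediate results once per epoch:

            .. code-block:: python

                def train_with_reporting(config):
                    loss = 1.0
                    for epoch in range(10):
                        loss *= config['decay']  # stand-in for one epoch
                        tune.report(epochs=epoch + 1, loss=loss)
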
        search_alg: An instance of BlendSearch as the search algorithm
            to be used. The same instance can be used for iterative tuning. 
            e.g.,

            .. code-block:: python

                from flaml import BlendSearch
                algo = BlendSearch(metric='val_loss', mode='min',
                        space=search_space,
                        points_to_evaluate=points_to_evaluate)
                for i in range(10):
                    analysis = tune.run(compute_with_config, init_config=None,
                        search_alg=algo, use_ray=False)
                    print(analysis.trials[-1].last_result)
                    
        verbose: 0, 1, 2, or 3. Verbosity mode for ray if ray backend is used.
            0 = silent, 1 = only status updates, 2 = status and brief trial
            results, 3 = status and detailed trial results. Defaults to 2.
        local_dir: A string of the local dir to save ray logs if ray backend is
            used.
        num_samples: An integer of the number of configs to try. Defaults to 1.
        resources_per_trial: A dictionary of the hardware resources to allocate
            per trial, e.g., `{'mem': 1024**3}`. When not using ray backend,
            only 'mem' is used as approximate resource constraints
            (in conjunction with mem_size).
        mem_size: A function to estimate the memory size for a given config.
            It is used to skip configs which do not fit in memory.
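
            For instance, a sketch (the size model and training_function are
            stand-ins for illustration) of a mem_size estimator paired with
            resources_per_trial:

            .. code-block:: python

                def mem_size(config):
                    # assume memory grows linearly with the number of trees
                    return config['n_estimators'] * 4 * 1024**2

                analysis = tune.run(
                    training_function, init_config={},
                    config={'n_estimators': tune.randint(lower=4, upper=1024)},
                    metric='loss', mode='min',
                    resources_per_trial={'mem': 1024**3}, mem_size=mem_size,
                    time_budget_s=60)
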
        use_ray: A boolean of whether to use ray as the backend.
    '''
    global _use_ray
    global _verbose
    if not use_ray:
        _verbose = verbose
        if verbose > 0:
            import os
            if local_dir:
                os.makedirs(local_dir, exist_ok=True)
                # replace colons, which are invalid in Windows file names
                logger.addHandler(logging.FileHandler(
                    local_dir + '/tune_' + str(
                        datetime.datetime.now()).replace(':', '-') + '.log'))
            if verbose <= 2:
                logger.setLevel(logging.INFO)
            else:
                logger.setLevel(logging.DEBUG)
        else:
            logger.setLevel(logging.CRITICAL)

    if search_alg is None:
        from ..searcher.blendsearch import BlendSearch
        search_alg = BlendSearch(points_to_evaluate=[init_config],
                                 metric=metric, mode=mode,
                                 cat_hp_cost=cat_hp_cost,
                                 space=config, prune_attr=prune_attr,
                                 min_resource=min_resource,
                                 max_resource=max_resource,
                                 reduction_factor=reduction_factor,
                                 resources_per_trial=resources_per_trial,
                                 mem_size=mem_size)
    if time_budget_s:
        search_alg.set_search_properties(metric, mode, config={
            'time_budget_s': time_budget_s})
    if report_intermediate_result:
        params = {}
        # scheduler resource_dimension=prune_attr
        if prune_attr:
            params['time_attr'] = prune_attr
        if max_resource:
            params['max_t'] = max_resource
        if min_resource:
            params['grace_period'] = min_resource
        if reduction_factor:
            params['reduction_factor'] = reduction_factor
        try:
            from ray.tune.schedulers import ASHAScheduler
            scheduler = ASHAScheduler(**params)
        except ImportError:
            scheduler = None
    else:
        scheduler = None

    if use_ray:
        try:
            from ray import tune
        except ImportError:
            raise ImportError("Failed to import ray tune. "
                              "Please install ray[tune] or set use_ray=False")
        _use_ray = True
        return tune.run(training_function,
                metric=metric,
                mode=mode,
                search_alg=search_alg,
                scheduler=scheduler,
                time_budget_s=time_budget_s,
                verbose=verbose,
                local_dir=local_dir,
                num_samples=num_samples,
                resources_per_trial=resources_per_trial
                )

    # simple sequential run without using tune.run() from ray
    time_start = time.time()    
    _use_ray = False
    if scheduler:
        scheduler.set_search_properties(metric=metric, mode=mode)
    from .trial_runner import SequentialTrialRunner
    global _runner
    _runner = SequentialTrialRunner(
        search_alg=search_alg,
        scheduler=scheduler,
        metric=metric,
        mode=mode,
        )
    num_trials = 0
    if time_budget_s is None:
        time_budget_s = float('inf')  # avoid TypeError when no budget is given
    while time.time() - time_start < time_budget_s and (
            num_samples < 0 or num_trials < num_samples):
        trial_to_run = _runner.step()
        if trial_to_run:
            num_trials += 1
            if verbose:
                logger.info(f'trial {num_trials} config: {trial_to_run.config}')
            training_function(trial_to_run.config)
            _runner.stop_trial(trial_to_run)
    return ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode)
Example 2
def run(
    evaluation_function,
    config: Optional[dict] = None,
    low_cost_partial_config: Optional[dict] = None,
    cat_hp_cost: Optional[dict] = None,
    metric: Optional[str] = None,
    mode: Optional[str] = None,
    time_budget_s: Optional[Union[int, float]] = None,
    points_to_evaluate: Optional[List[dict]] = None,
    evaluated_rewards: Optional[List] = None,
    resource_attr: Optional[str] = None,
    min_resource: Optional[float] = None,
    max_resource: Optional[float] = None,
    reduction_factor: Optional[float] = None,
    scheduler=None,
    search_alg=None,
    verbose: Optional[int] = 2,
    local_dir: Optional[str] = None,
    num_samples: Optional[int] = 1,
    resources_per_trial: Optional[dict] = None,
    config_constraints: Optional[List[Tuple[Callable[[dict], float], str,
                                            float]]] = None,
    metric_constraints: Optional[List[Tuple[str, str, float]]] = None,
    max_failure: Optional[int] = 100,
    use_ray: Optional[bool] = False,
    use_incumbent_result_in_evaluation: Optional[bool] = None,
):
    """The trigger for HPO.

    Example:

    ```python
    import time
    from flaml import tune

    def compute_with_config(config):
        current_time = time.time()
        metric2minimize = (round(config['x'])-95000)**2
        time2eval = time.time() - current_time
        tune.report(metric2minimize=metric2minimize, time2eval=time2eval)

    analysis = tune.run(
        compute_with_config,
        config={
            'x': tune.lograndint(lower=1, upper=1000000),
            'y': tune.randint(lower=1, upper=1000000)
        },
        metric='metric2minimize', mode='min',
        num_samples=-1, time_budget_s=60, use_ray=False)

    print(analysis.trials[-1].last_result)
    ```

    Args:
        evaluation_function: A user-defined evaluation function.
            It takes a configuration as input, outputs an evaluation
            result (can be a numerical value or a dictionary of string
            and numerical value pairs) for the input configuration.
            For machine learning tasks, it usually involves training and
            scoring a machine learning model, e.g., through validation loss.
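            For instance, a minimal sketch (hypothetical metrics) of an
            evaluation function that returns a dictionary of metrics:

    ```python
    def evaluate_config(config):
        # stand-in for training and scoring a model with this config
        val_loss = (config['x'] - 3) ** 2
        return {'val_loss': val_loss}
    ```
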
        config: A dictionary to specify the search space.
        low_cost_partial_config: A dictionary from a subset of
            controlled dimensions to the initial low-cost values.
            e.g., ```{'n_estimators': 4, 'max_leaves': 4}```

        cat_hp_cost: A dictionary from a subset of categorical dimensions
            to the relative cost of each choice.
            e.g., ```{'tree_method': [1, 1, 2]}```
            i.e., the relative costs of the
            three choices of 'tree_method' are 1, 1, and 2, respectively.
        metric: A string of the metric name to optimize for.
        mode: A string in ['min', 'max'] to specify the objective as
            minimization or maximization.
        time_budget_s: int or float | The time budget in seconds.
        points_to_evaluate: A list of initial hyperparameter
            configurations to run first.
        evaluated_rewards (list): If you have previously evaluated the
            parameters passed in as points_to_evaluate, you can avoid
            re-running those trials by passing in the reward attributes
            as a list so the optimizer can be told the results without
            needing to re-compute the trial. Must be the same length as
            points_to_evaluate.
            e.g.,

    ```python
    points_to_evaluate = [
        {"b": .99, "cost_related": {"a": 3}},
        {"b": .99, "cost_related": {"a": 2}},
    ]
    evaluated_rewards=[3.0, 1.0]
    ```

            means that you know the rewards for the two configs in
            points_to_evaluate are 3.0 and 1.0, respectively, and want to
            inform run().

        resource_attr: A string to specify the resource dimension used by
            the scheduler via "scheduler".
        min_resource: A float of the minimal resource to use for the resource_attr.
        max_resource: A float of the maximal resource to use for the resource_attr.
        reduction_factor: A float of the reduction factor used for incremental
            pruning.
        scheduler: A scheduler for executing the experiment. Can be None, 'flaml',
            'asha' or a custom instance of the TrialScheduler class. Default is None:
            in this case, when resource_attr is provided, the 'flaml' scheduler will
            be used, otherwise no scheduler will be used. When set to 'flaml', a
            scheduler implemented natively in FLAML will be used. It does not
            require users to report intermediate results in evaluation_function.
            Find more details about this scheduler in this paper:
            https://arxiv.org/pdf/1911.04706.pdf.
            When set to 'asha', the input for arguments "resource_attr",
            "max_resource", "min_resource" and "reduction_factor" will be passed
            to ASHA's "time_attr", "max_t", "grace_period" and "reduction_factor"
            respectively. You can also provide a self-defined scheduler instance
            of the TrialScheduler class. When 'asha' or a self-defined scheduler
            is used, you usually need to report intermediate results in the
            evaluation function, as in the sketch below. Please find examples
            using different types of schedulers and how to set up the
            corresponding evaluation functions in
            test/tune/test_scheduler.py. TODO: point to notebook examples.
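
            For instance, a sketch (hypothetical objective; 'asha' requires
            ray to be installed) pairing scheduler='asha' with an evaluation
            function that reports intermediate results:

    ```python
    def train_with_reporting(config):
        score = 0.0
        for epoch in range(100):
            score += config['lr']  # stand-in for one epoch of training
            tune.report(epoch=epoch + 1, val_loss=1 / (1 + score))

    analysis = tune.run(
        train_with_reporting,
        config={'lr': tune.loguniform(lower=1e-4, upper=1e-1)},
        metric='val_loss', mode='min',
        scheduler='asha', resource_attr='epoch',
        min_resource=1, max_resource=100, reduction_factor=2,
        time_budget_s=60, num_samples=-1)
    ```
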
        search_alg: An instance of BlendSearch as the search algorithm
            to be used. The same instance can be used for iterative tuning.
            e.g.,

    ```python
    from flaml import BlendSearch
    algo = BlendSearch(metric='val_loss', mode='min',
            space=search_space,
            low_cost_partial_config=low_cost_partial_config)
    for i in range(10):
        analysis = tune.run(compute_with_config,
            search_alg=algo, use_ray=False)
        print(analysis.trials[-1].last_result)
    ```

        verbose: 0, 1, 2, or 3. Verbosity mode for ray if ray backend is used.
            0 = silent, 1 = only status updates, 2 = status and brief trial
            results, 3 = status and detailed trial results. Defaults to 2.
        local_dir: A string of the local dir to save ray logs if ray backend is
            used; or a local dir to save the tuning log.
        num_samples: An integer of the number of configs to try. Defaults to 1.
        resources_per_trial: A dictionary of the hardware resources to allocate
            per trial, e.g., `{'cpu': 1}`. It is only valid when using ray backend
            (by setting 'use_ray = True'). It shall be used when you need to do
            [parallel tuning](https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function#parallel-tuning).
        config_constraints: A list of config constraints to be satisfied.
            e.g., ```config_constraints = [(mem_size, '<=', 1024**3)]```

            mem_size is a function which returns a float of the number of
            bytes needed for a config.
            It is used to skip configs which do not fit in memory.
        metric_constraints: A list of metric constraints to be satisfied.
            e.g., `metric_constraints = [('precision', '>=', 0.9)]`.
            The sign can be ">=" or "<=".
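
            For instance, a sketch (the size model is made up for
            illustration, and evaluation_function is assumed to report a
            'precision' metric) combining both kinds of constraints:

    ```python
    def mem_size(config):
        # hypothetical estimate: memory grows with the number of trees
        return config['n_estimators'] * 4 * 1024**2

    analysis = tune.run(
        evaluation_function,
        config={'n_estimators': tune.randint(lower=4, upper=1024)},
        metric='val_loss', mode='min',
        config_constraints=[(mem_size, '<=', 1024**3)],
        metric_constraints=[('precision', '>=', 0.9)],
        time_budget_s=60, num_samples=-1)
    ```
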
        max_failure: int | The maximal number of consecutive failures to sample
            a trial before the tuning is terminated.
        use_ray: A boolean of whether to use ray as the backend.
    """
    global _use_ray
    global _verbose
    if not use_ray:
        _verbose = verbose
        if verbose > 0:
            import os

            if local_dir:
                os.makedirs(local_dir, exist_ok=True)
                logger.addHandler(
                    logging.FileHandler(
                        local_dir + "/tune_" +
                        str(datetime.datetime.now()).replace(":", "-") +
                        ".log"))
            elif not logger.handlers:
                # Add the console handler.
                _ch = logging.StreamHandler()
                logger_formatter = logging.Formatter(
                    "[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s",
                    "%m-%d %H:%M:%S",
                )
                _ch.setFormatter(logger_formatter)
                logger.addHandler(_ch)
            if verbose <= 2:
                logger.setLevel(logging.INFO)
            else:
                logger.setLevel(logging.DEBUG)
        else:
            logger.setLevel(logging.CRITICAL)

    from ..searcher.blendsearch import BlendSearch, CFO

    if search_alg is None:
        flaml_scheduler_resource_attr = flaml_scheduler_min_resource = None
        flaml_scheduler_max_resource = flaml_scheduler_reduction_factor = None
        if scheduler in (None, "flaml"):
            # When scheduler is None or 'flaml', we use a scheduler that is
            # authentic to the search algorithms in flaml. After setting up
            # the search algorithm accordingly, we need to set scheduler to
            # None in case it is later used in the trial runner.
            flaml_scheduler_resource_attr = resource_attr
            flaml_scheduler_min_resource = min_resource
            flaml_scheduler_max_resource = max_resource
            flaml_scheduler_reduction_factor = reduction_factor
            scheduler = None
        try:
            import optuna

            SearchAlgorithm = BlendSearch
        except ImportError:
            SearchAlgorithm = CFO
            logger.warning(
                "Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]"
            )

        search_alg = SearchAlgorithm(
            metric=metric or DEFAULT_METRIC,
            mode=mode,
            space=config,
            points_to_evaluate=points_to_evaluate,
            evaluated_rewards=evaluated_rewards,
            low_cost_partial_config=low_cost_partial_config,
            cat_hp_cost=cat_hp_cost,
            time_budget_s=time_budget_s,
            num_samples=num_samples,
            resource_attr=flaml_scheduler_resource_attr,
            min_resource=flaml_scheduler_min_resource,
            max_resource=flaml_scheduler_max_resource,
            reduction_factor=flaml_scheduler_reduction_factor,
            config_constraints=config_constraints,
            metric_constraints=metric_constraints,
            use_incumbent_result_in_evaluation=
            use_incumbent_result_in_evaluation,
        )
    else:
        if metric is None or mode is None:
            metric = metric or search_alg.metric
            mode = mode or search_alg.mode
        if ray_import:
            from ray.tune.suggest import ConcurrencyLimiter
        else:
            from flaml.searcher.suggestion import ConcurrencyLimiter
        if (search_alg.__class__.__name__ in [
                "BlendSearch",
                "CFO",
                "CFOCat",
        ] and use_incumbent_result_in_evaluation is not None):
            search_alg.use_incumbent_result_in_evaluation = (
                use_incumbent_result_in_evaluation)
        searcher = (search_alg.searcher if isinstance(
            search_alg, ConcurrencyLimiter) else search_alg)
        if isinstance(searcher, BlendSearch):
            setting = {}
            if time_budget_s:
                setting["time_budget_s"] = time_budget_s
            if num_samples > 0:
                setting["num_samples"] = num_samples
            searcher.set_search_properties(metric, mode, config, setting)
        else:
            searcher.set_search_properties(metric, mode, config)
    if scheduler == "asha":
        params = {}
        # scheduler resource_dimension=resource_attr
        if resource_attr:
            params["time_attr"] = resource_attr
        if max_resource:
            params["max_t"] = max_resource
        if min_resource:
            params["grace_period"] = min_resource
        if reduction_factor:
            params["reduction_factor"] = reduction_factor
        if ray_import:
            from ray.tune.schedulers import ASHAScheduler

            scheduler = ASHAScheduler(**params)
    if use_ray:
        try:
            from ray import tune
        except ImportError:
            raise ImportError("Failed to import ray tune. "
                              "Please install ray[tune] or set use_ray=False")
        _use_ray = True
        return tune.run(
            evaluation_function,
            metric=metric,
            mode=mode,
            search_alg=search_alg,
            scheduler=scheduler,
            time_budget_s=time_budget_s,
            verbose=verbose,
            local_dir=local_dir,
            num_samples=num_samples,
            resources_per_trial=resources_per_trial,
        )

    # simple sequential run without using tune.run() from ray
    time_start = time.time()
    _use_ray = False
    if scheduler:
        scheduler.set_search_properties(metric=metric, mode=mode)
    from .trial_runner import SequentialTrialRunner

    global _runner
    _runner = SequentialTrialRunner(
        search_alg=search_alg,
        scheduler=scheduler,
        metric=metric,
        mode=mode,
    )
    num_trials = 0
    if time_budget_s is None:
        time_budget_s = np.inf
    fail = 0
    ub = (len(evaluated_rewards) if evaluated_rewards else 0) + max_failure
    while (time.time() - time_start < time_budget_s
           and (num_samples < 0 or num_trials < num_samples) and fail < ub):
        trial_to_run = _runner.step()
        if trial_to_run:
            num_trials += 1
            if verbose:
                logger.info(
                    f"trial {num_trials} config: {trial_to_run.config}")
            result = evaluation_function(trial_to_run.config)
            if result is not None:
                if isinstance(result, dict):
                    report(**result)
                else:
                    report(_metric=result)
            _runner.stop_trial(trial_to_run)
            fail = 0
        else:
            fail += 1  # break with ub consecutive failures
    if fail == ub:
        logger.warning(
            f"failed to sample a trial {max_failure} times in a row, "
            "stopping the tuning."
        )
    if verbose > 0:
        logger.handlers.clear()
    return ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode)
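
For instance, a short usage sketch of the returned analysis object (assuming a
flaml version where ExperimentAnalysis exposes best_config and best_result,
and reusing compute_with_config from the docstring example above):

```python
analysis = tune.run(
    compute_with_config,
    config={'x': tune.lograndint(lower=1, upper=1000000),
            'y': tune.randint(lower=1, upper=1000000)},
    metric='metric2minimize', mode='min',
    num_samples=-1, time_budget_s=60, use_ray=False)
print(analysis.best_config)   # best configuration found
print(analysis.best_result)   # full result dict of the best trial
```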