Example 1
def test_config_constraint():
    from flaml import tune

    # Test dict return value
    def evaluate_config_dict(config):
        metric = (round(config["x"]) - 85000)**2 - config["x"] / config["y"]
        return {"metric": metric}

    def config_constraint(config):
        if config["y"] >= config["x"]:
            return 1
        else:
            return 0

    tune.run(
        evaluate_config_dict,
        config={
            "x": tune.qloguniform(lower=1, upper=100000, q=1),
            "y": tune.qrandint(lower=2, upper=100000, q=2),
        },
        config_constraints=[(config_constraint, ">", 0.5)],
        metric="metric",
        mode="max",
        num_samples=100,
    )
Example 2
def test_run_training_function_return_value():
    from flaml import tune

    # Test dict return value
    def evaluate_config_dict(config):
        metric = (round(config["x"]) - 85000)**2 - config["x"] / config["y"]
        return {"metric": metric}

    tune.run(
        evaluate_config_dict,
        config={
            "x": tune.qloguniform(lower=1, upper=100000, q=1),
            "y": tune.qrandint(lower=2, upper=100000, q=2),
        },
        metric="metric",
        mode="max",
        num_samples=100,
    )

    # Test scalar return value
    def evaluate_config_scalar(config):
        metric = (round(config["x"]) - 85000)**2 - config["x"] / config["y"]
        return metric

    tune.run(
        evaluate_config_scalar,
        config={
            "x": tune.qloguniform(lower=1, upper=100000, q=1),
            "y": tune.qlograndint(lower=2, upper=100000, q=2),
        },
        num_samples=100,
        mode="max",
    )
Example 3
def test_nested():
    from flaml import tune
    search_space = {
        # test nested search space
        "cost_related": {
            "a": tune.randint(1, 8),
        },
        "b": tune.uniform(0.5, 1.0),
    }

    def simple_func(config):
        obj = (config["cost_related"]["a"] - 4)**2 \
            + (config["b"] - config["cost_related"]["a"])**2
        tune.report(obj=obj)
        tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])

    analysis = tune.run(simple_func,
                        config=search_space,
                        low_cost_partial_config={"cost_related": {
                            "a": 1
                        }},
                        metric="obj",
                        mode="min",
                        metric_constraints=[("ab", "<=", 4)],
                        local_dir='logs/',
                        num_samples=-1,
                        time_budget_s=1)

    best_trial = analysis.get_best_trial()
    logger.info(f"Best config: {best_trial.config}")
    logger.info(f"Best result: {best_trial.last_result}")
Example 4
def test_tune_lgbm_csv():
    # load a built-in search space from flaml
    flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
    # specify the search space as a dict from hp name to domain; you can define your own search space the same way
    config_search_space = {
        hp: space["domain"]
        for hp, space in flaml_lgbm_search_space.items()
    }
    # give guidance about hp values corresponding to low training cost, i.e., {"n_estimators": 4, "num_leaves": 4}
    low_cost_partial_config = {
        hp: space["low_cost_init_value"]
        for hp, space in flaml_lgbm_search_space.items()
        if "low_cost_init_value" in space
    }
    # initial points to evaluate
    points_to_evaluate = [{
        hp: space["init_value"]
        for hp, space in flaml_lgbm_search_space.items()
        if "init_value" in space
    }]
    # run the tuning, minimizing mse, with total time budget 3 seconds
    analysis = tune.run(
        train_lgbm,
        metric="mse",
        mode="min",
        config=config_search_space,
        low_cost_partial_config=low_cost_partial_config,
        points_to_evaluate=points_to_evaluate,
        time_budget_s=3,
        num_samples=-1,
    )
    print(analysis.best_result)
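
This example assumes X_train, X_test, y_train, y_test and a train_lgbm objective defined elsewhere in the module. A minimal sketch of such an objective (the data names and the mse computation are assumptions, not the exact test code):

import lightgbm
from sklearn.metrics import mean_squared_error
from flaml.model import LGBMEstimator

def train_lgbm(config: dict) -> dict:
    # convert the flaml config into native LightGBM parameters
    params = LGBMEstimator(**config).params
    # train on the training split and score mse on the held-out split
    model = lightgbm.LGBMRegressor(**params)
    model.fit(X_train, y_train)
    mse = mean_squared_error(y_test, model.predict(X_test))
    # the keys of the returned dict become the metrics visible to tune.run
    return {"mse": mse}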
Example 5
def _test_flaml_raytune_consistency(num_samples=-1,
                                    max_concurrent_trials=1,
                                    searcher_name="cfo"):
    try:
        from ray import tune as raytune
    except ImportError:
        print(
            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
        )
        return
    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    analysis = tune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,  # the search space
        low_cost_partial_config=low_cost_partial_config,  # an initial (partial) config with low cost
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        time_budget_s=None,  # the time budget in seconds
        local_dir="logs/",  # the local directory to store logs
        search_alg=searcher,
        # verbose=0,          # verbosity
        # use_ray=True, # uncomment when performing parallel tuning using ray
    )
    flaml_best_config = analysis.best_config
    flaml_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("best flaml", searcher_name, flaml_best_config)  # the best config
    print("flaml config in results", searcher_name, flaml_config_in_results)

    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    from ray.tune.suggest import ConcurrencyLimiter

    search_alg = ConcurrencyLimiter(searcher, max_concurrent_trials)
    analysis = raytune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        local_dir="logs/",  # the local directory to store logs
        # max_concurrent_trials=max_concurrent_trials,
        # resources_per_trial={"cpu": max_concurrent_trials, "gpu": 0},
        search_alg=search_alg,
    )
    ray_best_config = analysis.best_config
    ray_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("ray best", searcher_name, analysis.best_config)  # the best config
    print("ray config in results", searcher_name, ray_config_in_results)
    assert ray_best_config == flaml_best_config, "best config should be the same"
    assert (flaml_config_in_results == ray_config_in_results
            ), "results from raytune and flaml should be the same"
Example 6
def test_simple(method=None):
    automl = AutoML()
    automl.add_learner(learner_name="XGBoost2D", learner_class=XGBoost2D)

    automl_settings = {
        "estimator_list": ["XGBoost2D"],
        "task": "classification",
        "log_file_name": f"test/xgboost2d_{dataset}_{method}.log",
        "n_jobs": 1,
        "hpo_method": method,
        "log_type": "all",
        "retrain_full": "budget",
        "keep_search_state": True,
        "time_budget": 1,
    }
    from sklearn.externals._arff import ArffException

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
    print(automl.estimator_list)
    print(automl.search_space)
    print(automl.points_to_evaluate)
    config = automl.best_config.copy()
    config["learner"] = automl.best_estimator
    automl.trainable(config)
    from flaml import tune
    from flaml.automl import size
    from functools import partial

    analysis = tune.run(
        automl.trainable,
        automl.search_space,
        metric="val_loss",
        mode="min",
        low_cost_partial_config=automl.low_cost_partial_config,
        points_to_evaluate=automl.points_to_evaluate,
        cat_hp_cost=automl.cat_hp_cost,
        resource_attr=automl.resource_attr,
        min_resource=automl.min_resource,
        max_resource=automl.max_resource,
        time_budget_s=automl._state.time_budget,
        config_constraints=[(partial(size,
                                     automl._state), "<=", automl._mem_thres)],
        metric_constraints=automl.metric_constraints,
        num_samples=5,
    )
    print(analysis.trials[-1])
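
XGBoost2D is a custom learner registered through add_learner, and dataset is a module-level dataset name. A custom learner is usually a subclass of a built-in estimator that overrides the search_space classmethod; a sketch of what XGBoost2D might look like (the two-dimensional space and the data_size handling are illustrative):

from flaml import tune
from flaml.model import XGBoostSklearnEstimator

class XGBoost2D(XGBoostSklearnEstimator):
    @classmethod
    def search_space(cls, data_size, task):
        # search only two hyperparameters, bounded by the number of rows
        upper = min(32768, int(data_size))
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=upper),
                "low_cost_init_value": 4,
            },
            "max_leaves": {
                "domain": tune.lograndint(lower=4, upper=upper),
                "low_cost_init_value": 4,
            },
        }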
Example 7
def run_part_from_scratch(self):
    np.random.seed(162)
    search_alg, cost = self.set_basic_conf()
    search_alg = ConcurrencyLimiter(search_alg, 1)
    results_exp_1 = tune.run(
        cost, num_samples=5, search_alg=search_alg, verbose=0, local_dir=self.tmpdir
    )
    checkpoint_path = os.path.join(self.tmpdir, self.experiment_name)
    search_alg.save(checkpoint_path)
    return results_exp_1, np.random.get_state(), checkpoint_path
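
This method belongs to a save/restore test class (see also Examples 12 and 16); set_basic_conf is expected to return a searcher and an objective function. A hedged sketch of such a helper:

from flaml import tune, CFO

def set_basic_conf(self):
    # a tiny search space and deterministic objective for checkpointing tests
    space = {"x": tune.uniform(0, 10), "y": tune.uniform(0, 10)}

    def cost(config):
        tune.report(loss=(config["x"] - 3) ** 2 + (config["y"] - 5) ** 2)

    search_alg = CFO(space=space, metric="loss", mode="min")
    return search_alg, cost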
Example 8
def test_scheduler(scheduler=None):
    from functools import partial

    resource_attr = "samplesize"
    max_resource = 10000

    # specify the objective functions
    if scheduler is None:
        evaluation_obj = simple_obj
    elif scheduler == "flaml":
        evaluation_obj = partial(obj_w_suggested_resource, resource_attr)
    elif scheduler == "asha" or isinstance(scheduler, TrialScheduler):
        evaluation_obj = partial(obj_w_intermediate_report, max_resource)
    else:
        try:
            from ray.tune.schedulers import TrialScheduler as RayTuneTrialScheduler
        except ImportError:
            print(
                "skip this condition, which may require TrialScheduler from ray tune, "
                "as ray tune cannot be imported."
            )
            return
        if isinstance(scheduler, RayTuneTrialScheduler):
            evaluation_obj = partial(obj_w_intermediate_report, max_resource)
        else:
            raise ValueError

    analysis = tune.run(
        evaluation_obj,
        config={
            "x": tune.uniform(5, 20),
            "y": tune.uniform(0, 10),
            "z": tune.uniform(0, 10),
        },
        metric="sphere_projection",
        mode="max",
        verbose=1,
        resource_attr=resource_attr,
        scheduler=scheduler,
        max_resource=max_resource,
        min_resource=100,
        reduction_factor=2,
        time_budget_s=1,
        num_samples=500,
    )

    print("Best hyperparameters found were: ", analysis.best_config)
    # print(analysis.get_best_trial)
    return analysis.best_config
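
simple_obj, obj_w_suggested_resource, and obj_w_intermediate_report are defined elsewhere; the branches differ in how the objective reports progress. A sketch of an objective that reports intermediate results at growing sample sizes, which is what an ASHA-style scheduler expects (the formula and the resource steps are illustrative):

from flaml import tune

def obj_w_intermediate_report(max_resource, config):
    # report the objective at increasing "samplesize" values so that a
    # scheduler such as ASHA can stop unpromising trials early
    for samplesize in range(100, max_resource + 1, 100):
        sphere_projection = config["x"] ** 2 + config["y"] ** 2 + config["z"] ** 2
        tune.report(sphere_projection=sphere_projection, samplesize=samplesize)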
Example 9
def test_nested():
    from flaml import tune
    search_space = {
        # test nested search space
        "cost_related": {
            "a": tune.randint(1, 8),
        },
        "b": tune.uniform(0.5, 1.0),
    }

    def simple_func(config):
        tune.report(metric=(config["cost_related"]["a"] - 4)**2 *
                    (config["b"] - 0.7)**2)

    analysis = tune.run(simple_func,
                        init_config={"cost_related": {
                            "a": 1,
                        }},
                        metric="metric",
                        mode="min",
                        config=search_space,
                        local_dir='logs/',
                        num_samples=-1,
                        time_budget_s=1)
Example 10
def test_searcher():
    from flaml.searcher.suggestion import OptunaSearch, Searcher, ConcurrencyLimiter
    from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch
    from flaml.tune import sample as flamlsample

    searcher = Searcher()
    try:
        searcher = Searcher(metric=1, mode=1)
    except ValueError:
        # Mode must either be a list or string
        pass
    searcher = Searcher(metric=["m1", "m2"], mode=["max", "min"])
    searcher.set_search_properties(None, None, None)
    searcher.suggest = searcher.on_pause = searcher.on_unpause = lambda _: {}
    searcher.on_trial_complete = lambda trial_id, result, error: None
    searcher = ConcurrencyLimiter(searcher, max_concurrent=2, batch=True)
    searcher.on_trial_complete("t0")
    searcher.suggest("t1")
    searcher.suggest("t2")
    searcher.on_pause("t1")
    searcher.on_unpause("t1")
    searcher.suggest("t3")
    searcher.on_trial_complete("t1", {})
    searcher.on_trial_complete("t2", {})
    searcher.set_state({})
    print(searcher.get_state())
    import optuna

    config = {
        "a": optuna.distributions.UniformDistribution(6, 8),
        "b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2),
    }
    searcher = OptunaSearch(["a", config["a"]], metric="m", mode="max")
    try:
        searcher.suggest("t0")
    except ValueError:
        # not enough values to unpack (expected 3, got 1)
        pass
    searcher = OptunaSearch(
        config,
        points_to_evaluate=[{
            "a": 6,
            "b": 1e-3
        }],
        evaluated_rewards=[{
            "m": 2
        }],
        metric="m",
        mode="max",
    )
    try:
        searcher.add_evaluated_point({}, None, error=True)
    except ValueError:
        # Inconsistent parameters set() and distributions {'b', 'a'}.
        pass
    try:
        searcher.add_evaluated_point({"a", 1, "b", 0.01}, None, pruned=True)
    except AttributeError:
        # 'set' object has no attribute 'keys'
        pass
    try:
        searcher.add_evaluated_point({
            "a": 1,
            "b": 0.01
        },
                                     None,
                                     intermediate_values=[0.1])
    except ValueError:
        # `value` is supposed to be set for a complete trial.
        pass
    try:
        searcher = OptunaSearch(config, points_to_evaluate=1)
    except TypeError:
        # points_to_evaluate expected to be a list, got <class 'int'>
        pass
    try:
        searcher = OptunaSearch(config, points_to_evaluate=[1])
    except TypeError:
        # points_to_evaluate expected to include list or dict
        pass
    try:
        searcher = OptunaSearch(config, points_to_evaluate=[{"a": 1}])
    except ValueError:
        # Dim of point {'a': 1} and parameter_names {'a': UniformDistribution(high=8.0, low=6.0), 'b': LogUniformDistribution(high=0.01, low=0.0001)} do not match.
        pass
    try:
        searcher = OptunaSearch(config,
                                points_to_evaluate=[{
                                    "a": 1,
                                    "b": 0.01
                                }],
                                evaluated_rewards=1)
    except TypeError:
        # evaluated_rewards expected to be a list, got <class 'int'>.
        pass
    try:
        searcher = OptunaSearch(config,
                                points_to_evaluate=[{
                                    "a": 1,
                                    "b": 0.01
                                }],
                                evaluated_rewards=[1, 2])
    except ValueError:
        # Dim of evaluated_rewards [1, 2] and points_to_evaluate [{'a': 1, 'b': 0.01}] do not match.
        pass
    config = {"a": sample.uniform(6, 8), "b": sample.loguniform(1e-4, 1e-2)}
    OptunaSearch.convert_search_space({"a": 1})
    try:
        OptunaSearch.convert_search_space({"a": {"grid_search": [1, 2]}})
    except ValueError:
        # Grid search parameters cannot be automatically converted to an Optuna search space.
        pass
    OptunaSearch.convert_search_space({"a": flamlsample.quniform(1, 3, 1)})
    try:
        searcher = OptunaSearch(
            config,
            points_to_evaluate=[{
                "a": 6,
                "b": 1e-3
            }],
            evaluated_rewards=[{
                "m": 2
            }],
            metric="m",
            mode="max",
        )
    except ValueError:
        # Optuna search does not support parameters of type `Float` with samplers of type `_Uniform`
        pass
    searcher = OptunaSearch(long_define_search_space, metric="m", mode="min")
    try:
        searcher.suggest("t0")
    except TypeError:
        # The return value of the define-by-run function passed in the `space` argument should be either None or a `dict` with `str` keys.
        pass
    searcher = OptunaSearch(wrong_define_search_space, metric="m", mode="min")
    try:
        searcher.suggest("t0")
    except TypeError:
        # At least one of the keys in the dict returned by the define-by-run function passed in the `space` argument was not a `str`.
        pass
    searcher = OptunaSearch(metric="m", mode="min")
    try:
        searcher.suggest("t0")
    except RuntimeError:
        # Trying to sample a configuration from OptunaSearch, but no search space has been defined.
        pass
    try:
        searcher.add_evaluated_point({}, 1)
    except RuntimeError:
        # Trying to sample a configuration from OptunaSearch, but no search space has been defined.
        pass
    searcher = OptunaSearch(define_search_space)
    try:
        searcher.suggest("t0")
    except RuntimeError:
        # Trying to sample a configuration from OptunaSearch, but the `metric` (None) or `mode` (None) parameters have not been set.
        pass
    try:
        searcher.add_evaluated_point({}, 1)
    except RuntimeError:
        # Trying to sample a configuration from OptunaSearch, but the `metric` (None) or `mode` (None) parameters have not been set.
        pass
    searcher = OptunaSearch(
        define_search_space,
        points_to_evaluate=[{
            "a": 6,
            "b": 1e-3
        }],
        # evaluated_rewards=[{'m': 2}], metric='m', mode='max'
        mode="max",
    )
    # searcher = OptunaSearch()
    # searcher.set_search_properties('m', 'min', define_search_space)
    searcher.set_search_properties("m", "min", config)
    searcher.suggest("t1")
    searcher.on_trial_complete("t1", None, False)
    searcher.suggest("t2")
    searcher.on_trial_complete("t2", None, True)
    searcher.suggest("t3")
    searcher.on_trial_complete("t3", {"m": np.nan})
    searcher.save("test/tune/optuna.pickle")
    searcher.restore("test/tune/optuna.pickle")
    try:
        searcher = BlendSearch(metric="m",
                               global_search_alg=searcher,
                               metric_constraints=[("c", "<", 1)])
    except AssertionError:
        # sign of metric constraints must be <= or >=.
        pass
    searcher = BlendSearch(metric="m",
                           global_search_alg=searcher,
                           metric_constraints=[("c", "<=", 1)])
    searcher.set_search_properties(metric="m2",
                                   config=config,
                                   setting={"time_budget_s": 0})
    c = searcher.suggest("t1")
    searcher.on_trial_complete("t1", {"config": c}, True)
    c = searcher.suggest("t2")
    searcher.on_trial_complete("t2", {
        "config": c,
        "m2": 1,
        "c": 2,
        "time_total_s": 1
    })
    config1 = config.copy()
    config1["_choice_"] = 0
    searcher._expand_admissible_region(
        lower={"root": [{
            "a": 0.5
        }, {
            "a": 0.4
        }]},
        upper={"root": [{
            "a": 0.9
        }, {
            "a": 0.8
        }]},
        space={"root": config1},
    )
    searcher = CFO(
        metric="m",
        mode="min",
        space=config,
        points_to_evaluate=[{
            "a": 7,
            "b": 1e-3
        }, {
            "a": 6,
            "b": 3e-4
        }],
        evaluated_rewards=[1, 1],
    )
    searcher.suggest("t1")
    searcher.suggest("t2")
    searcher.on_trial_result("t3", {})
    c = searcher.generate_parameters(1)
    searcher.receive_trial_result(1, c, {"default": 0})
    searcher.update_search_space({
        "a": {
            "_value": [1, 2],
            "_type": "choice",
        },
        "b": {
            "_value": [1, 3],
            "_type": "randint",
        },
        "c": {
            "_value": [0.1, 3],
            "_type": "uniform",
        },
        "d": {
            "_value": [2, 8, 2],
            "_type": "quniform",
        },
        "e": {
            "_value": [2, 8],
            "_type": "loguniform",
        },
        "f": {
            "_value": [2, 8, 2],
            "_type": "qloguniform",
        },
        "g": {
            "_value": [0, 2],
            "_type": "normal",
        },
        "h": {
            "_value": [0, 2, 2],
            "_type": "qnormal",
        },
    })
    np.random.seed(7654321)
    searcher = RandomSearch(
        space=config,
        points_to_evaluate=[{
            "a": 7,
            "b": 1e-3
        }, {
            "a": 6,
            "b": 3e-4
        }],
    )
    print(searcher.suggest("t1"))
    print(searcher.suggest("t2"))
    print(searcher.suggest("t3"))
    print(searcher.suggest("t4"))
    searcher.on_trial_complete({"t1"}, {})
    searcher.on_trial_result({"t2"}, {})
    np.random.seed(654321)
    searcher = RandomSearch(
        space=config,
        points_to_evaluate=[{
            "a": 7,
            "b": 1e-3
        }, {
            "a": 6,
            "b": 3e-4
        }],
    )
    print(searcher.suggest("t1"))
    print(searcher.suggest("t2"))
    print(searcher.suggest("t3"))
    searcher = RandomSearch(space={})
    print(searcher.suggest("t1"))
    searcher = BlendSearch(space={})
    print(searcher.suggest("t1"))
    from flaml import tune

    tune.run(lambda x: 1, config={}, use_ray=use_ray)
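
The later part of this test relies on module-level names that are not shown: sample (a search-space module), use_ray (a flag), np (numpy), and three define-by-run functions for OptunaSearch. A define-by-run space is a callable that receives an Optuna trial and suggests parameters; a sketch of the well-formed variant (the broken variants presumably differ only in what they return):

def define_search_space(trial):
    # define-by-run: sample the hyperparameters directly from the Optuna trial
    trial.suggest_float("a", 6, 8)
    trial.suggest_float("b", 1e-4, 1e-2, log=True)
    # returning None (or a dict of constants with str keys) is expected;
    # returning a non-dict, or a dict with non-str keys, triggers the
    # TypeErrors exercised above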
Example 11
def _test_xgboost(method="BlendSearch"):
    try:
        import ray
    except ImportError:
        return
    if method == "BlendSearch":
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        "max_depth":
        tune.randint(1, 9)
        if method in ["BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight":
        tune.choice([1, 2, 3]),
        "subsample":
        tune.uniform(0.5, 1.0),
        "eta":
        tune.loguniform(1e-4, 1e-1),
    }
    max_iter = 10
    for num_samples in [128]:
        time_budget_s = 60
        for n_cpu in [2]:
            start_time = time.time()
            # ray.init(address='auto')
            if method == "BlendSearch":
                analysis = tune.run(
                    train_breast_cancer,
                    config=search_space,
                    low_cost_partial_config={
                        "max_depth": 1,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    scheduler="asha",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    local_dir="logs/",
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True,
                )
            else:
                if "ASHA" == method:
                    algo = None
                elif "BOHB" == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB

                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif "Optuna" == method:
                    from ray.tune.suggest.optuna import OptunaSearch

                    algo = OptunaSearch()
                elif "CFO" == method:
                    from flaml import CFO

                    algo = CFO(
                        low_cost_partial_config={
                            "max_depth": 1,
                        },
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        },
                    )
                elif "CFOCat" == method:
                    from flaml.searcher.cfo_cat import CFOCat

                    algo = CFOCat(
                        low_cost_partial_config={
                            "max_depth": 1,
                        },
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        },
                    )
                elif "Dragonfly" == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch

                    algo = DragonflySearch()
                elif "SkOpt" == method:
                    from ray.tune.suggest.skopt import SkOptSearch

                    algo = SkOptSearch()
                elif "Nevergrad" == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng

                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif "ZOOpt" == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch

                    algo = ZOOptSearch(budget=num_samples * n_cpu)
                elif "Ax" == method:
                    from ray.tune.suggest.ax import AxSearch

                    algo = AxSearch()
                elif "HyperOpt" == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch

                    algo = HyperOptSearch()
                    scheduler = None
                if method != "BOHB":
                    from ray.tune.schedulers import ASHAScheduler

                    scheduler = ASHAScheduler(max_t=max_iter, grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir="logs/",
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    scheduler=scheduler,
                    search_alg=algo,
                )
            # # Load the best model checkpoint
            # import os
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1.0 - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")
Example 12
def run_full(self):
    np.random.seed(162)
    search_alg3, cost = self.set_basic_conf()
    search_alg3 = ConcurrencyLimiter(search_alg3, 1)
    return tune.run(cost, num_samples=10, search_alg=search_alg3, verbose=0)
Example 13
    def test_logging_level(self):

        from flaml import logger, logger_formatter

        with tempfile.TemporaryDirectory() as d:

            training_log = os.path.join(d, "training.log")

            # Configure logging for the FLAML logger
            # and add a handler that outputs to a buffer.
            logger.setLevel(logging.INFO)
            buf = io.StringIO()
            ch = logging.StreamHandler(buf)
            ch.setFormatter(logger_formatter)
            logger.addHandler(ch)

            # Run a simple job.
            automl = AutoML()
            automl_settings = {
                "time_budget": 1,
                "metric": "rmse",
                "task": "regression",
                "log_file_name": training_log,
                "log_training_metric": True,
                "n_jobs": 1,
                "model_history": True,
                "keep_search_state": True,
                "learner_selector": "roundrobin",
            }
            X_train, y_train = fetch_california_housing(return_X_y=True)
            n = len(y_train) >> 1
            print(automl.model, automl.classes_, automl.predict(X_train))
            automl.fit(X_train=X_train[:n],
                       y_train=y_train[:n],
                       X_val=X_train[n:],
                       y_val=y_train[n:],
                       **automl_settings)
            logger.info(automl.search_space)
            logger.info(automl.low_cost_partial_config)
            logger.info(automl.points_to_evaluate)
            logger.info(automl.cat_hp_cost)
            import optuna as ot

            study = ot.create_study()
            from flaml.tune.space import define_by_run_func, add_cost_to_space

            sample = define_by_run_func(study.ask(), automl.search_space)
            logger.info(sample)
            logger.info(unflatten_hierarchical(sample, automl.search_space))
            add_cost_to_space(automl.search_space,
                              automl.low_cost_partial_config,
                              automl.cat_hp_cost)
            logger.info(automl.search_space["ml"].categories)
            if automl.best_config:
                config = automl.best_config.copy()
                config["learner"] = automl.best_estimator
                automl.trainable({"ml": config})
            from flaml import tune, BlendSearch
            from flaml.automl import size
            from functools import partial

            low_cost_partial_config = automl.low_cost_partial_config
            search_alg = BlendSearch(
                metric="val_loss",
                mode="min",
                space=automl.search_space,
                low_cost_partial_config=low_cost_partial_config,
                points_to_evaluate=automl.points_to_evaluate,
                cat_hp_cost=automl.cat_hp_cost,
                resource_attr=automl.resource_attr,
                min_resource=automl.min_resource,
                max_resource=automl.max_resource,
                config_constraints=[(partial(size, automl._state), "<=",
                                     automl._mem_thres)],
                metric_constraints=automl.metric_constraints,
            )
            analysis = tune.run(
                automl.trainable,
                search_alg=search_alg,  # verbose=2,
                time_budget_s=1,
                num_samples=-1,
            )
            print(
                min(trial.last_result["val_loss"]
                    for trial in analysis.trials))
            config = analysis.trials[-1].last_result["config"]["ml"]
            automl._state._train_with_config(config["learner"], config)
            for _ in range(3):
                print(
                    search_alg._ls.complete_config(
                        low_cost_partial_config,
                        search_alg._ls_bound_min,
                        search_alg._ls_bound_max,
                    ))
            # Check if the log buffer is populated.
            self.assertTrue(len(buf.getvalue()) > 0)

        import pickle

        with open("automl.pkl", "wb") as f:
            pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
        print(automl.__version__)
        pred1 = automl.predict(X_train)
        with open("automl.pkl", "rb") as f:
            automl = pickle.load(f)
        pred2 = automl.predict(X_train)
        delta = pred1 - pred2
        assert max(delta) == 0 and min(delta) == 0
        automl.save_best_config("test/housing.json")
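
This method is part of a unittest TestCase; besides its local imports, it assumes module-level imports along these lines (illustrative):

import io
import logging
import os
import tempfile
import unittest

from sklearn.datasets import fetch_california_housing
from flaml import AutoML
from flaml.tune.space import unflatten_hierarchical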
Example 14
# load a built-in search space from flaml
flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
# specify the search space as a dict from hp name to domain; you can define your own search space the same way
config_search_space = {
    hp: space["domain"]
    for hp, space in flaml_lgbm_search_space.items()
}
# give guidance about hp values corresponding to low training cost, i.e., {"n_estimators": 4, "num_leaves": 4}
low_cost_partial_config = {
    hp: space["low_cost_init_value"]
    for hp, space in flaml_lgbm_search_space.items()
    if "low_cost_init_value" in space
}
# initial points to evaluate
points_to_evaluate = [{
    hp: space["init_value"]
    for hp, space in flaml_lgbm_search_space.items() if "init_value" in space
}]
# run the tuning, minimizing mse, with total time budget 3 seconds
analysis = tune.run(
    train_lgbm,
    metric="mse",
    mode="min",
    config=config_search_space,
    low_cost_partial_config=low_cost_partial_config,
    points_to_evaluate=points_to_evaluate,
    time_budget_s=3,
    num_samples=-1,
)
print(analysis.best_result)
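
This is the module-level version of Example 4; it additionally assumes the imports, a train/test split, and a train_lgbm objective such as the one sketched after Example 4. For instance (the dataset choice is arbitrary):

from flaml import tune
from flaml.model import LGBMEstimator
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)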
Example 15
if __name__ == "__main__":
    ray_on_aml = Ray_On_AML()
    ray = ray_on_aml.getRay()
    if ray:
        X, y = load_breast_cancer(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
        X_train_ref = ray.put(X_train)
        flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
        config_search_space = {
            hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
        }
        low_cost_partial_config = {
            hp: space["low_cost_init_value"]
            for hp, space in flaml_lgbm_search_space.items()
            if "low_cost_init_value" in space
        }

        analysis = tune.run(
            train_breast_cancer,
            metric="mean_accuracy",
            mode="max",
            config=config_search_space,
            num_samples=-1,
            time_budget_s=60,
            use_ray=True,
        )

        # print("Best hyperparameters found were: ", analysis.best_config)
        print("The best trial's result: ", analysis.best_trial.last_result)
Example 16
def run_explicit_restore(self, random_state, checkpoint_path):
    search_alg2, cost = self.set_basic_conf()
    search_alg2 = ConcurrencyLimiter(search_alg2, 1)
    search_alg2.restore(checkpoint_path)
    np.random.set_state(random_state)
    return tune.run(cost, num_samples=5, search_alg=search_alg2, verbose=0)
Example 17
def test_nested():
    from flaml import tune, BlendSearch, CFO

    search_space = {
        # test nested search space
        "cost_related": {
            "a": tune.randint(1, 9),
        },
        "b": tune.uniform(0.5, 1.0),
    }

    def simple_func(config):
        obj = (config["cost_related"]["a"] -
               4)**2 + (config["b"] - config["cost_related"]["a"])**2
        tune.report(obj=obj)
        tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])

    analysis = tune.run(
        simple_func,
        search_alg=CFO(
            space=search_space,
            metric="obj",
            mode="min",
            low_cost_partial_config={"cost_related": {
                "a": 1
            }},
            points_to_evaluate=[
                {
                    "b": 0.99,
                    "cost_related": {
                        "a": 3
                    }
                },
                {
                    "b": 0.99,
                    "cost_related": {
                        "a": 2
                    }
                },
                {
                    "cost_related": {
                        "a": 8
                    }
                },
            ],
            metric_constraints=[("ab", "<=", 4)],
        ),
        local_dir="logs/",
        num_samples=-1,
        time_budget_s=1,
    )

    best_trial = analysis.get_best_trial()
    logger.info(f"CFO best config: {best_trial.config}")
    logger.info(f"CFO best result: {best_trial.last_result}")

    analysis = tune.run(
        simple_func,
        search_alg=BlendSearch(
            experimental=True,
            space=search_space,
            metric="obj",
            mode="min",
            low_cost_partial_config={"cost_related": {
                "a": 1
            }},
            points_to_evaluate=[
                {
                    "b": 0.99,
                    "cost_related": {
                        "a": 3
                    }
                },
                {
                    "b": 0.99,
                    "cost_related": {
                        "a": 2
                    }
                },
                {
                    "cost_related": {
                        "a": 8
                    }
                },
            ],
            metric_constraints=[("ab", "<=", 4)],
        ),
        local_dir="logs/",
        num_samples=-1,
        time_budget_s=1,
    )

    best_trial = analysis.get_best_trial()
    logger.info(f"BlendSearch exp best config: {best_trial.config}")
    logger.info(f"BlendSearch exp best result: {best_trial.last_result}")

    points_to_evaluate = [
        {
            "b": 0.99,
            "cost_related": {
                "a": 3
            }
        },
        {
            "b": 0.99,
            "cost_related": {
                "a": 2
            }
        },
    ]
    analysis = tune.run(
        simple_func,
        config=search_space,
        low_cost_partial_config={"cost_related": {
            "a": 1
        }},
        points_to_evaluate=points_to_evaluate,
        evaluated_rewards=[(config["cost_related"]["a"] - 4)**2 +
                           (config["b"] - config["cost_related"]["a"])**2
                           for config in points_to_evaluate],
        metric="obj",
        mode="min",
        metric_constraints=[("ab", "<=", 4)],
        local_dir="logs/",
        num_samples=-1,
        time_budget_s=1,
    )

    best_trial = analysis.get_best_trial()
    logger.info(f"BlendSearch best config: {best_trial.config}")
    logger.info(f"BlendSearch best result: {best_trial.last_result}")
Example 18
def _test_xgboost(method='BlendSearch'):
    try:
        import ray
    except ImportError:
        return
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        # You can mix constants with search space objects.
        "max_depth": tune.randint(1, 8) if method in [
            "BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1)
    }
    max_iter = 10
    for num_samples in [256]:
        time_budget_s = 60 #None
        for n_cpu in [8]:
            start_time = time.time()
            ray.init(num_cpus=n_cpu, num_gpus=0)
            if method == 'BlendSearch':
                analysis = tune.run(
                    train_breast_cancer,
                    init_config={
                        "max_depth": 1,
                        "min_child_weight": 3,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    report_intermediate_result=True,
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir='logs/',
                    num_samples=num_samples*n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True)
            else:
                if 'ASHA' == method:
                    algo = None
                elif 'BOHB' == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB
                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif 'Optuna' == method:
                    from ray.tune.suggest.optuna import OptunaSearch
                    algo = OptunaSearch()
                elif 'CFO' == method:
                    from flaml import CFO
                    algo = CFO(points_to_evaluate=[{
                        "max_depth": 1,
                        "min_child_weight": 3,
                    }], cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    })
                elif 'Dragonfly' == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch
                    algo = DragonflySearch()
                elif 'SkOpt' == method:
                    from ray.tune.suggest.skopt import SkOptSearch
                    algo = SkOptSearch()
                elif 'Nevergrad' == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng
                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif 'ZOOpt' == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch
                    algo = ZOOptSearch(budget=num_samples*n_cpu)
                elif 'Ax' == method:
                    from ray.tune.suggest.ax import AxSearch
                    algo = AxSearch()
                elif 'HyperOpt' == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch
                    algo = HyperOptSearch()
                    scheduler = None
                if method != 'BOHB':
                    from ray.tune.schedulers import ASHAScheduler
                    scheduler = ASHAScheduler(
                        max_t=max_iter,
                        grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space, local_dir='logs/',
                    num_samples=num_samples*n_cpu, time_budget_s=time_budget_s,
                    scheduler=scheduler, search_alg=algo)
            ray.shutdown()
            # # Load the best model checkpoint
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1.0 - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")
Example 19
def test_record_incumbent(method="BlendSearch"):

    if method != "CFOCat":
        search_space = {
            "x1": tune.randint(1, 9),
            "x2": tune.randint(1, 9),
            "x3": tune.randint(1, 9),
            "x4": tune.randint(1, 9),
            "x5": tune.randint(1, 9),
        }
    else:
        search_space = {
            "x1": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            "x2": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            "x3": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            "x4": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            "x5": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
        }

    max_iter = 100
    num_samples = 128
    time_budget_s = 1
    n_cpu = 1

    if method == "BlendSearch":
        tune.run(
            evaluation_function=rosenbrock_function,
            config=search_space,
            verbose=0,
            metric="funcLoss",
            mode="min",
            max_resource=max_iter,
            min_resource=1,
            local_dir="logs/",
            num_samples=num_samples * n_cpu,
            time_budget_s=time_budget_s,
            use_incumbent_result_in_evaluation=True,
        )
        return
    elif method == "CFO":
        from flaml import CFO

        algo = CFO(
            use_incumbent_result_in_evaluation=True,
        )
    elif method == "CFOCat":
        from flaml.searcher.cfo_cat import CFOCat

        algo = CFOCat(
            use_incumbent_result_in_evaluation=True,
        )
    else:
        raise NotImplementedError
    tune.run(
        evaluation_function=rosenbrock_function,
        metric="funcLoss",
        mode="min",
        config=search_space,
        local_dir="logs/",
        num_samples=num_samples * n_cpu,
        time_budget_s=time_budget_s,
        search_alg=algo,
    )