def test_config_constraint():
    from flaml import tune

    # Test dict return value
    def evaluate_config_dict(config):
        metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
        return {"metric": metric}

    def config_constraint(config):
        if config["y"] >= config["x"]:
            return 1
        else:
            return 0

    tune.run(
        evaluate_config_dict,
        config={
            "x": tune.qloguniform(lower=1, upper=100000, q=1),
            "y": tune.qrandint(lower=2, upper=100000, q=2),
        },
        config_constraints=[(config_constraint, ">", 0.5)],
        metric="metric",
        mode="max",
        num_samples=100,
    )
def test_run_training_function_return_value():
    from flaml import tune

    # Test dict return value
    def evaluate_config_dict(config):
        metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
        return {"metric": metric}

    tune.run(
        evaluate_config_dict,
        config={
            "x": tune.qloguniform(lower=1, upper=100000, q=1),
            "y": tune.qrandint(lower=2, upper=100000, q=2),
        },
        metric="metric",
        mode="max",
        num_samples=100,
    )

    # Test scalar return value
    def evaluate_config_scalar(config):
        metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
        return metric

    tune.run(
        evaluate_config_scalar,
        config={
            "x": tune.qloguniform(lower=1, upper=100000, q=1),
            "y": tune.qlograndint(lower=2, upper=100000, q=2),
        },
        num_samples=100,
        mode="max",
    )
def test_nested():
    from flaml import tune

    search_space = {
        # test nested search space
        "cost_related": {
            "a": tune.randint(1, 8),
        },
        "b": tune.uniform(0.5, 1.0),
    }

    def simple_func(config):
        obj = (config["cost_related"]["a"] - 4) ** 2 + (
            config["b"] - config["cost_related"]["a"]
        ) ** 2
        tune.report(obj=obj)
        tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])

    analysis = tune.run(
        simple_func,
        config=search_space,
        low_cost_partial_config={"cost_related": {"a": 1}},
        metric="obj",
        mode="min",
        metric_constraints=[("ab", "<=", 4)],
        local_dir="logs/",
        num_samples=-1,
        time_budget_s=1,
    )

    best_trial = analysis.get_best_trial()
    logger.info(f"Best config: {best_trial.config}")
    logger.info(f"Best result: {best_trial.last_result}")
def test_tune_lgbm_csv():
    # load a built-in search space from flaml
    flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
    # specify the search space as a dict from hp name to domain; you can define your own search space the same way
    config_search_space = {
        hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
    }
    # give guidance about hp values corresponding to low training cost, e.g., {"n_estimators": 4, "num_leaves": 4}
    low_cost_partial_config = {
        hp: space["low_cost_init_value"]
        for hp, space in flaml_lgbm_search_space.items()
        if "low_cost_init_value" in space
    }
    # initial points to evaluate
    points_to_evaluate = [
        {
            hp: space["init_value"]
            for hp, space in flaml_lgbm_search_space.items()
            if "init_value" in space
        }
    ]
    # run the tuning, minimizing mse, with total time budget 3 seconds
    analysis = tune.run(
        train_lgbm,
        metric="mse",
        mode="min",
        config=config_search_space,
        low_cost_partial_config=low_cost_partial_config,
        points_to_evaluate=points_to_evaluate,
        time_budget_s=3,
        num_samples=-1,
    )
    print(analysis.best_result)
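# A minimal sketch of the `train_lgbm` trainable the test above assumes; the data
# fixtures (`X_train`, `X_test`, `y_train`, `y_test`) are hypothetical module-level
# variables, not part of the original snippet.
import lightgbm
from sklearn.metrics import mean_squared_error
from flaml.model import LGBMEstimator


def train_lgbm(config: dict) -> dict:
    # convert the flaml config into native LightGBM parameters
    params = LGBMEstimator(**config).params
    # train on the training split
    model = lightgbm.train(params, lightgbm.Dataset(X_train, y_train))
    # evaluate on the held-out split and hand the metric back to tune
    mse = mean_squared_error(y_test, model.predict(X_test))
    return {"mse": mse}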
def _test_flaml_raytune_consistency(
    num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"
):
    try:
        from ray import tune as raytune
    except ImportError:
        print(
            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
        )
        return
    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    analysis = tune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,  # the search space
        low_cost_partial_config=low_cost_partial_config,  # an initial (partial) config with low cost
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        time_budget_s=None,  # the time budget in seconds
        local_dir="logs/",  # the local directory to store logs
        search_alg=searcher,
        # verbose=0,  # verbosity
        # use_ray=True,  # uncomment when performing parallel tuning using ray
    )
    flaml_best_config = analysis.best_config
    flaml_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("best flaml", searcher_name, flaml_best_config)  # the best config
    print("flaml config in results", searcher_name, flaml_config_in_results)

    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    from ray.tune.suggest import ConcurrencyLimiter

    search_alg = ConcurrencyLimiter(searcher, max_concurrent_trials)
    analysis = raytune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        local_dir="logs/",  # the local directory to store logs
        # max_concurrent_trials=max_concurrent_trials,
        # resources_per_trial={"cpu": max_concurrent_trials, "gpu": 0},
        search_alg=search_alg,
    )
    ray_best_config = analysis.best_config
    ray_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("ray best", searcher_name, analysis.best_config)  # the best config
    print("ray config in results", searcher_name, ray_config_in_results)
    assert ray_best_config == flaml_best_config, "best config should be the same"
    assert (
        flaml_config_in_results == ray_config_in_results
    ), "results from raytune and flaml should be the same"
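# Minimal sketches of the module-level fixtures assumed by the consistency test above.
# `evaluate_config`, `config_search_space`, `low_cost_partial_config`, and
# `setup_searcher` are hypothetical reconstructions, not the original definitions.
from flaml import tune, CFO, BlendSearch


def evaluate_config(config):
    # a simple deterministic objective so the flaml and ray tune runs can be compared
    metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
    tune.report(metric=metric)


config_search_space = {
    "x": tune.lograndint(lower=1, upper=100000),
    "y": tune.randint(lower=1, upper=100000),
}
low_cost_partial_config = {"x": 1}


def setup_searcher(searcher_name):
    # build a fresh searcher of the requested type so both runs start identically
    if searcher_name == "cfo":
        return CFO(
            space=config_search_space,
            low_cost_partial_config=low_cost_partial_config,
        )
    elif searcher_name == "bs":
        return BlendSearch(
            metric="metric",
            mode="min",
            space=config_search_space,
            low_cost_partial_config=low_cost_partial_config,
        )
    return None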
def test_simple(method=None):
    automl = AutoML()
    automl.add_learner(learner_name="XGBoost2D", learner_class=XGBoost2D)
    automl_settings = {
        "estimator_list": ["XGBoost2D"],
        "task": "classification",
        "log_file_name": f"test/xgboost2d_{dataset}_{method}.log",
        "n_jobs": 1,
        "hpo_method": method,
        "log_type": "all",
        "retrain_full": "budget",
        "keep_search_state": True,
        "time_budget": 1,
    }
    from sklearn.externals._arff import ArffException

    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        from sklearn.datasets import load_wine

        X, y = load_wine(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42
    )
    automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
    print(automl.estimator_list)
    print(automl.search_space)
    print(automl.points_to_evaluate)
    config = automl.best_config.copy()
    config["learner"] = automl.best_estimator
    automl.trainable(config)
    from flaml import tune
    from flaml.automl import size
    from functools import partial

    analysis = tune.run(
        automl.trainable,
        automl.search_space,
        metric="val_loss",
        mode="min",
        low_cost_partial_config=automl.low_cost_partial_config,
        points_to_evaluate=automl.points_to_evaluate,
        cat_hp_cost=automl.cat_hp_cost,
        resource_attr=automl.resource_attr,
        min_resource=automl.min_resource,
        max_resource=automl.max_resource,
        time_budget_s=automl._state.time_budget,
        config_constraints=[(partial(size, automl._state), "<=", automl._mem_thres)],
        metric_constraints=automl.metric_constraints,
        num_samples=5,
    )
    print(analysis.trials[-1])
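# A minimal sketch of the custom `XGBoost2D` learner and the `dataset` fixture the test
# above assumes; both are hypothetical reconstructions, not the original definitions.
from flaml import tune
from flaml.model import XGBoostSklearnEstimator

dataset = "credit-g"  # an assumed OpenML dataset name


class XGBoost2D(XGBoostSklearnEstimator):
    # restrict the search space to two hyperparameters so the 2D tuning behavior is easy to inspect
    @classmethod
    def search_space(cls, data_size, task):
        # data_size may be an int (older flaml) or a shape tuple (newer flaml)
        n = data_size[0] if isinstance(data_size, (tuple, list)) else data_size
        upper = min(32768, int(n))
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=upper),
                "low_cost_init_value": 4,
            },
            "max_leaves": {
                "domain": tune.lograndint(lower=4, upper=upper),
                "low_cost_init_value": 4,
            },
        }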
def run_part_from_scratch(self):
    np.random.seed(162)
    search_alg, cost = self.set_basic_conf()
    search_alg = ConcurrencyLimiter(search_alg, 1)
    results_exp_1 = tune.run(
        cost, num_samples=5, search_alg=search_alg, verbose=0, local_dir=self.tmpdir
    )
    checkpoint_path = os.path.join(self.tmpdir, self.experiment_name)
    search_alg.save(checkpoint_path)
    return results_exp_1, np.random.get_state(), checkpoint_path
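# A minimal sketch of the `set_basic_conf` helper these warm-start/restore test methods
# rely on (it would live on the same test class); the search space, cost function, and
# searcher choice here are assumptions, not the original definition.
from flaml import tune, CFO


def set_basic_conf(self):
    space = {
        "height": tune.uniform(-100, 100),
        "width": tune.randint(0, 100),
    }

    def cost(param):
        # a cheap deterministic objective so saved and restored runs can be compared
        tune.report(loss=(param["height"] - 14) ** 2 - abs(param["width"] - 3))

    search_alg = CFO(space=space, metric="loss", mode="min", seed=20)
    return search_alg, cost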
def test_scheduler(scheduler=None):
    from functools import partial

    resource_attr = "samplesize"
    max_resource = 10000

    # specify the objective functions
    if scheduler is None:
        evaluation_obj = simple_obj
    elif scheduler == "flaml":
        evaluation_obj = partial(obj_w_suggested_resource, resource_attr)
    elif scheduler == "asha" or isinstance(scheduler, TrialScheduler):
        evaluation_obj = partial(obj_w_intermediate_report, max_resource)
    else:
        try:
            from ray.tune.schedulers import TrialScheduler as RayTuneTrialScheduler
        except ImportError:
            print(
                "skip this condition, which may require TrialScheduler from ray tune, "
                "as ray tune cannot be imported."
            )
            return
        if isinstance(scheduler, RayTuneTrialScheduler):
            evaluation_obj = partial(obj_w_intermediate_report, max_resource)
        else:
            raise ValueError

    analysis = tune.run(
        evaluation_obj,
        config={
            "x": tune.uniform(5, 20),
            "y": tune.uniform(0, 10),
            "z": tune.uniform(0, 10),
        },
        metric="sphere_projection",
        mode="max",
        verbose=1,
        resource_attr=resource_attr,
        scheduler=scheduler,
        max_resource=max_resource,
        min_resource=100,
        reduction_factor=2,
        time_budget_s=1,
        num_samples=500,
    )

    print("Best hyperparameters found were: ", analysis.best_config)
    # print(analysis.get_best_trial)
    return analysis.best_config
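# Minimal sketches of the objective helpers assumed by test_scheduler above
# (`simple_obj`, `obj_w_intermediate_report`, `obj_w_suggested_resource`); these are
# hypothetical reconstructions that only illustrate the three reporting patterns involved.
from flaml import tune


def simple_obj(config):
    # no scheduler: report a single final score
    tune.report(sphere_projection=config["x"] ** 2 + config["y"] * config["z"])


def obj_w_intermediate_report(max_resource, config):
    # ASHA-style schedulers: report intermediate results as the resource grows
    samplesize = 10
    while samplesize <= max_resource:
        tune.report(
            samplesize=samplesize,
            sphere_projection=config["x"] ** 2 + config["y"] * config["z"],
        )
        samplesize *= 2


def obj_w_suggested_resource(resource_attr, config):
    # the "flaml" scheduler passes the suggested resource inside the config
    samplesize = config[resource_attr]
    tune.report(
        samplesize=samplesize,
        sphere_projection=config["x"] ** 2 + config["y"] * config["z"],
    )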
def test_nested():
    from flaml import tune

    search_space = {
        # test nested search space
        "cost_related": {
            "a": tune.randint(1, 8),
        },
        "b": tune.uniform(0.5, 1.0),
    }

    def simple_func(config):
        tune.report(
            metric=(config["cost_related"]["a"] - 4) ** 2 * (config["b"] - 0.7) ** 2
        )

    analysis = tune.run(
        simple_func,
        init_config={"cost_related": {"a": 1}},
        metric="metric",
        mode="min",
        config=search_space,
        local_dir="logs/",
        num_samples=-1,
        time_budget_s=1,
    )
def test_searcher():
    from flaml.searcher.suggestion import OptunaSearch, Searcher, ConcurrencyLimiter
    from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch
    from flaml.tune import sample as flamlsample

    searcher = Searcher()
    try:
        searcher = Searcher(metric=1, mode=1)
    except ValueError:
        # Mode must either be a list or string
        pass
    searcher = Searcher(metric=["m1", "m2"], mode=["max", "min"])
    searcher.set_search_properties(None, None, None)
    searcher.suggest = searcher.on_pause = searcher.on_unpause = lambda _: {}
    searcher.on_trial_complete = lambda trial_id, result, error: None
    searcher = ConcurrencyLimiter(searcher, max_concurrent=2, batch=True)
    searcher.on_trial_complete("t0")
    searcher.suggest("t1")
    searcher.suggest("t2")
    searcher.on_pause("t1")
    searcher.on_unpause("t1")
    searcher.suggest("t3")
    searcher.on_trial_complete("t1", {})
    searcher.on_trial_complete("t2", {})
    searcher.set_state({})
    print(searcher.get_state())
    import optuna

    config = {
        "a": optuna.distributions.UniformDistribution(6, 8),
        "b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2),
    }
    searcher = OptunaSearch(["a", config["a"]], metric="m", mode="max")
    try:
        searcher.suggest("t0")
    except ValueError:
        # not enough values to unpack (expected 3, got 1)
        pass
    searcher = OptunaSearch(
        config,
        points_to_evaluate=[{"a": 6, "b": 1e-3}],
        evaluated_rewards=[{"m": 2}],
        metric="m",
        mode="max",
    )
    try:
        searcher.add_evaluated_point({}, None, error=True)
    except ValueError:
        # Inconsistent parameters set() and distributions {'b', 'a'}.
        pass
    try:
        searcher.add_evaluated_point({"a", 1, "b", 0.01}, None, pruned=True)
    except AttributeError:
        # 'set' object has no attribute 'keys'
        pass
    try:
        searcher.add_evaluated_point({"a": 1, "b": 0.01}, None, intermediate_values=[0.1])
    except ValueError:
        # `value` is supposed to be set for a complete trial.
        pass
    try:
        searcher = OptunaSearch(config, points_to_evaluate=1)
    except TypeError:
        # points_to_evaluate expected to be a list, got <class 'int'>
        pass
    try:
        searcher = OptunaSearch(config, points_to_evaluate=[1])
    except TypeError:
        # points_to_evaluate expected to include list or dict
        pass
    try:
        searcher = OptunaSearch(config, points_to_evaluate=[{"a": 1}])
    except ValueError:
        # Dim of point {'a': 1} and parameter_names {'a': UniformDistribution(high=8.0, low=6.0), 'b': LogUniformDistribution(high=0.01, low=0.0001)} do not match.
        pass
    try:
        searcher = OptunaSearch(
            config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=1
        )
    except TypeError:
        # evaluated_rewards expected to be a list, got <class 'int'>.
        pass
    try:
        searcher = OptunaSearch(
            config, points_to_evaluate=[{"a": 1, "b": 0.01}], evaluated_rewards=[1, 2]
        )
    except ValueError:
        # Dim of evaluated_rewards [1, 2] and points_to_evaluate [{'a': 1, 'b': 0.01}] do not match.
        pass

    config = {"a": flamlsample.uniform(6, 8), "b": flamlsample.loguniform(1e-4, 1e-2)}
    OptunaSearch.convert_search_space({"a": 1})
    try:
        OptunaSearch.convert_search_space({"a": {"grid_search": [1, 2]}})
    except ValueError:
        # Grid search parameters cannot be automatically converted to an Optuna search space.
        pass
    OptunaSearch.convert_search_space({"a": flamlsample.quniform(1, 3, 1)})
    try:
        searcher = OptunaSearch(
            config,
            points_to_evaluate=[{"a": 6, "b": 1e-3}],
            evaluated_rewards=[{"m": 2}],
            metric="m",
            mode="max",
        )
    except ValueError:
        # Optuna search does not support parameters of type `Float` with samplers of type `_Uniform`
        pass
    searcher = OptunaSearch(long_define_search_space, metric="m", mode="min")
    try:
        searcher.suggest("t0")
    except TypeError:
        # The return value of the define-by-run function passed in the `space` argument should be either None or a `dict` with `str` keys.
        pass
    searcher = OptunaSearch(wrong_define_search_space, metric="m", mode="min")
    try:
        searcher.suggest("t0")
    except TypeError:
        # At least one of the keys in the dict returned by the define-by-run function passed in the `space` argument was not a `str`.
        pass
    searcher = OptunaSearch(metric="m", mode="min")
    try:
        searcher.suggest("t0")
    except RuntimeError:
        # Trying to sample a configuration from OptunaSearch, but no search space has been defined.
        pass
    try:
        searcher.add_evaluated_point({}, 1)
    except RuntimeError:
        # Trying to sample a configuration from OptunaSearch, but no search space has been defined.
        pass
    searcher = OptunaSearch(define_search_space)
    try:
        searcher.suggest("t0")
    except RuntimeError:
        # Trying to sample a configuration from OptunaSearch, but the `metric` (None) or `mode` (None) parameters have not been set.
        pass
    try:
        searcher.add_evaluated_point({}, 1)
    except RuntimeError:
        # Trying to sample a configuration from OptunaSearch, but the `metric` (None) or `mode` (None) parameters have not been set.
        pass
    searcher = OptunaSearch(
        define_search_space,
        points_to_evaluate=[{"a": 6, "b": 1e-3}],
        # evaluated_rewards=[{'m': 2}], metric='m', mode='max'
        mode="max",
    )
    # searcher = OptunaSearch()
    # searcher.set_search_properties('m', 'min', define_search_space)
    searcher.set_search_properties("m", "min", config)
    searcher.suggest("t1")
    searcher.on_trial_complete("t1", None, False)
    searcher.suggest("t2")
    searcher.on_trial_complete("t2", None, True)
    searcher.suggest("t3")
    searcher.on_trial_complete("t3", {"m": np.nan})
    searcher.save("test/tune/optuna.pickle")
    searcher.restore("test/tune/optuna.pickle")
    try:
        searcher = BlendSearch(
            metric="m",
            global_search_alg=searcher,
            metric_constraints=[("c", "<", 1)],
        )
    except AssertionError:
        # sign of metric constraints must be <= or >=.
        pass
    searcher = BlendSearch(
        metric="m",
        global_search_alg=searcher,
        metric_constraints=[("c", "<=", 1)],
    )
    searcher.set_search_properties(
        metric="m2", config=config, setting={"time_budget_s": 0}
    )
    c = searcher.suggest("t1")
    searcher.on_trial_complete("t1", {"config": c}, True)
    c = searcher.suggest("t2")
    searcher.on_trial_complete("t2", {"config": c, "m2": 1, "c": 2, "time_total_s": 1})
    config1 = config.copy()
    config1["_choice_"] = 0
    searcher._expand_admissible_region(
        lower={"root": [{"a": 0.5}, {"a": 0.4}]},
        upper={"root": [{"a": 0.9}, {"a": 0.8}]},
        space={"root": config1},
    )
    searcher = CFO(
        metric="m",
        mode="min",
        space=config,
        points_to_evaluate=[{"a": 7, "b": 1e-3}, {"a": 6, "b": 3e-4}],
        evaluated_rewards=[1, 1],
    )
    searcher.suggest("t1")
    searcher.suggest("t2")
    searcher.on_trial_result("t3", {})
    c = searcher.generate_parameters(1)
    searcher.receive_trial_result(1, c, {"default": 0})
    searcher.update_search_space(
        {
            "a": {
                "_value": [1, 2],
                "_type": "choice",
            },
            "b": {
                "_value": [1, 3],
                "_type": "randint",
            },
            "c": {
                "_value": [0.1, 3],
                "_type": "uniform",
            },
            "d": {
                "_value": [2, 8, 2],
                "_type": "quniform",
            },
            "e": {
                "_value": [2, 8],
                "_type": "loguniform",
            },
            "f": {
                "_value": [2, 8, 2],
                "_type": "qloguniform",
            },
            "g": {
                "_value": [0, 2],
                "_type": "normal",
            },
            "h": {
                "_value": [0, 2, 2],
                "_type": "qnormal",
            },
        }
    )
    np.random.seed(7654321)
    searcher = RandomSearch(
        space=config,
        points_to_evaluate=[{"a": 7, "b": 1e-3}, {"a": 6, "b": 3e-4}],
    )
    print(searcher.suggest("t1"))
    print(searcher.suggest("t2"))
    print(searcher.suggest("t3"))
    print(searcher.suggest("t4"))
    searcher.on_trial_complete({"t1"}, {})
    searcher.on_trial_result({"t2"}, {})
    np.random.seed(654321)
    searcher = RandomSearch(
        space=config,
        points_to_evaluate=[{"a": 7, "b": 1e-3}, {"a": 6, "b": 3e-4}],
    )
    print(searcher.suggest("t1"))
    print(searcher.suggest("t2"))
    print(searcher.suggest("t3"))
    searcher = RandomSearch(space={})
    print(searcher.suggest("t1"))
    searcher = BlendSearch(space={})
    print(searcher.suggest("t1"))
    from flaml import tune

    tune.run(lambda x: 1, config={}, use_ray=use_ray)
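# Minimal sketches of the Optuna define-by-run helpers referenced in test_searcher
# (`define_search_space`, `long_define_search_space`, `wrong_define_search_space`);
# these are hypothetical reconstructions illustrating the three cases the test exercises.
def define_search_space(trial):
    # a well-formed define-by-run function: sample parameters and return None
    trial.suggest_float("a", 6, 8)
    trial.suggest_float("b", 1e-4, 1e-2, log=True)


def long_define_search_space(trial):
    # returns something other than None or a dict with str keys, triggering a TypeError
    return 1


def wrong_define_search_space(trial):
    # returns a dict whose key is not a str, triggering a TypeError
    return {1: 1}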
def _test_xgboost(method="BlendSearch"): try: import ray except ImportError: return if method == "BlendSearch": from flaml import tune else: from ray import tune search_space = { "max_depth": tune.randint(1, 9) if method in ["BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9), "min_child_weight": tune.choice([1, 2, 3]), "subsample": tune.uniform(0.5, 1.0), "eta": tune.loguniform(1e-4, 1e-1), } max_iter = 10 for num_samples in [128]: time_budget_s = 60 for n_cpu in [2]: start_time = time.time() # ray.init(address='auto') if method == "BlendSearch": analysis = tune.run( train_breast_cancer, config=search_space, low_cost_partial_config={ "max_depth": 1, }, cat_hp_cost={ "min_child_weight": [6, 3, 2], }, metric="eval-logloss", mode="min", max_resource=max_iter, min_resource=1, scheduler="asha", # You can add "gpu": 0.1 to allocate GPUs resources_per_trial={"cpu": 1}, local_dir="logs/", num_samples=num_samples * n_cpu, time_budget_s=time_budget_s, use_ray=True, ) else: if "ASHA" == method: algo = None elif "BOHB" == method: from ray.tune.schedulers import HyperBandForBOHB from ray.tune.suggest.bohb import TuneBOHB algo = TuneBOHB(max_concurrent=n_cpu) scheduler = HyperBandForBOHB(max_t=max_iter) elif "Optuna" == method: from ray.tune.suggest.optuna import OptunaSearch algo = OptunaSearch() elif "CFO" == method: from flaml import CFO algo = CFO( low_cost_partial_config={ "max_depth": 1, }, cat_hp_cost={ "min_child_weight": [6, 3, 2], }, ) elif "CFOCat" == method: from flaml.searcher.cfo_cat import CFOCat algo = CFOCat( low_cost_partial_config={ "max_depth": 1, }, cat_hp_cost={ "min_child_weight": [6, 3, 2], }, ) elif "Dragonfly" == method: from ray.tune.suggest.dragonfly import DragonflySearch algo = DragonflySearch() elif "SkOpt" == method: from ray.tune.suggest.skopt import SkOptSearch algo = SkOptSearch() elif "Nevergrad" == method: from ray.tune.suggest.nevergrad import NevergradSearch import nevergrad as ng algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne) elif "ZOOpt" == method: from ray.tune.suggest.zoopt import ZOOptSearch algo = ZOOptSearch(budget=num_samples * n_cpu) elif "Ax" == method: from ray.tune.suggest.ax import AxSearch algo = AxSearch() elif "HyperOpt" == method: from ray.tune.suggest.hyperopt import HyperOptSearch algo = HyperOptSearch() scheduler = None if method != "BOHB": from ray.tune.schedulers import ASHAScheduler scheduler = ASHAScheduler(max_t=max_iter, grace_period=1) analysis = tune.run( train_breast_cancer, metric="eval-logloss", mode="min", # You can add "gpu": 0.1 to allocate GPUs resources_per_trial={"cpu": 1}, config=search_space, local_dir="logs/", num_samples=num_samples * n_cpu, time_budget_s=time_budget_s, scheduler=scheduler, search_alg=algo, ) # # Load the best model checkpoint # import os # best_bst = xgb.Booster() # best_bst.load_model(os.path.join(analysis.best_checkpoint, # "model.xgb")) best_trial = analysis.get_best_trial("eval-logloss", "min", "all") accuracy = 1.0 - best_trial.metric_analysis["eval-error"]["min"] logloss = best_trial.metric_analysis["eval-logloss"]["min"] logger.info(f"method={method}") logger.info(f"n_samples={num_samples*n_cpu}") logger.info(f"time={time.time()-start_time}") logger.info(f"Best model eval loss: {logloss:.4f}") logger.info(f"Best model total accuracy: {accuracy:.4f}") logger.info(f"Best model parameters: {best_trial.config}")
def run_full(self):
    np.random.seed(162)
    search_alg3, cost = self.set_basic_conf()
    search_alg3 = ConcurrencyLimiter(search_alg3, 1)
    return tune.run(cost, num_samples=10, search_alg=search_alg3, verbose=0)
def test_logging_level(self):
    from flaml import logger, logger_formatter

    with tempfile.TemporaryDirectory() as d:
        training_log = os.path.join(d, "training.log")

        # Configure logging for the FLAML logger
        # and add a handler that outputs to a buffer.
        logger.setLevel(logging.INFO)
        buf = io.StringIO()
        ch = logging.StreamHandler(buf)
        ch.setFormatter(logger_formatter)
        logger.addHandler(ch)

        # Run a simple job.
        automl = AutoML()
        automl_settings = {
            "time_budget": 1,
            "metric": "rmse",
            "task": "regression",
            "log_file_name": training_log,
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "learner_selector": "roundrobin",
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        n = len(y_train) >> 1
        print(automl.model, automl.classes_, automl.predict(X_train))
        automl.fit(
            X_train=X_train[:n],
            y_train=y_train[:n],
            X_val=X_train[n:],
            y_val=y_train[n:],
            **automl_settings,
        )
        logger.info(automl.search_space)
        logger.info(automl.low_cost_partial_config)
        logger.info(automl.points_to_evaluate)
        logger.info(automl.cat_hp_cost)
        import optuna as ot

        study = ot.create_study()
        from flaml.tune.space import (
            define_by_run_func,
            add_cost_to_space,
            unflatten_hierarchical,
        )

        sample = define_by_run_func(study.ask(), automl.search_space)
        logger.info(sample)
        logger.info(unflatten_hierarchical(sample, automl.search_space))
        add_cost_to_space(
            automl.search_space, automl.low_cost_partial_config, automl.cat_hp_cost
        )
        logger.info(automl.search_space["ml"].categories)
        if automl.best_config:
            config = automl.best_config.copy()
            config["learner"] = automl.best_estimator
            automl.trainable({"ml": config})
        from flaml import tune, BlendSearch
        from flaml.automl import size
        from functools import partial

        low_cost_partial_config = automl.low_cost_partial_config
        search_alg = BlendSearch(
            metric="val_loss",
            mode="min",
            space=automl.search_space,
            low_cost_partial_config=low_cost_partial_config,
            points_to_evaluate=automl.points_to_evaluate,
            cat_hp_cost=automl.cat_hp_cost,
            resource_attr=automl.resource_attr,
            min_resource=automl.min_resource,
            max_resource=automl.max_resource,
            config_constraints=[(partial(size, automl._state), "<=", automl._mem_thres)],
            metric_constraints=automl.metric_constraints,
        )
        analysis = tune.run(
            automl.trainable,
            search_alg=search_alg,
            # verbose=2,
            time_budget_s=1,
            num_samples=-1,
        )
        print(min(trial.last_result["val_loss"] for trial in analysis.trials))
        config = analysis.trials[-1].last_result["config"]["ml"]
        automl._state._train_with_config(config["learner"], config)
        for _ in range(3):
            print(
                search_alg._ls.complete_config(
                    low_cost_partial_config,
                    search_alg._ls_bound_min,
                    search_alg._ls_bound_max,
                )
            )
        # Check if the log buffer is populated.
        self.assertTrue(len(buf.getvalue()) > 0)
        import pickle

        with open("automl.pkl", "wb") as f:
            pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
        print(automl.__version__)
        pred1 = automl.predict(X_train)
        with open("automl.pkl", "rb") as f:
            automl = pickle.load(f)
        pred2 = automl.predict(X_train)
        delta = pred1 - pred2
        assert max(delta) == 0 and min(delta) == 0
        automl.save_best_config("test/housing.json")
# load a built-in search space from flaml
flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
# specify the search space as a dict from hp name to domain; you can define your own search space the same way
config_search_space = {
    hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
}
# give guidance about hp values corresponding to low training cost, e.g., {"n_estimators": 4, "num_leaves": 4}
low_cost_partial_config = {
    hp: space["low_cost_init_value"]
    for hp, space in flaml_lgbm_search_space.items()
    if "low_cost_init_value" in space
}
# initial points to evaluate
points_to_evaluate = [
    {
        hp: space["init_value"]
        for hp, space in flaml_lgbm_search_space.items()
        if "init_value" in space
    }
]
# run the tuning, minimizing mse, with total time budget 3 seconds
analysis = tune.run(
    train_lgbm,
    metric="mse",
    mode="min",
    config=config_search_space,
    low_cost_partial_config=low_cost_partial_config,
    points_to_evaluate=points_to_evaluate,
    time_budget_s=3,
    num_samples=-1,
)
print(analysis.best_result)
if __name__ == "__main__": ray_on_aml = Ray_On_AML() ray = ray_on_aml.getRay() if ray: X, y = load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25) X_train_ref = ray.put(X_train) flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape) config_search_space = { hp: space["domain"] for hp, space in flaml_lgbm_search_space.items() } low_cost_partial_config = { hp: space["low_cost_init_value"] for hp, space in flaml_lgbm_search_space.items() if "low_cost_init_value" in space } analysis = tune.run( train_breast_cancer, metric="mean_accuracy", mode="max", config=config_search_space, num_samples=-1, time_budget_s=60, use_ray=True, ) # print("Best hyperparameters found were: ", analysis.best_config) print("The best trial's result: ", analysis.best_trial.last_result)
def run_explicit_restore(self, random_state, checkpoint_path):
    search_alg2, cost = self.set_basic_conf()
    search_alg2 = ConcurrencyLimiter(search_alg2, 1)
    search_alg2.restore(checkpoint_path)
    np.random.set_state(random_state)
    return tune.run(cost, num_samples=5, search_alg=search_alg2, verbose=0)
def test_nested():
    from flaml import tune, CFO, BlendSearch

    search_space = {
        # test nested search space
        "cost_related": {
            "a": tune.randint(1, 9),
        },
        "b": tune.uniform(0.5, 1.0),
    }

    def simple_func(config):
        obj = (config["cost_related"]["a"] - 4) ** 2 + (
            config["b"] - config["cost_related"]["a"]
        ) ** 2
        tune.report(obj=obj)
        tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])

    analysis = tune.run(
        simple_func,
        search_alg=CFO(
            space=search_space,
            metric="obj",
            mode="min",
            low_cost_partial_config={"cost_related": {"a": 1}},
            points_to_evaluate=[
                {"b": 0.99, "cost_related": {"a": 3}},
                {"b": 0.99, "cost_related": {"a": 2}},
                {"cost_related": {"a": 8}},
            ],
            metric_constraints=[("ab", "<=", 4)],
        ),
        local_dir="logs/",
        num_samples=-1,
        time_budget_s=1,
    )
    best_trial = analysis.get_best_trial()
    logger.info(f"CFO best config: {best_trial.config}")
    logger.info(f"CFO best result: {best_trial.last_result}")

    analysis = tune.run(
        simple_func,
        search_alg=BlendSearch(
            experimental=True,
            space=search_space,
            metric="obj",
            mode="min",
            low_cost_partial_config={"cost_related": {"a": 1}},
            points_to_evaluate=[
                {"b": 0.99, "cost_related": {"a": 3}},
                {"b": 0.99, "cost_related": {"a": 2}},
                {"cost_related": {"a": 8}},
            ],
            metric_constraints=[("ab", "<=", 4)],
        ),
        local_dir="logs/",
        num_samples=-1,
        time_budget_s=1,
    )
    best_trial = analysis.get_best_trial()
    logger.info(f"BlendSearch exp best config: {best_trial.config}")
    logger.info(f"BlendSearch exp best result: {best_trial.last_result}")

    points_to_evaluate = [
        {"b": 0.99, "cost_related": {"a": 3}},
        {"b": 0.99, "cost_related": {"a": 2}},
    ]
    analysis = tune.run(
        simple_func,
        config=search_space,
        low_cost_partial_config={"cost_related": {"a": 1}},
        points_to_evaluate=points_to_evaluate,
        evaluated_rewards=[
            (config["cost_related"]["a"] - 4) ** 2
            + (config["b"] - config["cost_related"]["a"]) ** 2
            for config in points_to_evaluate
        ],
        metric="obj",
        mode="min",
        metric_constraints=[("ab", "<=", 4)],
        local_dir="logs/",
        num_samples=-1,
        time_budget_s=1,
    )
    best_trial = analysis.get_best_trial()
    logger.info(f"BlendSearch best config: {best_trial.config}")
    logger.info(f"BlendSearch best result: {best_trial.last_result}")
def _test_xgboost(method="BlendSearch"):
    try:
        import ray
    except ImportError:
        return
    if method == "BlendSearch":
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        # You can mix constants with search space objects.
        "max_depth": tune.randint(1, 8)
        if method in ["BlendSearch", "BOHB", "Optuna"]
        else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1),
    }
    max_iter = 10
    for num_samples in [256]:
        time_budget_s = 60  # None
        for n_cpu in [8]:
            start_time = time.time()
            ray.init(num_cpus=n_cpu, num_gpus=0)
            if method == "BlendSearch":
                analysis = tune.run(
                    train_breast_cancer,
                    init_config={
                        "max_depth": 1,
                        "min_child_weight": 3,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    report_intermediate_result=True,
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir="logs/",
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True,
                )
            else:
                if "ASHA" == method:
                    algo = None
                elif "BOHB" == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB

                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif "Optuna" == method:
                    from ray.tune.suggest.optuna import OptunaSearch

                    algo = OptunaSearch()
                elif "CFO" == method:
                    from flaml import CFO

                    algo = CFO(
                        points_to_evaluate=[
                            {
                                "max_depth": 1,
                                "min_child_weight": 3,
                            }
                        ],
                        cat_hp_cost={
                            "min_child_weight": [6, 3, 2],
                        },
                    )
                elif "Dragonfly" == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch

                    algo = DragonflySearch()
                elif "SkOpt" == method:
                    from ray.tune.suggest.skopt import SkOptSearch

                    algo = SkOptSearch()
                elif "Nevergrad" == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng

                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif "ZOOpt" == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch

                    algo = ZOOptSearch(budget=num_samples * n_cpu)
                elif "Ax" == method:
                    from ray.tune.suggest.ax import AxSearch

                    algo = AxSearch()
                elif "HyperOpt" == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch

                    algo = HyperOptSearch()
                    scheduler = None
                if method != "BOHB":
                    from ray.tune.schedulers import ASHAScheduler

                    scheduler = ASHAScheduler(max_t=max_iter, grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir="logs/",
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    scheduler=scheduler,
                    search_alg=algo,
                )
            ray.shutdown()
            # # Load the best model checkpoint
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint, "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1.0 - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples*n_cpu}")
            logger.info(f"time={time.time()-start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")
def test_record_incumbent(method="BlendSearch"): if method != "CFOCat": search_space = { "x1": tune.randint(1, 9), "x2": tune.randint(1, 9), "x3": tune.randint(1, 9), "x4": tune.randint(1, 9), "x5": tune.randint(1, 9), } else: search_space = { "x1": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), "x2": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), "x3": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), "x4": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), "x5": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), } max_iter = 100 num_samples = 128 time_budget_s = 1 n_cpu = 1 if method == "BlendSearch": tune.run( evaluation_function=rosenbrock_function, config=search_space, verbose=0, metric="funcLoss", mode="min", max_resource=max_iter, min_resource=1, local_dir="logs/", num_samples=num_samples * n_cpu, time_budget_s=time_budget_s, use_incumbent_result_in_evaluation=True, ) return elif method == "CFO": from flaml import CFO algo = CFO( use_incumbent_result_in_evaluation=True, ) elif method == "CFOCat": from flaml.searcher.cfo_cat import CFOCat algo = CFOCat( use_incumbent_result_in_evaluation=True, ) else: raise NotImplementedError tune.run( evaluation_function=rosenbrock_function, metric="funcLoss", mode="min", config=search_space, local_dir="logs/", num_samples=num_samples * n_cpu, time_budget_s=time_budget_s, search_alg=algo, )