def test_exhaust_initial_calls(base_estimator):
    # check a model is fitted and used to make suggestions after we added
    # at least n_initial_points via tell()
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=2,
                    acq_optimizer="sampling", random_state=1)

    x0 = opt.ask()  # random point
    x1 = opt.ask()  # random point
    assert x0 != x1

    # first call to tell()
    r1 = opt.tell(x1, 3.)
    assert len(r1.models) == 0
    x2 = opt.ask()  # random point
    assert x1 != x2

    # second call to tell()
    r2 = opt.tell(x2, 4.)
    if base_estimator.lower() == 'dummy':
        assert len(r2.models) == 0
    else:
        assert len(r2.models) == 1

    # this is the first non-random point
    x3 = opt.ask()
    assert x2 != x3
    x4 = opt.ask()
    r3 = opt.tell(x3, 1.)

    # no new information was added, so the next point should be the same,
    # unless we are using the dummy estimator, which forever returns random
    # points and never fits any models
    if base_estimator.lower() == 'dummy':
        assert x3 != x4
        assert len(r3.models) == 0
    else:
        assert x3 == x4
        assert len(r3.models) == 2
def test_model_queue_size():
    # Check if model_queue_size limits the model queue size
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=1,
                    acq_optimizer="sampling", model_queue_size=2)

    opt.run(bench1, n_iter=3)
    # tell() computes the next point ready for the next call to ask(),
    # hence there are three observations after three iterations, but the
    # model queue is capped at model_queue_size=2
    assert_equal(len(opt.models), 2)
    assert_equal(len(opt.Xi), 3)
    opt.ask()
    assert_equal(len(opt.models), 2)
    assert_equal(len(opt.Xi), 3)
    assert_equal(opt.ask(), opt.ask())
def test_dict_list_space_representation():
    """
    Tests whether the conversion of the dictionary and list representation
    of a point from a search space works properly.
    """
    chef_space = {
        'Cooking time': (0, 1200),  # in minutes
        'Main ingredient': [
            'cheese', 'cherimoya', 'chicken', 'chard', 'chocolate', 'chicory'
        ],
        'Secondary ingredient': [
            'love', 'passion', 'dedication'
        ],
        'Cooking temperature': (-273.16, 10000.0)  # in Celsius
    }
    opt = Optimizer(dimensions=dimensions_aslist(chef_space))
    point = opt.ask()

    # check if the back transformed point and original one are equivalent
    assert_equal(
        point, point_aslist(chef_space, point_asdict(chef_space, point))
    )
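
# A hypothetical concrete round trip complementing the test above. The point
# values are made up for illustration; the ordering assumes skopt's
# dimensions_aslist() and point_asdict() order dimensions by sorted
# dictionary key, so this space flattens as (temperature, time, main,
# secondary).
def test_dict_list_round_trip_concrete():
    chef_space = {
        'Cooking time': (0, 1200),
        'Main ingredient': ['cheese', 'chicken'],
        'Secondary ingredient': ['love', 'passion'],
        'Cooking temperature': (-273.16, 10000.0)
    }
    point = [180.0, 300, 'chicken', 'love']
    as_dict = point_asdict(chef_space, point)
    assert as_dict == {
        'Cooking temperature': 180.0,
        'Cooking time': 300,
        'Main ingredient': 'chicken',
        'Secondary ingredient': 'love'
    }
    assert point_aslist(chef_space, as_dict) == point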
def test_multiple_asks():
    # calling ask() multiple times without a tell() in between should
    # be a "no op"
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=1,
                    acq_optimizer="sampling")

    opt.run(bench1, n_iter=3)
    # tell() computes the next point ready for the next call to ask()
    # hence there are three after three iterations
    assert_equal(len(opt.models), 3)
    assert_equal(len(opt.Xi), 3)
    opt.ask()
    assert_equal(len(opt.models), 3)
    assert_equal(len(opt.Xi), 3)
    assert_equal(opt.ask(), opt.ask())
def test_categorical_only2():
    from numpy import linalg
    from skopt.space import Categorical
    from skopt.learning import GaussianProcessRegressor

    space = [Categorical([1, 2, 3]), Categorical([4, 5, 6])]
    opt = Optimizer(space,
                    base_estimator=GaussianProcessRegressor(alpha=1e-7),
                    acq_optimizer='lbfgs',
                    n_initial_points=10,
                    n_jobs=2)

    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
    opt.tell(next_x, [linalg.norm(x) for x in next_x])
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
    opt.tell(next_x, [linalg.norm(x) for x in next_x])
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
def test_acq_optimizer_with_time_api(base_estimator, acq_func):
    opt = Optimizer([(-2.0, 2.0), ],
                    base_estimator=base_estimator,
                    acq_func=acq_func,
                    acq_optimizer="sampling",
                    n_initial_points=2)
    x1 = opt.ask()
    opt.tell(x1, (bench1(x1), 1.0))
    x2 = opt.ask()
    res = opt.tell(x2, (bench1(x2), 2.0))

    # x1 and x2 are random.
    assert x1 != x2

    assert len(res.models) == 1
    assert_array_equal(res.func_vals.shape, (2,))
    assert_array_equal(res.log_time.shape, (2,))

    # telling a bare objective value instead of a (value, time) tuple
    # must raise
    with pytest.raises(TypeError):
        opt.tell(x2, bench1(x2))
def test_categorical_only():
    from skopt.space import Categorical

    cat1 = Categorical([2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    cat2 = Categorical([2, 3, 4, 5, 6, 7, 8, 9, 10, 11])

    opt = Optimizer([cat1, cat2])
    for n in range(15):
        x = opt.ask()
        res = opt.tell(x, 12 * n)

    assert len(res.x_iters) == 15
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4

    cat3 = Categorical(["2", "3", "4", "5", "6", "7", "8", "9", "10", "11"])
    cat4 = Categorical(["2", "3", "4", "5", "6", "7", "8", "9", "10", "11"])

    opt = Optimizer([cat3, cat4])
    for n in range(15):
        x = opt.ask()
        res = opt.tell(x, 12 * n)

    assert len(res.x_iters) == 15
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
def test_defaults_are_equivalent():
    # check that the defaults of Optimizer reproduce the defaults of
    # gp_minimize
    space = [(-5., 10.), (0., 15.)]
    opt = Optimizer(space, random_state=1)

    for n in range(12):
        x = opt.ask()
        res_opt = opt.tell(x, branin(x))

    res_min = gp_minimize(branin, space, n_calls=12, random_state=1)

    assert res_min.space == res_opt.space
    # tolerate small differences in the points sampled
    assert np.allclose(res_min.x_iters, res_opt.x_iters)
    assert np.allclose(res_min.x, res_opt.x)

    # get_result() should agree with the result returned by the last tell()
    res_opt2 = opt.get_result()
    assert np.allclose(res_min.x_iters, res_opt2.x_iters)
    assert np.allclose(res_min.x, res_opt2.x)
def crossval_optimize_params(self, opt_metric, dataset, cv=3, opt_evals=50,
                             metrics=None, verbose=False, client=None,
                             workers=1, timeout=100, push_data=False,
                             data_check=True, **kwargs):
    """Find optimal hyperparameters for all models

    Args:
        opt_metric (modelgym.metrics.Metric): metric to optimize
        dataset (modelgym.utils.XYCDataset or None): dataset
        cv (int or list of tuples of (XYCDataset, XYCDataset)):
            if int, number of cross-validation folds; otherwise, the
            cross-validation folds themselves.
        opt_evals (int): number of cross-validation evaluations
        metrics (list of modelgym.metrics.Metric, optional): additional
            metrics to evaluate
        verbose (bool): Enable verbose output.
        client (optional): remote evaluation client; if None, models are
            evaluated locally
        workers (int): number of points asked per iteration when a client
            is used
        timeout (int): seconds to wait for remote results
        push_data (bool): whether to push the dataset to the client
        data_check (bool): validate the dataset before optimizing
        **kwargs: ignored

    Note:
        if cv is int, then dataset is split into cv parts for cross
        validation. Otherwise, cv folds are used.
    """
    for name, model_space in self.model_spaces.items():
        self.ind2names[name] = [param.name for param in model_space.space]

    # opt_metric is always evaluated, appended after any additional metrics
    if metrics is None:
        metrics = []
    metrics.append(opt_metric)

    if isinstance(dataset, (Path, str)):
        if Path(dataset).expanduser().exists():
            dataset = read_csv(dataset)
        else:
            raise FileNotFoundError(errno.ENOENT,
                                    os.strerror(errno.ENOENT), dataset)

    if isinstance(dataset, DataFrame):
        if data_check:
            if dataset.isnull().values.any():
                raise ValueError("Dataset has NA values")
            if "y" not in list(dataset.columns):
                raise ValueError("Dataset doesn't have 'y' column")
            logging.info("Dataset is ok")
    else:
        raise ValueError(
            "Dataset should be DataFrame or path to the DataFrame")

    data_path = ""
    if client is None:
        cv_pairs = cv_split(dataset, cv)
    else:
        with tempfile.NamedTemporaryFile() as temp:
            dataset.to_csv(temp.name, index=False)
            data_path = client.send_data(temp.name, push_data)

    for name, model_space in self.model_spaces.items():
        if client is None:
            fn = lambda params: self._eval_fn(
                model_type=model_space.model_class, params=params,
                cv=cv_pairs, metrics=metrics, verbose=verbose,
                space_name=name)
            best = self.optimizer(fn, model_space.space, n_calls=opt_evals,
                                  n_random_starts=min(1, opt_evals))
        else:
            optimizer = Optimizer(dimensions=model_space.space,
                                  random_state=1, acq_func="gp_hedge")
            for _ in log_progress(range(opt_evals), every=1):
                # ask for a batch of `workers` points at once
                x = optimizer.ask(n_points=workers)
                x_named = [{self.ind2names[name][i]: params[i]
                            for i in range(len(params))}
                           for params in x]

                job_id_list = []
                for model_params in x_named:
                    model_info = {
                        "models": [{
                            "type": model_space.model_class.__name__,
                            "params": model_params
                        }],
                        "metrics": [m.name for m in metrics[1:]],
                        "return_models": False,
                        "cv": cv
                    }
                    job_id_list.append(
                        client.eval_model(model_info=model_info,
                                          data_path=data_path))

                result_list = client.gather_results(job_id_list,
                                                    timeout=timeout)
                if not result_list:
                    continue
                # keep only the evaluations that came back successfully
                y_succeed = [result for result in result_list
                             if result is not None]
                x_succeed = [x_dot for i, x_dot in enumerate(x)
                             if result_list[i] is not None]
                self.logs += y_succeed
                for res in y_succeed:
                    if self.best_results.get(name) is None:
                        self.best_results[name] = {"output": {"loss": 0}}
                    if (res.get("output").get("loss") <
                            self.best_results.get(name)
                            .get("output").get("loss")):
                        self.best_results[name] = res
                if y_succeed:
                    best = optimizer.tell(
                        x_succeed,
                        [res.get("output").get("loss")
                         for res in y_succeed])

    return self.best_results
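
# The client branch above is a batch ask/tell loop: ask for several points at
# once, evaluate them (some evaluations may fail), and tell back only the
# successes. A minimal sketch of the same pattern, with a made-up objective
# and simulated failures standing in for the remote client:
import random
from skopt import Optimizer

def _batch_ask_tell_sketch(n_iter=5, workers=4):
    opt = Optimizer(dimensions=[(-2.0, 2.0), (0.0, 1.0)],
                    random_state=1, acq_func="gp_hedge")
    for _ in range(n_iter):
        points = opt.ask(n_points=workers)  # one batch per iteration
        # stand-in for client.eval_model/gather_results; None marks a failure
        results = [None if random.random() < 0.2 else sum(v ** 2 for v in p)
                   for p in points]
        x_ok = [p for p, r in zip(points, results) if r is not None]
        y_ok = [r for r in results if r is not None]
        if y_ok:
            opt.tell(x_ok, y_ok)  # bulk-update the surrogate
    return opt.get_result()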
import numpy as np

from skopt import Optimizer
from skopt.learning import GaussianProcessRegressor
from skopt.learning.gaussian_process.kernels import Matern

# black_box (the objective) and plot_space (a plotting helper) are assumed
# to be defined earlier in this example.

# Search from 0.0 to 6.0.
dimensions = ((0.0, 6.0),)

# Initialize the estimator.
gpr = GaussianProcessRegressor(kernel=Matern(), noise=0.0)

optimizer = Optimizer(dimensions=dimensions,
                      base_estimator=gpr,
                      n_random_starts=0,
                      acq_func="LCB",
                      random_state=0)

# Tell some points to the optimizer and ask for the next point.
X = np.reshape(np.linspace(5.0, 6.0, 10), (-1, 1)).tolist()
y = [black_box(xi) for xi in X]
optimizer.tell(X, y)

x_cand = optimizer.ask()
y_cand = black_box(x_cand)

plot = plot_space(X, y, optimizer.models[-1], x_cand)
plot.show()

# Tell and ask again.
optimizer.tell(x_cand, y_cand)
X = X + [x_cand]
y = y + [y_cand]

x_cand = optimizer.ask()
y_cand = black_box(x_cand)
plot = plot_space(X, y, optimizer.models[-1], x_cand)
plot.show()

# Tell and ask again.
optimizer.tell(x_cand, y_cand)
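
# The tell/ask/plot blocks above repeat verbatim; without the per-step plots
# the cycle collapses to a loop. A short continuation using the same
# optimizer and black_box:
for _ in range(5):
    x_cand = optimizer.ask()        # next candidate under the LCB criterion
    y_cand = black_box(x_cand)      # evaluate the objective
    optimizer.tell(x_cand, y_cand)  # refit the GP with the new observation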