def job(loss):
    scorer = SklearnScorer(
        X, y, words, postfx, rules_apply=0.8, max_endings=75
    )
    space = {
        'alpha': (0.0001, 1.0, 'log-uniform'),
        'l1_ratio': (0.001, 0.999),
        'loss': [loss],
        'epsilon': (0.001, 10.0, 'log-uniform'),
        'threshold': (0.00001, 0.001, 'log-uniform'),
    }
    # treating the space dict as its own "point" returns the dimension
    # definitions sorted by name, which Optimizer accepts as dimensions
    opt = Optimizer(point_aslist(space, space))
    for i in range(128):
        p = opt.ask()
        p = point_asdict(space, p)
        f = scorer(p)
        opt.tell(point_aslist(space, p), f)
        print(f)
        print(i, scorer.best_obj, scorer.best_params)
    import json
    json.dump(scorer.result, open(loss + '.json', 'w'),
              indent=2, sort_keys=True)
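The Optimizer construction above leans on a small trick: since point_aslist simply reads the dict's values in sorted-key order, passing the space dict as its own "point" yields exactly what dimensions_aslist produces. A quick standalone check, assuming only that skopt is installed:

from skopt.utils import dimensions_aslist, point_aslist

space = {'alpha': (0.0001, 1.0, 'log-uniform'), 'l1_ratio': (0.001, 0.999)}
# both return the dimension tuples ordered by sorted parameter name
assert point_aslist(space, space) == dimensions_aslist(space)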
def test_dict_list_space_representation():
    """
    Tests whether the conversion of the dictionary and list representation
    of a point from a search space works properly.
    """
    chef_space = {
        'Cooking time': (0, 1200),  # in minutes
        'Main ingredient': [
            'cheese', 'cherimoya', 'chicken', 'chard', 'chocolate', 'chicory'
        ],
        'Secondary ingredient': [
            'love', 'passion', 'dedication'
        ],
        'Cooking temperature': (-273.16, 10000.0)  # in Celsius
    }

    opt = Optimizer(dimensions=dimensions_aslist(chef_space))
    point = opt.ask()

    # check if the back-transformed point and the original one are equivalent
    assert_equal(
        point, point_aslist(chef_space, point_asdict(chef_space, point))
    )
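For reference, a minimal sketch of the same round trip outside the test harness; the space and point below are made up, and only skopt is assumed:

from skopt.utils import point_asdict, point_aslist

space = {'lr': (1e-4, 1e-1, 'log-uniform'), 'units': (16, 256)}
point = [0.01, 64]  # values ordered by the sorted dimension names: 'lr', 'units'
as_dict = point_asdict(space, point)  # OrderedDict([('lr', 0.01), ('units', 64)])
assert point_aslist(space, as_dict) == point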
def _step(self, X, y, search_space, optimizer, groups=None, n_points=1):
    """Generate n_jobs parameters and evaluate them in parallel."""
    # get parameter values to evaluate
    params = optimizer.ask(n_points=n_points)

    # convert parameters to python native types
    # (np.asscalar was removed from recent NumPy; .item() is the equivalent)
    params = [[np.array(v).item() for v in p] for p in params]

    # make lists into dictionaries
    params_dict = [point_asdict(search_space, p) for p in params]

    # HACK: self.cv_results_ is reset at every call to _fit, keep current
    all_cv_results = self.cv_results_

    # HACK: this adds compatibility with different versions of sklearn
    refit = self.refit
    self.refit = False
    self._fit(X, y, groups, params_dict)
    self.refit = refit

    # merge existing and new cv_results_
    for k in self.cv_results_:
        all_cv_results[k].extend(self.cv_results_[k])

    self.cv_results_ = all_cv_results
    self.best_index_ = np.argmax(self.cv_results_['mean_test_score'])

    # feed the point and objective back into optimizer
    local_results = self.cv_results_['mean_test_score'][-len(params):]

    # optimizer minimizes objective, hence provide negative score
    return optimizer.tell(params, [-score for score in local_results])
def _step(self, X, y, search_space, optimizer, groups=None, n_points=1):
    """Generate n_jobs parameters and evaluate them in parallel."""
    # NOTE: THIS IS THE CODE ADDED TO BayesSearchCV (see class docstr)
    if isinstance(n_points, np.int64):
        n_points = int(n_points)

    # get parameter values to evaluate
    params = optimizer.ask(n_points=n_points)
    params_dict = [point_asdict(search_space, p) for p in params]

    # HACK: self.cv_results_ is reset at every call to _fit, keep current
    all_cv_results = self.cv_results_

    # HACK: this adds compatibility with different versions of sklearn
    refit = self.refit
    self.refit = False
    self._fit(X, y, groups, params_dict)
    self.refit = refit

    # merge existing and new cv_results_
    for k in self.cv_results_:
        all_cv_results[k].extend(self.cv_results_[k])

    self.cv_results_ = all_cv_results
    self.best_index_ = np.argmax(self.cv_results_['mean_test_score'])

    # feed the point and objective back into optimizer
    local_results = self.cv_results_['mean_test_score'][-len(params):]

    # optimizer minimizes objective, hence provide negative score
    return optimizer.tell(params, [-score for score in local_results])
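Both _step variants above follow skopt's ask/tell protocol: request a batch of points, evaluate them, and feed the (negated, since the optimizer minimizes) scores back. A self-contained sketch of that protocol with a made-up objective, independent of the classes above:

from skopt import Optimizer
from skopt.utils import dimensions_aslist, point_asdict

search_space = {'alpha': (1e-4, 1.0, 'log-uniform'), 'l1_ratio': (0.0, 1.0)}
opt = Optimizer(dimensions_aslist(search_space))

def objective(params):
    # stand-in for a cross-validated score; a real scorer would return
    # the negated score because skopt minimizes
    return (params['alpha'] - 0.1) ** 2 + (params['l1_ratio'] - 0.5) ** 2

for _ in range(10):
    points = opt.ask(n_points=2)  # batch of candidate points
    ys = [objective(point_asdict(search_space, p)) for p in points]
    opt.tell(points, ys)          # feed objectives back into the optimizer

print(min(opt.yi))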
def __call__(self, x):
    x_dict = point_asdict(self.search_space, x)
    x_dict['model__lr'] = np.log(x_dict['model__lr'])
    x_list = point_aslist(self.search_space, x_dict)
    score = ffnn_predict([x_list], self.simulator)[0][0]
    return -np.mean(score)
def _step(
    self,
    optimizer: Optimizer,
    data: Dataset,
    metrics: List[str],
    cv: Any,
    n_jobs: int,
    verbose: int,
) -> Result:
    """
    Performs a step in the Bayesian optimization

    Parameters
    ----------
    optimizer: Optimizer
        An instance of skopt's Optimizer
    data: Dataset
        Instance of data to train on
    metrics: List of str
        List of metrics to calculate results for
    cv: Any
        Either a CV object from sklearn or an int to specify number of folds
    n_jobs: int
        Number of jobs to calculate in parallel
    verbose: int
        Verbosity level of the method

    Returns
    -------
    Result
    """
    params = optimizer.ask()
    params = [np.array(p).item() for p in params]

    # make lists into dictionaries
    params_dict = point_asdict(self.param_grid, params)
    estimator = clone(self.estimator).set_params(**params_dict)

    logger.info("Fitting estimator...")
    logger.debug("Fitting estimator %s", estimator)
    result = Result.from_estimator(
        estimator=estimator,
        data=data,
        metrics=metrics,
        cv=cv,
        n_jobs=n_jobs,
        verbose=verbose,
    )
    logger.info("Result: %s", result)

    # optimizer minimizes, so pass the negated score of the first metric
    optimizer.tell([params], [-result.metrics[0].score])
    return result
def _step(self, X, y, search_space, optimizer, groups=None, n_points=1):
    """Generate n_jobs parameters and evaluate them in parallel."""
    # get parameter values to evaluate
    # TODO: Until n_points is supported, we will wrap the return value in a list
    params = [optimizer.ask(n_points=n_points)]

    # convert parameters to python native types
    params = [[np.array(v).item() for v in p] for p in params]

    # make lists into dictionaries
    params_dict = [point_asdict(search_space, p) for p in params]

    # HACK: self.cv_results_ is reset at every call to _fit, keep current
    all_cv_results = self.cv_results_

    # HACK: this adds compatibility with different versions of sklearn
    refit = self.refit
    self.refit = False
    self._fit(X, y, groups, params_dict)
    self.refit = refit

    # merge existing and new cv_results_
    for k in self.cv_results_:
        all_cv_results[k].extend(self.cv_results_[k])
    all_cv_results["rank_test_score"] = list(
        np.asarray(
            rankdata(-np.array(all_cv_results["mean_test_score"]), method="min"),
            dtype=np.int32,
        )
    )
    if self.return_train_score:
        all_cv_results["rank_train_score"] = list(
            np.asarray(
                rankdata(-np.array(all_cv_results["mean_train_score"]), method="min"),
                dtype=np.int32,
            )
        )
    self.cv_results_ = all_cv_results
    self.best_index_ = np.argmax(self.cv_results_["mean_test_score"])

    # feed the point and objective back into optimizer
    local_results = self.cv_results_["mean_test_score"][-len(params):]

    # optimizer minimizes objective, hence provide negative score
    return optimizer.tell(
        params,
        [-score for score in local_results],
        n_samples=self.n_samples,
        gp_samples=self.gp_samples,
        gp_burnin=self.gp_burnin,
        progress=False,
    )
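The rank columns above come from scipy's rankdata applied to the negated mean scores, so the best score receives rank 1 and ties share the minimum rank. In isolation, with made-up values:

import numpy as np
from scipy.stats import rankdata

scores = np.array([0.7, 0.9, 0.9, 0.5])
ranks = rankdata(-scores, method="min").astype(np.int32)
print(ranks)  # [3 1 1 4]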
def __call__(self, x):
    model = Pipeline([
        ('features', ColumnSubset()),
        ('scaler', StandardScaler()),
        ('model', GradientBoostingRegressor()),
    ])
    x_dict = point_asdict(self.search_space, x)
    model.set_params(**x_dict)
    X, y = self.dataset
    scores = cross_val_score(model, X, y)
    return -np.mean(scores)
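A callable like this plugs directly into skopt's minimizers. A hypothetical driver, assuming obj is an instance of the class above and search_space is its skopt-style dict attribute:

from skopt import gp_minimize
from skopt.utils import dimensions_aslist, point_asdict

res = gp_minimize(obj, dimensions_aslist(obj.search_space), n_calls=30)
best = point_asdict(obj.search_space, res.x)  # best point, as a param dict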
def best_params_(self):
    check_is_fitted(self, "cv_results_")
    if self.return_policy == "best_setting" or len(self.optimizers_) > 1:
        if len(self.optimizers_) > 1:
            logging.warning(
                "Return policy 'best_mean' is incompatible with multiple "
                "search spaces. Reverting to 'best_setting'."
            )
        return self.cv_results_["params"][self.best_index_]
    if self.return_policy == "best_mean":
        random_state = self.optimizer_kwargs_["random_state"]
        # We construct a result object manually here, since in skopt versions
        # up to 0.7.4 they were not saved yet:
        opt = self.optimizers_[0]
        result_object = create_result(
            opt.Xi, opt.yi, space=opt.space, rng=random_state, models=[opt.gp]
        )
        point, _ = expected_minimum(
            res=result_object,
            n_random_starts=100,
            random_state=random_state,
        )
        # avoid shadowing the builtin `dict`
        best_params = point_asdict(self.search_spaces, point)
        return best_params
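expected_minimum queries the surrogate model rather than only the observed points, so the "best_mean" policy can return a setting that was never actually evaluated. A minimal sketch against any finished skopt result object res:

from skopt import expected_minimum

point, value = expected_minimum(res, n_random_starts=100, random_state=0)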
def values_to_dict(self, values):
    return point_asdict(self.search_space, values)
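Hypothetical usage, assuming the owning object (here called converter) stores a skopt-style search_space dict; names and values are made up:

converter.search_space = {'depth': (1, 10), 'lr': (1e-4, 1e-1)}
converter.values_to_dict([5, 0.01])
# -> OrderedDict([('depth', 5), ('lr', 0.01)]), keyed in sorted-name order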
print(search_result.fun)

#############################################################################

for fitness, x in sorted(zip(search_result.func_vals, search_result.x_iters)):
    print(fitness, x)

#############################################################################

space = search_result.space

print(search_result.x_iters)

search_space = {name: space[name][1] for name in space.dimension_names}
print(point_asdict(search_space, default_parameters))

#############################################################################

print("Plotting now ...")

_ = plot_histogram(result=search_result,
                   dimension_identifier='learning_rate',
                   bins=20)
plt.show()

#############################################################################

_ = plot_objective_2D(result=search_result,
                      dimension_identifier1='learning_rate',
                      dimension_identifier2='num_dense_nodes')
plt.show()

#############################################################################