def tune_parameter(X, y, clf, params):
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    gs = BayesSearchCV(
        estimator=clf,
        search_spaces=params,
        scoring="f1",
        n_iter=100,
        optimizer_kwargs={"base_estimator": "GP"},
        verbose=2,
        n_jobs=-1,
        cv=4,
        refit=True,
        random_state=1234,
    )
    gs.fit(X, y, callback=DeltaXStopper(0.000001))

    best_params = gs.best_params_
    best_score = gs.best_score_
    print(best_params)
    print(best_score)

    str_time = str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M"))
    with open("kuaishou_stats.csv", 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["the best params for svm: "])
        for key, value in best_params.items():
            writer.writerow([key, value])
        writer.writerow(["the best score for svm: ", best_score, str_time])
    return gs
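A hedged usage sketch for the function above. The estimator and search space are illustrative, not taken from the original project; BayesSearchCV accepts a dict mapping parameter names to skopt dimensions, and DeltaXStopper ends the search once consecutive proposals stop moving.

# Illustrative only: a hypothetical SVM search space for tune_parameter().
from sklearn.svm import SVC
from skopt.space import Real

svm_space = {
    "C": Real(1e-3, 1e3, prior="log-uniform"),
    "gamma": Real(1e-4, 1e1, prior="log-uniform"),
}

# gs = tune_parameter(X, y, SVC(), svm_space)   # X, y as in the snippet above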
def optimize(config, objective_fn, initial_points=None, logger_fn=None):
    best_params = {}
    best_score = 0.0
    metadata = None

    def skopt_logger(result):
        x0 = result.x_iters  # list of input points
        y0 = result.func_vals  # evaluation of input points
        num_iters = len(x0)
        params = config.param_dict_from_values(x0[-1])
        params = _convert_param_types(params)
        score = -1 * y0[-1]
        if logger_fn:
            logger_fn(num_iters, score, params)

    if config.selected_method == 'grid':
        grid_space = config.param_dict_from_values(
            [list(dim.categories) for dim in config.space])
        for i, params in enumerate(list(ParameterGrid(grid_space))):
            # keep the same order as in the configuration to make reading logs easier
            ordered_params = {k: params[k] for k in config.dimension_names()}
            score = -1 * objective_fn(ordered_params)
            if logger_fn:
                logger_fn(i + 1, score, ordered_params)
            if score > best_score:
                best_score = score
                best_params = ordered_params.copy()

    elif config.selected_method in ('bayesian', 'random'):
        if config.selected_method == 'random':
            config.num_initial_points = config.num_iterations

        def list_based_objective_fn(param_values):
            """Convert params to a dict first."""
            return objective_fn(config.param_dict_from_values(param_values))

        res = gp_minimize(
            func=list_based_objective_fn,
            dimensions=config.space,
            n_calls=config.num_iterations,  # total calls to func, includes initial points
            n_initial_points=config.num_initial_points,  # random points to seed process
            verbose=False,
            callback=[DeltaXStopper(0.001), skopt_logger],
            x0=initial_points)
        best_params = config.param_dict_from_values(res.x)
        best_score = -1 * res.fun
        metadata = res

    else:
        raise ValueError(f"Unsupported method: {config.selected_method}")

    final_params = merge_params([config.default, best_params])
    return best_score, _convert_param_types(best_params), _convert_param_types(
        final_params), metadata
def get_bayes_scikit_score_cv(X_train, y_train, X_test, y_test, X_val=None, y_val=None,
                              max_evals=25, folds=5, original=None):
    space = get_baesian_space(dictem=True)

    opt_cat = BayesSearchCV(CatBoostClassifier(logging_level='Silent'), space['CAT'],
                            n_iter=max_evals, random_state=0)
    opt_xgb = BayesSearchCV(XGBClassifier(), space['XGB'],
                            n_iter=max_evals, random_state=0)
    opt_lgbm = BayesSearchCV(LGBMClassifier(), space['LGBM'],
                             n_iter=max_evals, random_state=0)

    _ = opt_cat.fit(X_train, y_train, callback=[DeltaXStopper(0.01), DeltaYStopper(0.01)])
    _ = opt_xgb.fit(X_train, y_train, callback=[DeltaXStopper(0.01), DeltaYStopper(0.01)])
    _ = opt_lgbm.fit(X_train, y_train, callback=[DeltaXStopper(0.01), DeltaYStopper(0.01)])

    scores = [opt_cat.score(X_test, y_test),
              opt_xgb.score(X_test, y_test),
              opt_lgbm.score(X_test, y_test)]
    train_scores = [opt_cat.best_score_, opt_xgb.best_score_, opt_lgbm.best_score_]
    score = max(scores)
    cross_score = max(train_scores)

    neptune.log_metric(f'skopt-{max_evals}-iterations-{folds}-folds', score)
    neptune.log_metric('skopt train holdout score', cross_score)
    return score
def test_early_stopping_delta_x(minimizer):
    n_calls = 11
    res = minimizer(bench1,
                    callback=DeltaXStopper(0.1),
                    dimensions=[(-1., 1.)],
                    x0=[[-0.1], [0.1], [-0.9]],
                    n_calls=n_calls,
                    n_random_starts=0,
                    random_state=1)
    assert len(res.x_iters) < n_calls
def test_early_stopping_delta_x_empty_result_object(minimizer):
    # check that the callback handles the case of being passed an empty
    # results object, e.g. at the start of the optimization loop
    n_calls = 15
    res = minimizer(bench1,
                    callback=DeltaXStopper(0.1),
                    dimensions=[(-1., 1.)],
                    n_calls=n_calls,
                    n_random_starts=1,
                    random_state=1)
    assert len(res.x_iters) < n_calls
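The two tests above only assert that fewer than n_calls points get evaluated. As a minimal, self-contained sketch of the behaviour being tested (the toy objective is illustrative): DeltaXStopper tells the minimizer to stop once the last two evaluated points lie closer together than delta in the search space.

# Minimal sketch, not part of the test suite above.
from skopt import gp_minimize
from skopt.callbacks import DeltaXStopper

result = gp_minimize(
    func=lambda x: (x[0] - 0.25) ** 2,   # toy 1-D objective
    dimensions=[(-1.0, 1.0)],
    n_calls=30,
    callback=DeltaXStopper(0.1),         # stop when successive points differ by < 0.1
    random_state=1,
)
print(len(result.x_iters))               # typically well below n_calls=30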
def optimized_bayesian_search_2(X, y, model, parameter, seed, eval_method):
    start_time = time.time()
    search = BayesSearchCV(
        model,
        parameter,
        n_jobs=-1,
        n_iter=200,
        scoring=eval_method,
        cv=5,
        random_state=seed,
        verbose=0,
        optimizer_kwargs={'base_estimator': 'GP'},
    )
    search.fit(X, y, callback=DeltaXStopper(0.0001))
    end_time = time.time()
    return [search.cv_results_, search.best_index_, end_time - start_time]
# Tail of the report_perf() helper that is called at the bottom of this snippet.
# The original excerpt starts mid-function; the signature and the fit/score
# bookkeeping below are reconstructed from the call site and marked as assumed.
def report_perf(optimizer, X, y, title, callbacks=None):
    # --- reconstructed (assumed) head: run the search and collect the results ---
    start = time()
    optimizer.fit(X, y, callback=callbacks)
    best_score = optimizer.best_score_
    best_score_std = optimizer.cv_results_['std_test_score'][optimizer.best_index_]
    best_params = optimizer.best_params_
    # --- original tail ---
    print((title + " took %.2f seconds, candidates checked: %d, best CV score: %.3f "
           + u"\u00B1" + " %.3f")
          % (time() - start,
             len(optimizer.cv_results_['params']),
             best_score,
             best_score_std))
    print('Best parameters:')
    pprint.pprint(best_params)
    print()
    return best_params


# Converting average precision score into a scorer suitable for model selection
avg_prec = make_scorer(average_precision_score, greater_is_better=True,
                       needs_proba=True)

# Setting a 5-fold stratified cross-validation (note: shuffle=True)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

opt = BayesSearchCV(clf,
                    search_spaces,
                    scoring=avg_prec,
                    cv=skf,
                    n_iter=40,
                    n_jobs=-1,
                    refit=True,
                    optimizer_kwargs={'base_estimator': 'GP'},
                    random_state=22,
                    return_train_score=False)

best_params = report_perf(opt, X, y, 'LightGBM',
                          callbacks=[DeltaXStopper(0.001), DeadlineStopper(60 * 5)])
def _check_parameters(self):
    """Check the validity of the input parameters."""
    if self.mapping is None:
        self.mapping = {str(v): v for v in sorted(self.y.unique())}

    if self.scaled is None:
        self.scaled = check_scaling(self.X)

    # Create model subclasses ================================== >>

    models = []
    for m in self._models:
        if isinstance(m, str):
            acronym = get_acronym(m, must_be_equal=False)

            # Check if packages for non-sklearn models are available
            if acronym in OPTIONAL_PACKAGES:
                try:
                    importlib.import_module(OPTIONAL_PACKAGES[acronym])
                except ImportError:
                    raise ValueError(
                        f"Unable to import the {OPTIONAL_PACKAGES[acronym]} "
                        "package. Make sure it is installed.")

            # Check for regression/classification-only models
            if self.goal.startswith("class") and acronym in ONLY_REG:
                raise ValueError(
                    f"The {acronym} model can't perform classification tasks!")
            elif self.goal.startswith("reg") and acronym in ONLY_CLASS:
                raise ValueError(
                    f"The {acronym} model can't perform regression tasks!")

            models.append(MODEL_LIST[acronym](self, acronym + m[len(acronym):]))

        elif not isinstance(m, BaseModel):  # Model is custom estimator
            models.append(CustomModel(self, estimator=m))

        else:  # Model is already a model subclass (can happen with reruns)
            models.append(m)

    self._models = CustomDict({m.name: m for m in models})

    # Check validity metric ==================================== >>

    if None in self._metric:
        self._metric = CustomDict(get_default_metric(self.task))

    # Ignore if it's the same metric as previous call
    elif not all([hasattr(m, "name") for m in self._metric]):
        self._metric = self._prepare_metric(
            metric=self._metric,
            greater_is_better=self.greater_is_better,
            needs_proba=self.needs_proba,
            needs_threshold=self.needs_threshold,
        )

    # Check validity sequential parameters ===================== >>

    for param in ["n_calls", "n_initial_points", "bagging"]:
        p = lst(getattr(self, param))
        if len(p) != 1 and len(p) != len(self._models):
            raise ValueError(
                f"Invalid value for the {param} parameter. Length "
                "should be equal to the number of models, got len"
                f"(models)={len(self._models)} and len({param})={len(p)}.")

        for i, model in enumerate(self._models):
            if param in ("n_calls", "bagging") and p[i % len(p)] < 0:
                raise ValueError(
                    f"Invalid value for the {param} parameter. "
                    f"Value should be >=0, got {p[i % len(p)]}.")
            elif param == "n_initial_points" and p[i % len(p)] <= 0:
                raise ValueError(
                    f"Invalid value for the {param} parameter. "
                    f"Value should be >0, got {p[i % len(p)]}.")

            setattr(model, "_" + param, p[i % len(p)])

    # Prepare bo parameters ===================================== >>

    # Choose a base estimator (GP is chosen as default)
    self._base_estimator = self.bo_params.get("base_estimator", "GP")
    if isinstance(self._base_estimator, str):
        if self._base_estimator.lower() not in ("gp", "et", "rf", "gbrt"):
            raise ValueError(
                f"Invalid value for the base_estimator parameter, got "
                f"{self._base_estimator}. Value should be one of: 'GP', "
                f"'ET', 'RF', 'GBRT'.")

    if self.bo_params.get("callbacks"):
        self._callbacks = lst(self.bo_params["callbacks"])

    if "max_time" in self.bo_params:
        if self.bo_params["max_time"] <= 0:
            raise ValueError(
                "Invalid value for the max_time parameter. "
                f"Value should be >0, got {self.bo_params['max_time']}.")
        self._callbacks.append(DeadlineStopper(self.bo_params["max_time"]))

    if "delta_x" in self.bo_params:
        if self.bo_params["delta_x"] < 0:
            raise ValueError(
                "Invalid value for the delta_x parameter. "
                f"Value should be >=0, got {self.bo_params['delta_x']}.")
        self._callbacks.append(DeltaXStopper(self.bo_params["delta_x"]))

    if "delta_y" in self.bo_params:
        if self.bo_params["delta_y"] < 0:
            raise ValueError(
                "Invalid value for the delta_y parameter. "
                f"Value should be >=0, got {self.bo_params['delta_y']}.")
        self._callbacks.append(DeltaYStopper(self.bo_params["delta_y"], n_best=5))

    if self.bo_params.get("plot"):
        self._callbacks.append(PlotCallback(self))

    if "cv" in self.bo_params:
        if self.bo_params["cv"] <= 0:
            raise ValueError(
                "Invalid value for the cv parameter. "
                f"Value should be >0, got {self.bo_params['cv']}.")
        self._cv = self.bo_params["cv"]

    if "early_stopping" in self.bo_params:
        if self.bo_params["early_stopping"] <= 0:
            raise ValueError(
                "Invalid value for the early_stopping parameter. "
                f"Value should be >0, got {self.bo_params['early_stopping']}.")
        self._early_stopping = self.bo_params["early_stopping"]

    # Add custom dimensions to every model subclass
    if self.bo_params.get("dimensions"):
        for name, model in self._models.items():
            # If not dict, the dimensions are for all models
            if not isinstance(self.bo_params["dimensions"], dict):
                model._dimensions = self.bo_params["dimensions"]
            else:
                # Dimensions for every specific model
                for key, value in self.bo_params["dimensions"].items():
                    # Parameters for this model only
                    if key.lower() == name:
                        model._dimensions = value
                        break

    kwargs = [
        "base_estimator",
        "max_time",
        "delta_x",
        "delta_y",
        "early_stopping",
        "cv",
        "callbacks",
        "dimensions",
        "plot",
    ]

    # The remaining bo_params are added as kwargs to the optimizer
    self._bo_kwargs = {k: v for k, v in self.bo_params.items() if k not in kwargs}

    # Prepare est_params ======================================= >>

    if self.est_params:
        for name, model in self._models.items():
            params = {}
            for key, value in self.est_params.items():
                # Parameters for this model only
                if key.lower() == name:
                    params.update(value)
                # Parameters for all models
                elif key.lower() not in self._models.keys():
                    params.update({key: value})

            for key, value in params.items():
                if key.endswith("_fit"):
                    model._est_params_fit[key[:-4]] = value
                else:
                    model._est_params[key] = value
def main():
    start_time_main = time.time()

    print_info('Reading config files...', ':')
    run_config = config_file_to_dict(config_path + 'run_params.conf')
    data_config = config_file_to_dict(config_path + 'data_params.conf')
    model_config = config_file_to_dict(config_path + 'model_params.conf')

    if run_mode_user in run_config:
        frac_train_sample = run_config[run_mode_user]['frac_train_sample']
        num_test_samples = run_config[run_mode_user]['num_test_samples']
        num_CV_folds = run_config[run_mode_user]['num_CV_folds']
        do_optimize_params = run_config[run_mode_user]['do_optimize_params']
        n_iter = run_config[run_mode_user]['n_iter']
        print_info('Chosen run mode is {}: {}'.format(
            run_mode_user, run_config[run_mode_user]))
    else:
        raise KeyError('{} is not a valid run mode setting '
                       '(use, e.g., "run_params")'.format(run_mode_user))

    # collection of performance measures to be applied to the test set(s)
    scoring_funcs = ['accuracy_score', 'precision_score', 'recall_score',
                     'f1_score']

    final_results_labels = [
        'dataset', 'model', 'model_params', 'num_test_sets', 'num_CV_folds',
        'elapsed_time_train', 'elapsed_time_test'
    ]
    final_results_labels += ['test_{}_1fold'.format(i) for i in scoring_funcs]
    final_results_labels += ['train_{}'.format(i) for i in scoring_funcs]
    final_results_labels += ['test_{}'.format(i) for i in scoring_funcs]
    final_results_labels += ['test_{}_bootstrap'.format(i) for i in scoring_funcs]
    final_results_labels += ['test_{}_diff_max'.format(i) for i in scoring_funcs]
    final_results_labels += ['test_{}_diff_max_bootstrap'.format(i) for i in scoring_funcs]
    final_results_labels += ['test_{}_diff_mean'.format(i) for i in scoring_funcs]
    final_results_labels += ['test_{}_diff_std'.format(i) for i in scoring_funcs]
    final_results_labels += ['test_{}_diff_mean_bootstrap'.format(i) for i in scoring_funcs]
    final_results_labels += ['test_{}_diff_std_bootstrap'.format(i) for i in scoring_funcs]

    final_results = pd.DataFrame(columns=final_results_labels)

    # loop over all sections of the data params config file
    for d_cnt, d in enumerate(data_config):
        print_info('Processing dataset: {} ({} of {})'.format(
            d, d_cnt + 1, len(data_config)), '=', 50)

        current_data_results = {}
        current_data_params = data_config[d]
        check_data_config_requirements(current_data_params)

        print_info('Loading data...', ':')
        data = load_data(current_data_params)

        print_info('Preparing target vector...', ':')
        X = data.drop(current_data_params['data_target_col'], axis=1)
        y = data[current_data_params['data_target_col']]
        y = parse_target_labels(
            y, current_data_params['data_target_positive_label'],
            current_data_params['data_target_negative_label'])
        del data

        print_info('Dimensions of feature matrix X: {}'.format(X.shape))
        print_info('Dimensions of target vector y: {}'.format(y.shape))

        print_info('Splitting the data: splitting off the training sample...', ':')
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=1 - frac_train_sample)
        del X, y

        print_info('Preprocessing the data...', ':')
        pm = PreprocessingManager(om.get_session_folder())
        for func in current_data_params['data_preprocessing']:
            X_train = getattr(pm, func)(X_train, False)
            X_test = getattr(pm, func)(X_test, True)

        print_class_counts(y_train, 'training', background=0, signal=1)
        print_class_counts(y_test, 'test', background=0, signal=1)

        # hyperparameter optimization, if required
        if num_CV_folds is None:
            print_info('Optimizing the number of cross-validation folds...', ':')
            num_CV_folds = get_optimal_CV_n_folds(X_train.as_matrix(),
                                                  y_train.as_matrix())

        for mod in model_config:
            print_info('Training model: {}'.format(mod), '-', 50)
            try:
                model_params = model_config[mod]
                model = supported_models[mod](**model_params)
            except KeyError:
                raise KeyError('Model {} not supported. Choose a valid input '
                               'from this list: {}'.format(mod, supported_models))

            fitkwargs = {'X': X_train, 'y': y_train}
            if do_optimize_params:
                print_info('Optimizing hyperparameters...', ':')
                model = hyperparameter_search(model, n_iter, num_CV_folds)
                if mod != 'GaussianNB':
                    fitkwargs['callback'] = DeltaXStopper(1e-2)

            start_time_train = time.time()
            print_info('Fitting the model...', ':')
            model.fit(**fitkwargs)
            elapsed_time_train = time.time() - start_time_train

            model_parameters = get_search_results(model)

            # evaluate model on the training sample
            print_info('Evaluating the model on the training sample...', ':')
            for scoring_func in scoring_funcs:
                try:
                    model_scores_train = evaluate_nfold(X_train, y_train, model, 1,
                                                        scoring=scoring_func)
                    current_data_results['train_{}'.format(
                        scoring_func)] = model_scores_train[0]
                except ValueError:
                    warnings.warn('ValueError when evaluating with {}. '
                                  'Ignoring and continuing...'.format(scoring_func))

            # evaluate model on the test sample(s)
            print_info('Evaluating the model on the test sample(s)...', ':')
            test_performance_1fold = -1  # must be initialized with a negative number
            for t in range(1, num_test_samples + 1):
                start_time_test = time.time()
                for scoring_func in scoring_funcs:
                    try:
                        model_scores_test = evaluate_nfold(
                            X_test, y_test, model, t,
                            scoring=scoring_func, bootstrapping=False)
                        model_scores_test_bootstrap = evaluate_nfold(
                            X_test, y_test, model, t,
                            scoring=scoring_func, bootstrapping=True)

                        if test_performance_1fold < 0:
                            test_performance_1fold = model_scores_test[0]
                        else:
                            pass

                        current_data_results['test_{}_1fold'.format(
                            scoring_func)] = test_performance_1fold
                        current_data_results['test_{}'.format(
                            scoring_func)] = str(model_scores_test)
                        current_data_results['test_{}_bootstrap'.format(
                            scoring_func)] = str(model_scores_test_bootstrap)
                        current_data_results['test_{}_diff_max'.format(
                            scoring_func)] = max(model_scores_test) - min(model_scores_test)
                        current_data_results['test_{}_diff_max_bootstrap'.format(
                            scoring_func)] = max(model_scores_test_bootstrap) - min(
                                model_scores_test_bootstrap)

                        scores_mean, scores_std = performance_difference(
                            model_scores_test)
                        current_data_results['test_{}_diff_mean'.format(
                            scoring_func)] = scores_mean
                        current_data_results['test_{}_diff_std'.format(
                            scoring_func)] = scores_std

                        scores_mean_bootstrap, scores_std_bootstrap = \
                            performance_difference(model_scores_test_bootstrap)
                        current_data_results['test_{}_diff_mean_bootstrap'.format(
                            scoring_func)] = scores_mean_bootstrap
                        current_data_results['test_{}_diff_std_bootstrap'.format(
                            scoring_func)] = scores_std_bootstrap

                    except ValueError:
                        warnings.warn('ValueError when evaluating with {}. '
                                      'Ignoring and continuing...'.format(scoring_func))
                        current_data_results['test_{}_1fold'.format(scoring_func)] = -1
                        # current_data_results['test_{}'.format(scoring_func)] = "-1"
                        # current_data_results['test_{}_bootstrap'.format(scoring_func)] = "-1"
                        current_data_results['test_{}_diff_mean'.format(scoring_func)] = -1
                        current_data_results['test_{}_diff_std'.format(scoring_func)] = -1
                        current_data_results['test_{}_diff_mean_bootstrap'.format(scoring_func)] = -1
                        current_data_results['test_{}_diff_std_bootstrap'.format(scoring_func)] = -1

                print_info('Model score differences (mean, std) for {} '
                           'test sample folds: {:.5f}, {:.5f}'.format(
                               t, scores_mean, scores_std))

                model_params_string = ','.join('{}:{}'.format(key, val)
                                               for key, val in
                                               sorted(model_parameters.items()))

                current_data_results['dataset'] = str(d)
                current_data_results['model'] = str(mod)
                current_data_results['model_params'] = model_params_string
                current_data_results['num_test_sets'] = t
                current_data_results['num_CV_folds'] = num_CV_folds
                current_data_results['elapsed_time_train'] = elapsed_time_train
                current_data_results['elapsed_time_test'] = time.time() - start_time_test

                final_results = final_results.append(current_data_results,
                                                     ignore_index=True)

        print_info('Creating results plots...', ':')
        scoring_func_plot = 'f1_score'
        train_differences = []

        current_data_plot_nsplits = final_results.query(
            '(dataset=="{}") & (model=="{}")'.format(d, mod))['num_test_sets']

        # explicit conversion to floats is necessary for the np.isfinite method,
        # which is implicitly called during plotting
        current_data_plot_xyvals = [
            current_data_plot_nsplits.values.astype(np.float32)
        ]
        current_data_plot_xyvals_bootstrap = [
            current_data_plot_nsplits.values.astype(np.float32)
        ]
        current_data_plot_xyvals_max = [
            current_data_plot_nsplits.values.astype(np.float32)
        ]
        current_data_plot_xyvals_max_bootstrap = [
            current_data_plot_nsplits.values.astype(np.float32)
        ]

        for mod in model_config:
            current_data_plot_xyvals.append(
                final_results.query('(dataset=="{}") & (model=="{}")'.format(
                    d, mod))['test_{}_diff_mean'.format(
                        scoring_func_plot)].values.astype(np.float32))
            current_data_plot_xyvals.append(
                final_results.query('(dataset=="{}") & (model=="{}")'.format(
                    d, mod))['test_{}_diff_std'.format(
                        scoring_func_plot)].values.astype(np.float32))
            current_data_plot_xyvals_max.append(
                final_results.query('(dataset=="{}") & (model=="{}")'.format(
                    d, mod))['test_{}_diff_max'.format(
                        scoring_func_plot)].values.astype(np.float32))
            current_data_plot_xyvals_max.append(
                np.zeros(current_data_plot_xyvals_max[-1].shape))
            current_data_plot_xyvals_max_bootstrap.append(
                final_results.query('(dataset=="{}") & (model=="{}")'.format(
                    d, mod))['test_{}_diff_max_bootstrap'.format(
                        scoring_func_plot)].values.astype(np.float32))
            current_data_plot_xyvals_max_bootstrap.append(
                np.zeros(current_data_plot_xyvals_max_bootstrap[-1].shape))
            train_differences.append(
                abs(final_results.query('(dataset=="{}") & '
                                        '(model=="{}")'.format(
                                            d, mod))['train_{}'.format(
                                                scoring_func_plot)].iloc[0] -
                    final_results.query('(dataset=="{}") & (model=="{}")'.format(
                        d, mod))['test_{}_1fold'.format(scoring_func_plot)].iloc[0]))
            current_data_plot_xyvals_bootstrap.append(
                final_results.query('(dataset=="{}") & (model=="{}")'.format(
                    d, mod))['test_{}_diff_mean_bootstrap'.format(
                        scoring_func_plot)].values.astype(np.float32))
            current_data_plot_xyvals_bootstrap.append(
                final_results.query('(dataset=="{}") & (model=="{}")'.format(
                    d, mod))['test_{}_diff_std_bootstrap'.format(
                        scoring_func_plot)].values.astype(np.float32))

        xmax_list = [None]
        for i in range(10, 100, 10):
            if num_test_samples > i:
                xmax_list.append(i)

        for lim in xmax_list:
            current_data_plot = plot_performance_diff(
                *current_data_plot_xyvals,
                labels=[m for m in model_config],
                train_difference=train_differences,
                xmax=lim,
                xlabel='number of samples',
                ylabel='mean performance difference')
            plot_filename = '{}_performance-diff_num-splits_full'.format(d)
            if lim is not None:
                plot_filename += '_zoomed-{}'.format(lim)
            om.save(current_data_plot, plot_filename)

            current_data_plot = plot_performance_diff(
                *current_data_plot_xyvals,
                labels=[m for m in model_config],
                xmax=lim,
                xlabel='number of samples',
                ylabel='mean performance difference')
            plot_filename = '{}_performance-diff_num-splits'.format(d)
            if lim is not None:
                plot_filename += '_zoomed-{}'.format(lim)
            om.save(current_data_plot, plot_filename)

            current_data_plot = plot_performance_diff(
                *current_data_plot_xyvals_max,
                labels=[m for m in model_config],
                xmax=lim,
                xlabel='number of samples',
                ylabel='maximum performance difference')
            plot_filename = '{}_performance-diff_max_num-splits'.format(d)
            if lim is not None:
                plot_filename += '_zoomed-{}'.format(lim)
            om.save(current_data_plot, plot_filename)

            current_data_plot = plot_performance_diff(
                *current_data_plot_xyvals_max_bootstrap,
                labels=[m for m in model_config],
                xmax=lim,
                xlabel='number of samples',
                ylabel='maximum performance difference')
            plot_filename = '{}_performance-diff_max_num-splits_bootstrap'.format(d)
            if lim is not None:
                plot_filename += '_zoomed-{}'.format(lim)
            om.save(current_data_plot, plot_filename)

            current_data_plot = plot_performance_diff(
                *current_data_plot_xyvals_bootstrap,
                labels=[m for m in model_config],
                train_difference=train_differences,
                xmax=lim,
                xlabel='number of samples',
                ylabel='mean performance difference')
            plot_filename = '{}_performance-diff_num-splits_full_bootstrap'.format(d)
            if lim is not None:
                plot_filename += '_zoomed-{}'.format(lim)
            om.save(current_data_plot, plot_filename)

            current_data_plot = plot_performance_diff(
                *current_data_plot_xyvals_bootstrap,
                labels=[m for m in model_config],
                xmax=lim,
                xlabel='number of samples',
                ylabel='mean performance difference')
            plot_filename = '{}_performance-diff_num-splits_bootstrap'.format(d)
            if lim is not None:
                plot_filename += '_zoomed-{}'.format(lim)
            om.save(current_data_plot, plot_filename)

        print_info('Saving the final results...', ':')
        om.save(final_results, '{}_final-results'.format(d))
        final_results_dict = final_results.to_dict('dict')
        final_results_dict['relation'] = str(d)  # needed for ARFF
        final_results_dict['description'] = u''  # needed for ARFF
        om.save(final_results, '{}_final-results'.format(d), to_arff=True)

    print_info('\n')
    print_info('Everything done. (Elapsed overall time: {} seconds)\n'.format(
        time.time() - start_time_main))
def _optimize_skopt() -> Union[pd.Series,
                               Tuple[pd.Series, pd.Series],
                               Tuple[pd.Series, pd.Series, dict]]:
    nonlocal max_tries
    max_tries = get_max_tries(max_tries, _grid_size)
    dimensions = construct_dimensions(kwargs)
    memoized_run = lru_cache()(lambda tup: self.run(**dict(tup)))

    INVALID = 1e300
    progress = tqdm(dimensions, total=max_tries,
                    desc='Backtest.optimize', leave=False)

    @use_named_args(dimensions=dimensions)
    def objective_function(**params):
        progress.update(1)

        # Check constraints
        # TODO: Adjust after https://github.com/scikit-optimize/scikit-optimize/pull/971
        if not constraint(AttrDict(params)):
            return INVALID
        res = memoized_run(tuple(params.items()))
        value = -maximize(res)
        if np.isnan(value):
            return INVALID
        return value

    with warnings.catch_warnings():
        warnings.filterwarnings(
            'ignore', 'The objective has been evaluated at this point before.')

        res = forest_minimize(
            func=objective_function,
            dimensions=dimensions,
            n_calls=max_tries,
            base_estimator=ExtraTreesRegressor(n_estimators=20, min_samples_leaf=2),
            acq_func='LCB',
            kappa=3,
            n_jobs=-1,
            n_initial_points=min(max_tries, 20 + 3 * len(kwargs)),
            initial_point_generator='lhs',  # 'sobol' requires n_initial_points ~ 2**N
            callback=DeltaXStopper(9e-7),
            random_state=random_state)

    stats = self.run(**dict(zip(kwargs.keys(), res.x)))
    output = [stats]

    if return_heatmap:
        heatmap = pd.Series(
            dict(zip(map(tuple, res.x_iters), -res.func_vals)),
            name=maximize_key)
        heatmap.index.names = kwargs.keys()
        heatmap = heatmap[heatmap != -INVALID]
        heatmap.sort_index(inplace=True)
        output.append(heatmap)

    if return_optimization:
        valid = res.func_vals != INVALID
        res.x_iters = list(compress(res.x_iters, valid))
        res.func_vals = res.func_vals[valid]
        output.append(res)

    progress.clear()
    progress.close()
    return stats if len(output) == 1 else tuple(output)
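The objective above relies on skopt's use_named_args decorator. As a stand-alone sketch of that pattern (the dimension names and toy objective below are made up): the decorator unpacks the positional list the optimizer passes in and hands it to the wrapped function as keyword arguments named after the dimensions.

# Hedged sketch of the @use_named_args pattern; names are illustrative only.
from skopt.space import Integer, Real
from skopt.utils import use_named_args

dims = [Integer(5, 50, name='n_slow'), Real(0.5, 5.0, name='factor')]

@use_named_args(dimensions=dims)
def objective(n_slow, factor):
    # Toy objective: pretend the optimum is at n_slow=20, factor=2.0.
    return (n_slow - 20) ** 2 + (factor - 2.0) ** 2

print(objective([10, 1.0]))   # called with a plain list, received as n_slow=10, factor=1.0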