def grid_scores_(self):
    # Reconstruct per-configuration validation scores from the optimizer's
    # run history. Costs stored there are losses, so score = 1 - cost.
    grid_scores = list()

    scores_per_config = defaultdict(list)
    config_list = list()

    for run_key in self.runhistory_.data:
        run_value = self.runhistory_.data[run_key]
        config_id = run_key.config_id
        cost = run_value.cost

        if config_id not in config_list:
            config_list.append(config_id)
        scores_per_config[config_id].append(cost)

    for config_id in config_list:
        scores = [1 - score for score in scores_per_config[config_id]]
        mean_score = np.mean(scores)

        config = self.runhistory_.ids_config[config_id]
        grid_score = _CVScoreTuple(config.get_dictionary(), mean_score,
                                   scores)
        grid_scores.append(grid_score)

    return grid_scores

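# Hedged illustration (not part of the original method): the aggregation above
# assumes each run's `cost` is a loss, so a validation score is recovered as
# 1 - cost. The self-contained toy below mirrors that logic without needing a
# run history object; the config ids and costs are made up.
from collections import defaultdict
import numpy as np

costs_per_config = defaultdict(list)
for config_id, cost in [(1, 0.10), (1, 0.12), (2, 0.30)]:
    costs_per_config[config_id].append(cost)

for config_id, costs in costs_per_config.items():
    scores = [1 - c for c in costs]
    print(config_id, np.mean(scores), scores)  # e.g. 1 0.89 [0.9, 0.88]
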
def grid_scores_(self):
    warnings.warn(
        "The grid_scores_ attribute was deprecated in version 0.18"
        " in favor of the more elaborate cv_results_ attribute."
        " The grid_scores_ attribute will not be available from 0.20",
        DeprecationWarning)

    check_is_fitted(self, 'cv_results_')

    grid_scores = list()
    for i, (params, mean, std) in enumerate(
            zip(self.cv_results_['params'],
                self.cv_results_['mean_test_score'],
                self.cv_results_['std_test_score'])):
        scores = np.array(
            list(self.cv_results_['split%d_test_score' % s][i]
                 for s in range(self.n_splits_)),
            dtype=np.float64)
        grid_scores.append(_CVScoreTuple(params, mean, scores))

    return grid_scores

def grid_scores_(self):
    import numpy as np

    if self.multimetric_:
        raise AttributeError("grid_scores_ attribute is not available for"
                             " multi-metric evaluation.")
    warnings.warn(
        "The grid_scores_ attribute was deprecated in version 0.18"
        " in favor of the more elaborate cv_results_ attribute."
        " The grid_scores_ attribute will not be available from 0.20",
        DeprecationWarning)

    grid_scores = list()
    for i, (params, mean, std) in enumerate(
            zip(self.cv_results_['params'],
                self.cv_results_['mean_test_score'],
                self.cv_results_['std_test_score'])):
        scores = np.array(
            list(self.cv_results_['split%d_test_score' % s][i]
                 for s in range(self.n_splits_)),
            dtype=np.float64)
        grid_scores.append(_CVScoreTuple(params, mean, scores))

    return grid_scores

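# Hedged example (new, not from the original code): the information that
# grid_scores_ used to expose can be read directly from cv_results_, its
# documented replacement. The estimator and parameter grid below are only
# illustrative.
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
search = GridSearchCV(SVC(), {'C': [0.1, 1, 10]}, cv=3).fit(X, y)

results = search.cv_results_
for i, params in enumerate(results['params']):
    split_scores = [results['split%d_test_score' % s][i]
                    for s in range(search.n_splits_)]
    print(params, results['mean_test_score'][i], split_scores)
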
def fit(self, X, y=None, labels=None):
    # return self._fit(
    #     X, y, labels,
    #     parameter_iterable  # parameter_iterable \in Sized; _fit actually
    #                         # calls len(parameter_iterable)
    # )

    # FIXME code duplication from BaseSearchCV._fit
    estimator = self.estimator
    cv = _split.check_cv(self.cv, y, classifier=is_classifier(estimator))
    self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

    n_samples = _num_samples(X)
    X, y, labels = indexable(X, y, labels)

    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    n_splits = cv.get_n_splits(X, y, labels)

    # FIXME there is no parameter_iterable in this method (candidates come
    # from the Bayesian optimisation loop below), so the BaseSearchCV verbose
    # message cannot be emitted as-is:
    # if self.verbose > 0 and isinstance(parameter_iterable, Sized):
    #     n_candidates = len(parameter_iterable)
    #     print("Fitting {0} folds for each of {1} candidates, totalling"
    #           " {2} fits".format(n_splits, n_candidates,
    #                              n_candidates * n_splits))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch  # FIXME how to handle pre_dispatch

    # FIXME recursively getting new parameters to evaluate
    # parameter_iterable = ...  # the magic
    #
    # The evaluation (Parallel) stuff
    # out = Parallel(
    #     n_jobs=self.n_jobs, verbose=self.verbose,
    #     pre_dispatch=pre_dispatch
    # )(delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
    #                           train, test, self.verbose, parameters,
    #                           self.fit_params, return_parameters=True,
    #                           error_score=self.error_score)
    #   for parameters in parameter_iterable
    #   for train, test in cv.split(X, y, labels))
    #
    # n_fits on each (train, test)

    def cross_validation(raw_parameters):
        # Evaluate one candidate configuration on every CV split.
        parameters = dict(zip(
            self.param_grid.keys(), raw_parameters
        ))  # TODO more robust way of doing this
        print(parameters)

        return Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=pre_dispatch
        )(delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
                                  train, test, self.verbose, parameters,
                                  self.fit_params, return_parameters=True,
                                  error_score=self.error_score)
          for train, test in cv.split(X, y, labels))

    # All candidate parameter settings, one row per configuration.
    x = cartesian_product(*self.param_grid.values())

    # FIXME implement as non-recursive
    def bo_(x_obs, y_obs, n_iter):
        # One round of Bayesian optimisation: fit a GP to the observed losses
        # (1 - score), maximise the acquisition function over the candidate
        # grid, evaluate the chosen candidate with CV and recurse.
        if n_iter > 0:
            kernel = kernels.Matern() + kernels.WhiteKernel()
            gp = GaussianProcessRegressor(kernel=kernel,
                                          n_restarts_optimizer=16)
            gp.fit(x_obs, 1 - y_obs)

            a = a_EI(gp, x_obs=x_obs, y_obs=1 - y_obs)
            argmax_f_x_ = x[np.argmax(a(x))]

            # heavy evaluation
            f_argmax_f_x_ = cross_validation(argmax_f_x_)
            y_ob = np.atleast_2d(
                mean_mean_validation_scores(f_argmax_f_x_)).T

            return f_argmax_f_x_ + bo_(
                x_obs=np.vstack((x_obs, argmax_f_x_)),
                y_obs=np.vstack((y_obs, y_ob)),
                n_iter=n_iter - 1,
            )
        else:
            return []

    # FIXME (most informative) decision like Numerical Probabilistics stuff
    # for integrations
    # Sobol initialization?
    # Random initial design, then the Bayesian optimisation loop.
    sampled_x_ind = np.random.choice(
        x.shape[0],
        size=self.n_initial_points,
        replace=False,
    )
    print(sampled_x_ind)

    x_obs = x[sampled_x_ind]
    f_x_obs = list(map(cross_validation, x_obs))
    y_obs = np.atleast_2d(list(map(mean_mean_validation_scores, f_x_obs))).T

    out = sum(f_x_obs, []) + bo_(x_obs, y_obs, n_iter=self.n_iter)

    n_fits = len(out)

    scores = list()
    grid_scores = list()
    for grid_start in range(0, n_fits, n_splits):
        n_test_samples = 0
        score = 0
        all_scores = []
        for this_score, this_n_test_samples, _, parameters in \
                out[grid_start:grid_start + n_splits]:
            all_scores.append(this_score)
            if self.iid:
                this_score *= this_n_test_samples
                n_test_samples += this_n_test_samples
            score += this_score
        if self.iid:
            score /= float(n_test_samples)
        else:
            score /= float(n_splits)
        scores.append((score, parameters))

        grid_scores.append(_search._CVScoreTuple(
            parameters,
            score,
            np.array(all_scores)))
    self.grid_scores_ = grid_scores

    best = sorted(grid_scores, key=lambda x: x.mean_validation_score,
                  reverse=True)[0]
    self.best_params_ = best.parameters
    self.best_score_ = best.mean_validation_score

    if self.refit:
        best_estimator = clone(base_estimator).set_params(
            **best.parameters)
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self

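# The helpers a_EI, cartesian_product and mean_mean_validation_scores are used
# in fit() above but are not shown in this snippet. As a hedged sketch only,
# an expected-improvement acquisition factory compatible with the call
# `a = a_EI(gp, x_obs=..., y_obs=...)` could look like the function below;
# the GP is assumed to model the loss 1 - score, so lower predictions are
# better. This is an assumption, not the original implementation.
import numpy as np
from scipy.stats import norm


def a_EI(gp, x_obs, y_obs, xi=0.01):
    """Return a callable computing expected improvement for candidate points.

    x_obs is accepted for signature compatibility with the call site above
    but is not needed here.
    """
    y_best = np.min(y_obs)  # best (lowest) observed loss

    def acquisition(x_candidates):
        mu, sigma = gp.predict(x_candidates, return_std=True)
        mu = np.ravel(mu)                  # flatten in case y was a column vector
        sigma = np.maximum(sigma, 1e-12)   # guard against zero predictive std
        improvement = y_best - mu - xi     # positive when a lower loss is expected
        z = improvement / sigma
        return improvement * norm.cdf(z) + sigma * norm.pdf(z)

    return acquisition
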
def _fit(self, Z, parameter_iterable):
    """Actual fitting, performing the search over parameters."""
    self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

    cv = self.cv
    cv = _check_cv(cv, Z)

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch, backend="threading")(
            delayed(_fit_and_score)(clone(base_estimator), Z, self.scorer_,
                                    train, test, self.verbose, parameters,
                                    self.fit_params, return_parameters=True,
                                    error_score=self.error_score)
            for parameters in parameter_iterable
            for train, test in cv)

    # out is a list of 4-tuples, unpacked below as
    # (score, n_test_samples, _, parameters)
    n_fits = len(out)
    n_folds = len(cv)

    scores = list()
    grid_scores = list()
    for grid_start in range(0, n_fits, n_folds):
        n_test_samples = 0
        score = 0
        all_scores = []
        for this_score, this_n_test_samples, _, parameters in \
                out[grid_start:grid_start + n_folds]:
            all_scores.append(this_score)
            if self.iid:
                this_score *= this_n_test_samples
                n_test_samples += this_n_test_samples
            score += this_score
        if self.iid:
            score /= float(n_test_samples)
        else:
            score /= float(n_folds)
        scores.append((score, parameters))
        # TODO: shall we also store the test_fold_sizes?
        grid_scores.append(_CVScoreTuple(
            parameters,
            score,
            np.array(all_scores)))

    # Store the computed scores
    self.grid_scores_ = grid_scores

    # Find the best parameters by comparing on the mean validation score:
    # note that `sorted` is deterministic in the way it breaks ties
    best = sorted(grid_scores, key=lambda x: x.mean_validation_score,
                  reverse=True)[0]
    self.best_params_ = best.parameters
    self.best_score_ = best.mean_validation_score

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(
            **best.parameters)
        best_estimator.fit(Z, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
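# Hedged usage note (not from the original code): _fit expects a concrete,
# sized iterable of parameter dicts; in scikit-learn that role is usually
# played by ParameterGrid. The call below is hypothetical, since the enclosing
# class and the structure of Z (passed straight through to _fit_and_score)
# are not shown here.
# from sklearn.model_selection import ParameterGrid
# searcher._fit(Z, ParameterGrid({'C': [0.1, 1, 10], 'gamma': [1e-3, 1e-2]}))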