def _generate_meta_estimator_instances_with_pipeline():
    """Generate instances of meta-estimators fed with a pipeline.

    Meta-estimators are all estimators accepting one of "estimator",
    "base_estimator", "regressor", "transformer_list" or "estimators".
    """
    for _, Estimator in sorted(all_estimators()):
        sig = set(signature(Estimator).parameters)

        if "estimator" in sig or "base_estimator" in sig or "regressor" in sig:
            if is_regressor(Estimator):
                estimator = make_pipeline(TfidfVectorizer(), Ridge())
                param_grid = {"ridge__alpha": [0.1, 1.0]}
            else:
                estimator = make_pipeline(TfidfVectorizer(), LogisticRegression())
                param_grid = {"logisticregression__C": [0.1, 1.0]}

            if "param_grid" in sig or "param_distributions" in sig:
                # SearchCV estimators
                extra_params = {"n_iter": 2} if "n_iter" in sig else {}
                yield Estimator(estimator, param_grid, **extra_params)
            else:
                yield Estimator(estimator)

        elif "transformer_list" in sig:
            # FeatureUnion
            transformer_list = [
                ("trans1", make_pipeline(TfidfVectorizer(), MaxAbsScaler())),
                (
                    "trans2",
                    make_pipeline(TfidfVectorizer(), StandardScaler(with_mean=False)),
                ),
            ]
            yield Estimator(transformer_list)

        elif "estimators" in sig:
            # stacking, voting
            if is_regressor(Estimator):
                estimator = [
                    ("est1", make_pipeline(TfidfVectorizer(), Ridge(alpha=0.1))),
                    ("est2", make_pipeline(TfidfVectorizer(), Ridge(alpha=1))),
                ]
            else:
                estimator = [
                    (
                        "est1",
                        make_pipeline(TfidfVectorizer(), LogisticRegression(C=0.1)),
                    ),
                    ("est2", make_pipeline(TfidfVectorizer(), LogisticRegression(C=1))),
                ]
            yield Estimator(estimator)

        else:
            continue
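
# A quick usage sketch (assuming the scikit-learn test imports used by the
# generator above are in scope): list which meta-estimator classes get
# instantiated with a text-processing pipeline inside.
for meta_est in _generate_meta_estimator_instances_with_pipeline():
    print(type(meta_est).__name__)
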
def _convert_train_data(self, X, y, weights=None):
    """Convert the training data to a form accepted by Lolo

    Args:
        X (ndarray): Input variables
        y (ndarray): Output variables
        weights (ndarray): Weights for each sample
    Returns:
        train_data (JavaObject): Pointer to the training data in Java
    """
    # Make some default weights
    if weights is None:
        weights = np.ones(len(y))

    # Convert y and w to float64 or int32 with native ordering
    y = np.array(y, dtype=np.float64 if is_regressor(self) else np.int32)
    weights = np.array(weights, dtype=np.float64)

    # Convert X, y, and w to Java Objects
    X_java = send_feature_array(self.gateway, X)
    if self._num_outputs == 1:
        y_java = send_1D_array(self.gateway, y, is_regressor(self))
    else:
        y_java = send_feature_array(self.gateway, y)
    assert y_java.length() == len(y) == len(X)
    w_java = send_1D_array(self.gateway, weights, True)
    assert w_java.length() == len(weights)

    return self.gateway.jvm.io.citrine.lolo.util.LoloPyDataLoader.zipTrainingData(
        X_java, y_java), w_java
def _convert_train_data(self, X, y, weights=None):
    """Convert the training data to a form accepted by Lolo

    Args:
        X (ndarray): Input variables
        y (ndarray): Output variables
        weights (ndarray): Weights for each sample
    Returns:
        train_data (JavaObject): Pointer to the training data in Java
    """
    # Make some default weights
    if weights is None:
        weights = np.ones(len(y))

    # Convert X, y, and w to float64 or int32 with native ordering
    X = np.array(X, dtype=np.float64)
    y = np.array(y, dtype=np.float64 if is_regressor(self) else np.int32)
    weights = np.array(weights, dtype=np.float64)
    big_end = sys.byteorder == "big"

    # Convert X and y to Java Objects
    X_java = self.gateway.jvm.io.citrine.lolo.util.LoloPyDataLoader.getFeatureArray(
        X.tobytes(), X.shape[1], big_end)
    y_java = self.gateway.jvm.io.citrine.lolo.util.LoloPyDataLoader.get1DArray(
        y.tobytes(), is_regressor(self), big_end)
    assert y_java.length() == len(y) == len(X)
    w_java = self.gateway.jvm.io.citrine.lolo.util.LoloPyDataLoader.get1DArray(
        np.array(weights).tobytes(), True, big_end)
    assert w_java.length() == len(weights)

    return self.gateway.jvm.io.citrine.lolo.util.LoloPyDataLoader.zipTrainingData(
        X_java, y_java), w_java
def yield_all_checks(name, estimator):
    tags = estimator._get_tags()
    if "2darray" not in tags["X_types"]:
        warnings.warn("Can't test estimator {} which requires input "
                      "of type {}".format(name, tags["X_types"]),
                      SkipTestWarning)
        return
    if tags["_skip_test"]:
        warnings.warn("Explicit SKIP via _skip_test tag for estimator "
                      "{}.".format(name),
                      SkipTestWarning)
        return

    yield from _yield_checks(name, estimator)
    if is_classifier(estimator):
        yield from _yield_classifier_checks(name, estimator)
    if is_regressor(estimator):
        yield from _yield_regressor_checks(name, estimator)
    if hasattr(estimator, 'transform'):
        if not tags["allow_variable_length"]:
            # Transformer tests ensure that shapes are the same at fit and
            # transform time, hence we need to skip them for estimators that
            # allow variable-length inputs
            yield from _yield_transformer_checks(name, estimator)
    if isinstance(estimator, ClusterMixin):
        yield from _yield_clustering_checks(name, estimator)
    if is_outlier_detector(estimator):
        yield from _yield_outliers_checks(name, estimator)
    # We are not strict on presence/absence of the 3rd dimension
    # yield check_fit2d_predict1d
    if not tags["non_deterministic"]:
        yield check_methods_subset_invariance
    yield check_fit2d_1sample
    yield check_fit2d_1feature
    yield check_fit1d
    yield check_get_params_invariance
    yield check_set_params
    yield check_dict_unchanged
    yield check_dont_overwrite_parameters
    yield check_fit_idempotent
    if (is_classifier(estimator) or is_regressor(estimator)
            or isinstance(estimator, ClusterMixin)):
        if tags["allow_variable_length"]:
            yield check_different_length_fit_predict_transform
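
# Hedged usage sketch: enumerate the checks selected for an estimator. The
# custom tags ("allow_variable_length", "non_deterministic") are specific to
# this library, so `SomeLibraryEstimator` below is a hypothetical stand-in
# for an estimator defined in this code base, not a stock scikit-learn one.
est = SomeLibraryEstimator()
for check in yield_all_checks(type(est).__name__, est):
    print(getattr(check, "__name__", repr(check)))
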
def check_regressor(regressor=None, random_state=None):
    """Check if a regressor is given and if it is valid,
    otherwise set default regressor.

    Parameters
    ----------
    regressor : sklearn-like regressor, optional, default=None.
    random_state : int, RandomState instance or None, default=None
        Used to set random_state of the default regressor.

    Returns
    -------
    regressor

    Raises
    ------
    ValueError
        Raise error if given regressor is not a valid sklearn-like regressor.
    """
    if regressor is None:
        regressor = GradientBoostingRegressor(max_depth=5,
                                              random_state=random_state)
    else:
        if not is_regressor(regressor):
            raise ValueError(
                f"`regressor` should be a sklearn-like regressor, "
                f"but found: {regressor}")
        regressor = clone(regressor)
    return regressor
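
# A minimal usage sketch for check_regressor above: no argument yields the
# default GradientBoostingRegressor, a valid regressor is returned as an
# unfitted clone, and anything else raises ValueError.
from sklearn.linear_model import LinearRegression

default_reg = check_regressor(random_state=0)     # GradientBoostingRegressor
cloned_reg = check_regressor(LinearRegression())  # unfitted clone of the input
try:
    check_regressor("not a regressor")
except ValueError as err:
    print(err)
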
def decision_function(self, x):
    """output f(x) for given samples

    Parameters
    ----------
    x : array-like of shape (n_samples, d)
        containing the input dataset

    Returns
    -------
    np.array of shape (n_samples,)
        containing f(x)
    """
    check_is_fitted(self, "reg")
    x = x.copy()
    if is_regressor(self):
        pred = stats.predict(
            self.reg,
            pd.DataFrame(
                x, columns=['x' + str(i + 1) for i in range(x.shape[1])]))
    elif is_classifier(self):
        pred = stats.predict(
            self.reg.reg,
            pd.DataFrame(
                x, columns=['x' + str(i + 1) for i in range(x.shape[1])]))
    return pred.flatten()
def decision_function(self, x):
    """output f(x) for given samples

    Parameters
    ----------
    x : array-like of shape (n_samples, 1)
        containing the input dataset

    Returns
    -------
    np.array of shape (n_samples,)
        containing f(x)
    """
    check_is_fitted(self, "sm_")
    x = x.copy()
    x[x < self.xmin] = self.xmin
    x[x > self.xmax] = self.xmax
    # np.integer replaces the np.int alias removed in NumPy 1.24
    if isinstance(self.sm_, (np.ndarray, np.integer, int, np.floating, float)):
        pred = self.sm_ * np.ones(x.shape[0])
    else:
        if is_classifier(self):
            pred = bigsplines.predict_bigssg(self.sm_,
                                             ro.r("data.frame")(x=x))[1]
        if is_regressor(self):
            pred = bigsplines.predict_bigspline(self.sm_,
                                                ro.r("data.frame")(x=x))
    return pred
def get_estimator_params(estimator):
    """Get estimator parameters.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        estimator (:obj:`estimator`):
            | Scikit-learn estimator from which to log parameters.

    Returns:
        ``dict`` with all parameters mapped to their values.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()

            run = neptune.init(project='my_workspace/my_project')
            run['estimator/params'] = npt_utils.get_estimator_params(rfr)
    """
    assert is_regressor(estimator) or is_classifier(estimator) \
        or isinstance(estimator, KMeans), \
        'Estimator should be sklearn regressor, classifier or kmeans clusterer.'

    return estimator.get_params()
def train(self, x_train, y_train):
    md = self.model(random_state=SEED)
    if ('base_estimator' in md.get_params()
            and md.get_params()['base_estimator'] is None):
        if is_classifier(self.model):
            base_param = {
                'base_estimator': DecisionTreeClassifier(random_state=SEED)
            }
        elif is_regressor(self.model):
            base_param = {
                'base_estimator': DecisionTreeRegressor(random_state=SEED)
            }
        md = md.set_params(**base_param)

    if self.estimator in self.hyper.keys():
        md.set_params(**self.hyper[self.estimator]['params'])

    gt_train, pd_train, md = self._cross_validation(md, x_train, y_train,
                                                    run_only_once=True)
    matches = self._report(gt_train, pd_train, md, prefix='train')
    return gt_train, pd_train, matches, self.output
def __init__(self, X, t, y, cv, outcome_learner, effect_learner,
             outcome_param_grid={}, effect_param_grid={},
             params_outcome={}, params_effect={}):
    super().__init__(X, t, y, cv)
    self.outcome_learner = copy.deepcopy(outcome_learner) \
        .set_params(**params_outcome)
    self.outcome_param_grid = outcome_param_grid
    self.effect_learner = copy.deepcopy(effect_learner) \
        .set_params(**params_effect)
    self.effect_param_grid = effect_param_grid

    # Check if the outcome learner is a regressor or a classifier, and
    # initialize the appropriate metalearner object
    if is_regressor(self.outcome_learner):
        self.meta_learner_class = BaseRRegressor
    else:
        self.meta_learner_class = BaseRClassifier

    self._make_meta_learner()
def _stack_layers(self, *layers, default=None):
    if is_classifier(self):
        stack = partial(StackingClassifier, cv=self.cv, n_jobs=self.n_jobs,
                        passthrough=self.passthrough, verbose=0)
    elif is_regressor(self):
        stack = partial(StackingRegressor, cv=self.cv, n_jobs=self.n_jobs,
                        passthrough=self.passthrough, verbose=0)
    layers = list(layers)
    if len(layers) == 1:
        if isinstance(layers[0], list):
            # Give each estimator a unique name before stacking
            layers[0] = [(str(i) + '_' + layers[0][i].__class__.__name__,
                          layers[0][i])
                         for i in range(len(layers[0]))]
            return stack(layers[0], default)
        else:
            return layers[0]
    elif len(layers) == 2:
        if not isinstance(layers[0], list):
            layers[0] = [layers[0]]
        if not isinstance(layers[1], list):
            layers[1] = [layers[1]]
        layers[0] = [(str(i) + '_' + layers[0][i].__class__.__name__,
                      layers[0][i])
                     for i in range(len(layers[0]))]
        return self._stack_layers(
            *[stack(layers[0], estimator) for estimator in layers[1]],
            default=default)
    elif len(layers) > 2:
        return self._stack_layers(
            self._stack_layers(layers[0], layers[1], default=default),
            *layers[2:], default=default)
    raise ValueError("_stack_layers expects at least one layer")
def _validate_hyperparameters(self) -> None:
    """Validate the hyperparameters."""
    if not (hasattr(self.input_to_node, "fit")
            and hasattr(self.input_to_node, "fit_transform")
            and hasattr(self.input_to_node, "transform")):
        raise TypeError("All input_to_node should be transformers and "
                        "implement fit and transform; '{0}' (type {1}) "
                        "doesn't".format(self.input_to_node,
                                         type(self.input_to_node)))

    if not (hasattr(self.node_to_node, "fit")
            and hasattr(self.node_to_node, "fit_transform")
            and hasattr(self.node_to_node, "transform")):
        raise TypeError("All node_to_node should be transformers and "
                        "implement fit and transform; '{0}' (type {1}) "
                        "doesn't".format(self.node_to_node,
                                         type(self.node_to_node)))

    if (self._requires_sequence != "auto"
            and not isinstance(self._requires_sequence, bool)):
        raise ValueError('Invalid value for requires_sequence, got {0}'
                         .format(self._requires_sequence))

    if not is_regressor(self._regressor):
        raise TypeError("The last step should be a regressor and "
                        "implement fit and predict; '{0}' (type {1}) "
                        "doesn't".format(self._regressor,
                                         type(self._regressor)))
def _validate_hyperparameters(self):
    """Validate the hyperparameters."""
    self.random_state = check_random_state(self.random_state)

    if not (hasattr(self.input_to_node, "fit")
            and hasattr(self.input_to_node, "fit_transform")
            and hasattr(self.input_to_node, "transform")):
        raise TypeError("All input_to_node should be transformers "
                        "and implement fit and transform; "
                        "'%s' (type %s) doesn't"
                        % (self.input_to_node, type(self.input_to_node)))

    if self._chunk_size is not None and (
            not isinstance(self._chunk_size, int) or self._chunk_size < 0):
        raise ValueError('Invalid value for chunk_size, got {0}'.format(
            self._chunk_size))

    if not is_regressor(self._regressor):
        raise TypeError("The last step should be a regressor "
                        "and implement fit and predict; "
                        "'%s' (type %s) doesn't"
                        % (self._regressor, type(self._regressor)))
def __init__(self, estimator, n_estimators=100, perc=100, alpha=0.05,
             two_step=True, max_iter=100, random_state=None, verbose=0,
             importance_type='gini', scale_permutation_bytree=False):
    self.estimator = estimator
    self.n_estimators = n_estimators
    self.perc = perc
    self.alpha = alpha
    self.two_step = two_step
    self.max_iter = max_iter
    self.random_state = random_state
    self.verbose = verbose
    self.importance_type = importance_type
    self.scale_permutation_bytree = scale_permutation_bytree
    if is_classifier(self.estimator):
        self.task = 'classification'
    elif is_regressor(self.estimator):
        self.task = 'regression'
    else:
        self.task = 'other'
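
# The task dispatch used in __init__ above, shown standalone as a sketch:
from sklearn.base import is_classifier, is_regressor
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

for est in (RandomForestClassifier(), RandomForestRegressor(), KMeans()):
    task = ('classification' if is_classifier(est)
            else 'regression' if is_regressor(est)
            else 'other')
    print(type(est).__name__, '->', task)
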
def test_meta_estimators_delegate_data_validation(estimator):
    # Check that meta-estimators delegate data validation to the inner
    # estimator(s).
    rng = np.random.RandomState(0)
    set_random_state(estimator)

    n_samples = 30
    X = rng.choice(np.array(["aa", "bb", "cc"], dtype=object), size=n_samples)

    if is_regressor(estimator):
        y = rng.normal(size=n_samples)
    else:
        y = rng.randint(3, size=n_samples)

    # We convert to lists to make sure it works on array-like
    X = _enforce_estimator_tags_x(estimator, X).tolist()
    y = _enforce_estimator_tags_y(estimator, y).tolist()

    # Calling fit should not raise any data validation exception since X is
    # a valid input datastructure for the first step of the pipeline passed
    # as base estimator to the meta estimator.
    estimator.fit(X, y)

    # n_features_in_ should not be defined since data is not tabular data.
    assert not hasattr(estimator, "n_features_in_")
def test_StackerClassifier():
    np.random.seed(123)
    X = np.random.randn(100, 10)
    y = 1 * (np.random.randn(100) > 0)

    stacker = StackerClassifier(
        models=[
            RandomForestClassifier(random_state=123),
            LogisticRegression(C=1, random_state=123)
        ],
        cv=10,
        blender=LogisticRegression(C=1, random_state=123),
    )

    stacker.fit(X, y)

    yhat = stacker.predict(X)
    assert yhat.ndim == 1
    assert yhat.shape[0] == X.shape[0]
    assert list(set(yhat)) == [0, 1]
    assert list(stacker.classes_) == [0, 1]

    yhat_proba = stacker.predict_proba(X)
    assert yhat_proba.shape == (y.shape[0], 2)

    assert not is_regressor(stacker)
    assert is_classifier(stacker)
def __init__(self, X, t, y, cv, base_learner, param_grid={},
             params_treat={}, params_control={}):
    super().__init__(X, t, y, cv)
    self.treatment_outcome_learner = copy.deepcopy(base_learner) \
        .set_params(**params_treat)
    self.control_outcome_learner = copy.deepcopy(base_learner) \
        .set_params(**params_control)
    self.param_grid = param_grid

    # Check if the base learner is a regressor or a classifier, and
    # initialize the appropriate metalearner object
    if is_regressor(self.treatment_outcome_learner):
        self.meta_learner_class = BaseTRegressor
        self.scoring = "r2"
    else:
        self.meta_learner_class = BaseTClassifier
        self.scoring = "neg_log_loss"

    self._make_meta_learner()
def __init__(self, X, t, y, cv, outcome_learner, effect_learner,
             outcome_param_grid={}, effect_param_grid={},
             params_treat={}, params_control={},
             params_treat_effect={}, params_control_effect={}):
    super().__init__(X, t, y, cv)
    self.treatment_outcome_learner = copy.deepcopy(outcome_learner) \
        .set_params(**params_treat)
    self.control_outcome_learner = copy.deepcopy(outcome_learner) \
        .set_params(**params_control)
    self.outcome_param_grid = outcome_param_grid  # param grid for outcome learner

    self.treatment_effect_learner = copy.deepcopy(effect_learner) \
        .set_params(**params_treat_effect)
    self.control_effect_learner = copy.deepcopy(effect_learner) \
        .set_params(**params_control_effect)
    self.effect_param_grid = effect_param_grid  # param grid for effect learner

    # Flag to tune the outcome learners. Set to True if params_treat or
    # params_control is the empty dictionary. The outcome learners are tuned
    # exactly the same way as those for a T-learner, so we don't have to
    # repeat this part of the tuning.
    self.tune_outcome_learners = (len(params_treat) == 0) or \
                                 (len(params_control) == 0)

    # Check if the outcome learner is a regressor or a classifier, and
    # initialize the appropriate metalearner object
    if is_regressor(self.treatment_outcome_learner):
        self.meta_learner_class = BaseXRegressor
        self.scoring = "r2"
    else:
        self.meta_learner_class = BaseXClassifier
        self.scoring = "neg_log_loss"

    self._make_meta_learner()
def predict(features_df, mdl, target_class_names=None, cols_to_save=None):
    from sklearn.base import is_classifier, is_regressor
    import pandas as pd

    if cols_to_save is not None:
        existing_cols_to_save = list(
            set(cols_to_save).intersection(features_df.columns))
        res_df = features_df[existing_cols_to_save].copy()
    else:
        res_df = pd.DataFrame()

    if is_classifier(mdl):
        pred = mdl.predict_proba(features_df)
        if pred.shape[1] == 2:
            res_df['target_proba'] = pred[:, 1]
        else:
            if target_class_names is None:
                target_class_names = [
                    f'class{i}' for i in range(pred.shape[1])
                ]
            for i, label in enumerate(target_class_names):
                res_df[label] = pred[:, i]
    elif is_regressor(mdl):
        res_df['pred'] = mdl.predict(features_df)
    else:
        raise AttributeError('unknown model type')
    return res_df
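
# Hedged usage sketch for predict above: a binary classifier gets its
# positive-class probability stored under 'target_proba', and requested
# columns are carried over from the input frame.
import pandas as pd
from sklearn.linear_model import LogisticRegression

features_df = pd.DataFrame({'a': [0., 1., 2., 3.], 'b': [1., 0., 1., 0.]})
mdl = LogisticRegression().fit(features_df, [0, 0, 1, 1])
print(predict(features_df, mdl, cols_to_save=['a']))
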
def _check_arguments(self, base_estimator, n_initial_points, acq_optimizer):
    """Check arguments for sanity."""
    if isinstance(base_estimator, str):
        base_estimator = cook_estimator(base_estimator, space=self.space,
                                        random_state=self.rng)

    if not is_regressor(base_estimator):
        raise ValueError("%s has to be a regressor." % base_estimator)
    self.base_estimator_ = base_estimator

    if n_initial_points < 0:
        raise ValueError("Expected `n_initial_points` >= 0, got %d"
                         % n_initial_points)
    self._n_initial_points = n_initial_points
    self.n_initial_points_ = n_initial_points

    # lbfgs needs gradient information; fall back to sampling otherwise
    if acq_optimizer == "auto":
        if has_gradients(self.base_estimator_):
            acq_optimizer = "lbfgs"
        else:
            acq_optimizer = "sampling"

    if acq_optimizer not in ["lbfgs", "sampling"]:
        raise ValueError("Expected acq_optimizer to be 'lbfgs' or "
                         "'sampling', got {0}".format(acq_optimizer))

    if (not has_gradients(self.base_estimator_)
            and acq_optimizer != "sampling"):
        raise ValueError("The regressor {0} should run with "
                         "acq_optimizer='sampling'".format(
                             type(base_estimator)))
    self.acq_optimizer = acq_optimizer
def _report(self, gt, pred, model, prefix):
    os.makedirs(self.output, exist_ok=True)
    pickle.dump(model, open(os.path.join(self.output, 'model.sav'), 'wb'))
    matches = None
    if is_classifier(self.model):
        with open(os.path.join(self.output,
                               f'{prefix}_classification_report.txt'),
                  'w') as f:
            matches, report = [gt, pred], classification_report(
                gt, pred, digits=4, zero_division=True)
            f.write(''.join(report))
            print(''.join(report))
    elif is_regressor(self.model):
        with open(os.path.join(self.output,
                               f'{prefix}_rmse_accuracy.txt'),
                  'w') as f:
            matches, report = self._acc_tolerance(gt, pred,
                                                  tolerance=[0, 1, 2, 3])
            f.write('\n'.join(report))
            print('\n'.join(report))
    return matches
def test_linear():
    print('\ntest_linear():')
    rs = np.random.RandomState(42)
    index = range(1000)
    X = np.hstack((np.linspace(0., 10., 1000).reshape(-1, 1),
                   np.linspace(-1., 1., 1000).reshape(-1, 1),
                   rs.random(1000).reshape(-1, 1)))
    transformation = rs.random(size=(3, 2))
    y = np.matmul(X, transformation)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=10,
                                                        random_state=42)

    reg = IncrementalRegression()
    assert is_regressor(reg)

    for prt in np.array_split(index, 3):
        reg.partial_fit(X[prt, :], y[prt, :])

    y_reg = reg.predict(X_test)
    print("tests: {0}\nregr: {1}".format(y_test, y_reg))
    np.testing.assert_allclose(y_reg, y_test, rtol=.01, atol=.15)
def get_pickled_model(estimator):
    """Get pickled estimator.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        estimator (:obj:`estimator`):
            | Scikit-learn estimator to pickle.

    Returns:
        ``neptune.types.File`` object that you can assign to run's
        ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()

            run = neptune.init(project='my_workspace/my_project')
            run['estimator/pickled_model'] = npt_utils.get_pickled_model(rfr)
    """
    assert is_regressor(estimator) or is_classifier(estimator), \
        'Estimator should be sklearn regressor or classifier.'

    return neptune.types.File.as_pickle(estimator)
def find_sklearn_model(name, mt="regression"): """Given string name, find the sklearn object and module associated.""" if isinstance(name, str): for pkg in skpackages(): if hasattr(pkg, name): return getattr(pkg, name)(), pkg.__name__ # if we've reached here, the thing isn't in a known package - let's search the shorthand space _mt = model_types() q = _mt.query("Short == @name") # if we just have one row, return if q.shape[0] == 1: return find_sklearn_model(q.index[0]) elif q.shape[0] > 1: # determine whether we want a classifier or a regressor return find_sklearn_model(q[q["ModelType"] == mt].index[0]) else: raise ValueError( "name '{}' does not exist as a model".format(name)) elif is_classifier(name): return name, name.__module__.rsplit(".", 1)[0] elif is_regressor(name): return name, name.__module__.rsplit(".", 1)[0] else: raise TypeError( "model '{}' not recognized as scikit-learn model.".format(name))
def _predict_binary(estimator, X):
    """Make predictions using a single binary estimator."""
    if is_regressor(estimator):
        return estimator.predict(X)
    # probabilities of the positive class
    score = estimator.predict_proba(X)[:, 1]
    return score
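
# Illustrative sketch for _predict_binary above: regressors return raw
# predictions, classifiers return the positive-class probability.
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
y = (X[:, 0] > 0).astype(int)

print(_predict_binary(LogisticRegression().fit(X, y), X)[:3])  # in [0, 1]
print(_predict_binary(LinearRegression().fit(X, y), X)[:3])    # unbounded
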
def test_OutSamplerTransformer_regressor(multi_output):
    np.random.seed(123)
    X = np.random.randn(100, 10)
    if multi_output:
        y = np.random.randn(100, 2)
    else:
        y = np.random.randn(100)

    model = OutSamplerTransformer(
        RandomForestRegressor(n_estimators=10, random_state=123), cv=10)
    model.fit(X, y)

    y1 = model.model.predict(X)
    y2 = model.transform(X)

    assert not is_classifier(model)
    assert not is_regressor(model)

    if multi_output:
        assert np.abs(y1[:, 0] - y2[:, 0]).max() <= 10 ** (-10)
        assert np.abs(y1[:, 1] - y2[:, 1]).max() <= 10 ** (-10)
        assert y2.shape == (100, 2)
        assert model.get_feature_names() == [
            "output%d__RandomForestRegressor__target" % d
            for d in range(y.shape[1])
        ]
    else:
        assert np.abs(y1 - y2[:, 0]).max() <= 10 ** (-10)
        assert y2.shape == (100, 1)
        assert model.get_feature_names() == ["RandomForestRegressor__target"]
def new_scorer(estimator, X, Y):
    if is_regressor(estimator):
        return regr_scorer(estimator, X, Y)
    elif is_classifier(estimator):
        return class_scorer(estimator, X, Y)
    else:
        raise ValueError("Unsupported estimator type")
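
# Hypothetical stand-ins so the dispatcher above can be exercised; the real
# regr_scorer/class_scorer are defined elsewhere in the source module.
from sklearn.metrics import accuracy_score, r2_score

def regr_scorer(estimator, X, Y):
    return r2_score(Y, estimator.predict(X))

def class_scorer(estimator, X, Y):
    return accuracy_score(Y, estimator.predict(X))
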
def test_StackerRegressor():
    np.random.seed(123)
    X = np.random.randn(100, 10)
    y = np.random.randn(100)

    stacker = StackerRegressor(
        models=[
            RandomForestRegressor(n_estimators=10, random_state=123),
            Ridge(random_state=123)
        ],
        cv=10,
        blender=Ridge(random_state=123),
    )

    stacker.fit(X, y)
    yhat = stacker.predict(X)

    assert yhat.ndim == 1
    assert yhat.shape[0] == X.shape[0]
    assert is_regressor(stacker)
    assert not is_classifier(stacker)

    with pytest.raises(AttributeError):
        stacker.predict_proba(X)

    with pytest.raises(AttributeError):
        stacker.classes_
def cook_estimator(base_estimator, space=None, **kwargs):
    """Cook a default estimator.

    Parameters
    ----------
    * `base_estimator` ["GP", "RF", "ET", "GBRT" or sklearn regressor, default="GP"]:
        Should inherit from `sklearn.base.RegressorMixin`. In addition, the
        `predict` method should have an optional `return_std` argument,
        which returns `std(Y | x)` along with `E[Y | x]`.
        If base_estimator is one of ["GP", "RF", "ET", "GBRT"], a default
        surrogate model of the corresponding type is used, matching what
        the minimize functions use.

    * `space` [Space instance]:
        Has to be provided if the base_estimator is a gaussian process.
        Ignored otherwise.

    * `kwargs` [dict]:
        Extra parameters provided to the base_estimator at init time.
    """
    if space is not None:
        n_dims = space.transformed_n_dims
        is_cat = space.is_categorical

    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        if base_estimator not in ["GP", "ET", "RF", "GBRT"]:
            raise ValueError("Valid strings for the base_estimator parameter"
                             " are: 'GP', 'RF', 'ET' or 'GBRT', not %s"
                             % base_estimator)
    elif not is_regressor(base_estimator):
        raise ValueError("base_estimator has to be a regressor.")

    if base_estimator == "GP":
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(length_scale=np.ones(n_dims),
                                  length_scale_bounds=[(0.01, 100)] * n_dims,
                                  nu=2.5)
        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, random_state=None, alpha=0.0,
            noise="gaussian", n_restarts_optimizer=2)
    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)

    base_estimator.set_params(**kwargs)
    return base_estimator
def test_staged_predict(HistGradientBoosting, X, y):
    # Test whether the staged predictor eventually gives
    # the same prediction.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=0)
    gb = HistGradientBoosting(max_iter=10)

    # test raise NotFittedError if not fitted
    with pytest.raises(NotFittedError):
        next(gb.staged_predict(X_test))

    gb.fit(X_train, y_train)

    # test if the staged predictions of each iteration
    # are equal to the corresponding predictions of the same estimator
    # trained from scratch.
    # this also tests the limit case when max_iter = 1
    method_names = (
        ["predict"] if is_regressor(gb)
        else ["predict", "predict_proba", "decision_function"])
    for method_name in method_names:
        staged_method = getattr(gb, "staged_" + method_name)
        staged_predictions = list(staged_method(X_test))
        assert len(staged_predictions) == gb.n_iter_
        for n_iter, staged_preds in enumerate(staged_method(X_test), 1):
            aux = HistGradientBoosting(max_iter=n_iter)
            aux.fit(X_train, y_train)
            pred_aux = getattr(aux, method_name)(X_test)

            assert_allclose(staged_preds, pred_aux)
            assert staged_preds.shape == pred_aux.shape
def _check_arguments(self, base_estimator, n_initial_points, acq_optimizer,
                     dimensions):
    """Check arguments for sanity."""
    if isinstance(base_estimator, str):
        base_estimator = cook_estimator(
            base_estimator, space=dimensions, random_state=self.rng)

    if not is_regressor(base_estimator) and base_estimator is not None:
        raise ValueError("%s has to be a regressor." % base_estimator)

    if "ps" in self.acq_func:
        self.base_estimator_ = MultiOutputRegressor(base_estimator)
    else:
        self.base_estimator_ = base_estimator

    if n_initial_points < 0:
        raise ValueError(
            "Expected `n_initial_points` >= 0, got %d" % n_initial_points)
    self._n_initial_points = n_initial_points
    self.n_initial_points_ = n_initial_points

    if acq_optimizer == "auto":
        if has_gradients(self.base_estimator_):
            acq_optimizer = "lbfgs"
        else:
            acq_optimizer = "sampling"

    if acq_optimizer not in ["lbfgs", "sampling"]:
        raise ValueError("Expected acq_optimizer to be 'lbfgs' or "
                         "'sampling', got {0}".format(acq_optimizer))

    if (not has_gradients(self.base_estimator_)
            and acq_optimizer != "sampling"):
        raise ValueError("The regressor {0} should run with "
                         "acq_optimizer='sampling'.".format(
                             type(base_estimator)))
    self.acq_optimizer = acq_optimizer
def __init__(self, models):
    """Proxy class to build an ensemble of models with an API as one

    Parameters
    ----------
    models: array
        An array of models
    """
    self._models = models if len(models) else None
    if self._models is not None:
        if is_classifier(self._models[0]):
            check_type = is_classifier
            self._scoring_fun = accuracy_score
        elif is_regressor(self._models[0]):
            check_type = is_regressor
            self._scoring_fun = r2_score
        else:
            raise ValueError('Expected regressors or classifiers,'
                             ' got %s instead' % type(self._models[0]))
        for model in self._models:
            if not check_type(model):
                raise ValueError('Different types of models found, provide'
                                 ' either regressors or classifiers.')
def fit(self, X, y):
    """Fit a receptive field model.

    Parameters
    ----------
    X : array, shape (n_times[, n_epochs], n_features)
        The input features for the model.
    y : array, shape (n_times[, n_epochs], n_outputs)
        The output features for the model.

    Returns
    -------
    self : instance
        The instance so you can chain operations.
    """
    if self.scoring not in _SCORERS.keys():
        raise ValueError('scoring must be one of %s, got %s '
                         % (sorted(_SCORERS.keys()), self.scoring))
    from sklearn.base import is_regressor, clone
    X, y = self._check_dimensions(X, y)

    # Initialize delays
    self.delays_ = _times_to_delays(self.tmin, self.tmax, self.sfreq)
    # Define the slice that we should use in the middle
    self.keep_samples_ = _delays_to_slice(self.delays_)

    if isinstance(self.estimator, numbers.Real):
        estimator = TimeDelayingRidge(self.tmin, self.tmax, self.sfreq,
                                      alpha=self.estimator,
                                      fit_intercept=self.fit_intercept)
    elif is_regressor(self.estimator):
        estimator = clone(self.estimator)
    else:
        raise ValueError('`estimator` must be a float or an instance'
                         ' of `BaseEstimator`,'
                         ' got type %s.' % type(self.estimator))
    self.estimator_ = estimator
    del estimator
    _check_estimator(self.estimator_)

    # Create input features
    n_times, n_epochs, n_feats = X.shape

    # Update feature names if we have none
    if self.feature_names is None:
        self.feature_names = ['feature_%s' % ii for ii in range(n_feats)]
    if len(self.feature_names) != n_feats:
        raise ValueError('n_features in X does not match feature names '
                         '(%s != %s)' % (n_feats, len(self.feature_names)))

    # Delay and reshape the input features
    X_del, y = self._delay_and_reshape(X, y)

    self.estimator_.fit(X_del, y)
    coefs = get_coef(self.estimator_, 'coef_')
    coefs = coefs.reshape([-1, n_feats, len(self.delays_)])
    if len(coefs) == 1:
        # Remove a singleton first dimension if only 1 output
        coefs = coefs[0]
    self.coef_ = coefs
    return self
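
# Hedged usage sketch: this fit method follows the mne.decoding
# ReceptiveField API, where a float estimator selects ridge regularisation
# via TimeDelayingRidge. The shapes below are illustrative.
import numpy as np
from mne.decoding import ReceptiveField

rng = np.random.RandomState(0)
X = rng.randn(1000, 3)               # (n_times, n_features)
y = X[:, 0] + 0.1 * rng.randn(1000)  # (n_times,)
rf = ReceptiveField(tmin=-0.1, tmax=0.2, sfreq=100., estimator=1.0)
rf.fit(X, y)
print(rf.coef_.shape)                # (n_features, n_delays)
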
def cook_estimator(base_estimator, space=None, **kwargs):
    """Cook a default estimator.

    For the special base_estimator called "DUMMY" the return value is None.
    This corresponds to sampling points at random, hence there is no need
    for an estimator.

    Parameters
    ----------
    * `base_estimator` ["GP", "RF", "ET", "GBRT", "DUMMY" or sklearn regressor, default="GP"]:
        Should inherit from `sklearn.base.RegressorMixin`. In addition, the
        `predict` method should have an optional `return_std` argument,
        which returns `std(Y | x)` along with `E[Y | x]`.
        If base_estimator is one of ["GP", "RF", "ET", "GBRT", "DUMMY"], a
        surrogate model corresponding to the relevant `X_minimize` function
        is created.

    * `space` [Space instance]:
        Has to be provided if the base_estimator is a gaussian process.
        Ignored otherwise.

    * `kwargs` [dict]:
        Extra parameters provided to the base_estimator at init time.
    """
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        if base_estimator not in ["GP", "ET", "RF", "GBRT", "DUMMY"]:
            raise ValueError("Valid strings for the base_estimator parameter"
                             " are: 'GP', 'RF', 'ET', 'GBRT' or 'DUMMY', not"
                             " %s." % base_estimator)
    elif not is_regressor(base_estimator):
        raise ValueError("base_estimator has to be a regressor.")

    if base_estimator == "GP":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical
        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims, nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)
    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)
    elif base_estimator == "DUMMY":
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
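
# A minimal usage sketch for cook_estimator above (assumes the skopt-style
# imports in this module): cook a default random-forest surrogate and
# override one of its parameters through **kwargs.
rf_surrogate = cook_estimator("RF", n_estimators=50)
print(type(rf_surrogate).__name__, rf_surrogate.n_estimators)
# -> RandomForestRegressor 50
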
def forest_minimize(func, dimensions, base_estimator='et', n_calls=100,
                    n_points=1000, n_random_starts=10, x0=None, y0=None,
                    n_jobs=1, random_state=None, acq="EI", xi=0.01,
                    kappa=1.96):
    """Sequential optimisation using decision trees.

    A tree based regression model is used to model the expensive to evaluate
    function `func`. The model is improved by sequentially evaluating the
    expensive function at the next best point. Thereby finding the minimum
    of `func` with as few evaluations as possible.

    The total number of evaluations, `n_calls`, is performed as follows. If
    `x0` is provided but not `y0`, then the elements of `x0` are first
    evaluated, followed by `n_random_starts` evaluations. Finally,
    `n_calls - len(x0) - n_random_starts` evaluations are made guided by the
    surrogate model. If `x0` and `y0` are both provided then
    `n_random_starts` evaluations are first made, then
    `n_calls - n_random_starts` subsequent evaluations are made guided by
    the surrogate model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take an array of parameters and return
        the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions. Each search dimension can be
        defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [string or `Regressor`, default=`"et"`]:
        The regressor to use as surrogate model. Can be either

        - `"rf"` for random forest regressor
        - `"et"` for extra trees regressor
        - `"dt"` for single decision tree regressor
        - instance of regressor with support for `return_std` in its
          predict method

        The predefined models are initialized with good defaults. If you
        want to adjust the model parameters pass your own instance of a
        regressor which returns the mean and standard deviation when making
        predictions.

    * `n_calls` [int, default=100]:
        Number of calls to `func`.

    * `n_random_starts` [int, default=10]:
        Number of evaluations of `func` with random initialization points
        before approximating the `func` with `base_estimator`.

    * `n_points` [int, default=1000]:
        Number of points to sample when minimizing the acquisition function.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None`, no initial input points are used.

    * `y0` [list, scalar or `None`]:
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the
          function at each element of `x0`: the i-th element of `y0`
          corresponds to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is
          evaluated at each element of `x0`.

    * `n_jobs` [int, default=1]:
        The number of jobs to run in parallel for `fit` and `predict`.
        If -1, then the number of jobs is set to the number of cores.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    * `acq` [string, default=`"EI"`]:
        Function to minimize over the forest posterior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa. Used when the
        acquisition is `"LCB"`.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as an OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [list of lists]: location of function evaluation for
          each iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimization space.
        - `specs` [dict]: the call specifications.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html
    """
    # Save call args + rng
    specs = {"args": copy.copy(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Check params
    rng = check_random_state(random_state)

    # Default estimator
    if isinstance(base_estimator, str):
        if base_estimator not in ("rf", "et", "dt"):
            raise ValueError(
                "Valid values for the base_estimator parameter"
                " are: 'rf', 'et' or 'dt', not '%s'" % base_estimator)

        if base_estimator == "rf":
            base_estimator = RandomForestRegressor(n_estimators=100,
                                                   min_samples_leaf=3,
                                                   n_jobs=n_jobs,
                                                   random_state=rng)
        elif base_estimator == "et":
            base_estimator = ExtraTreesRegressor(n_estimators=100,
                                                 min_samples_leaf=3,
                                                 n_jobs=n_jobs,
                                                 random_state=rng)
        elif base_estimator == "dt":
            base_estimator = DecisionTreeRegressor(min_samples_leaf=3,
                                                   random_state=rng)
    else:
        if not is_regressor(base_estimator):
            raise ValueError("The base_estimator parameter has to either"
                             " be a string or a regressor instance."
                             " '%s' is neither." % base_estimator)

    res = _tree_minimize(func, dimensions, base_estimator, n_calls=n_calls,
                         n_points=n_points, n_random_starts=n_random_starts,
                         x0=x0, y0=y0, random_state=random_state, acq=acq,
                         xi=xi, kappa=kappa)
    res.specs = specs
    return res
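
# Hedged usage sketch for forest_minimize above: minimise a shifted 1-D
# quadratic over [-5, 5] with a small budget. Assumes the surrounding
# skopt-style module, with dimensions given as bound tuples.
res = forest_minimize(lambda x: (x[0] - 2.0) ** 2,
                      dimensions=[(-5.0, 5.0)],
                      n_calls=25, n_random_starts=5, random_state=0)
print(res.x, res.fun)  # best location and value found
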
def analyse_results(
        regular_cv_results, permutation_cv_results, labels, estimator,
        base_folder=None, analysis_folder='analysis', feature_names=None,
        learning_task=None, vs_analysis=None, threshold=.75,
        model_assessment_options=None, score_surfaces_options=None):
    """Summary and plot generation."""
    # learning_task follows the convention of
    # sklearn.utils.multiclass.type_of_target
    if learning_task is None:
        if is_regressor(estimator):
            learning_task = 'continuous'
        else:
            learning_task = type_of_target(labels)

    # Create an empty dictionary which will contain the key results
    # of the analysis
    analysis_summary = dict()

    # Run the appropriate analysis according to the learning_task
    is_regression = learning_task.lower() in ('continuous', 'regression')
    if is_regression:
        # Perform regression analysis
        target = 'regression'
    elif learning_task.lower() == 'multiclass':
        target = 'multiclass'
    else:
        # Perform classification analysis
        target = 'classification'

    # Support for empty regular or permutation tests
    performance_regular = performance_metrics(
        regular_cv_results, labels, target)
    performance_permutation = performance_metrics(
        permutation_cv_results, labels, target)

    if base_folder is not None and analysis_folder is not None:
        analysis_folder = os.path.join(base_folder, analysis_folder)
        if not os.path.exists(analysis_folder):
            os.makedirs(analysis_folder)

        # Create two separate folders for figures in different formats
        try:
            os.mkdir(os.path.join(analysis_folder, 'figures_pdf'))
            os.mkdir(os.path.join(analysis_folder, 'figures_png'))
        except OSError:
            pass  # if folder already exists, ignore it
    else:
        analysis_folder = None

    if model_assessment_options is None:
        model_assessment_options = {}

    # Handle variable selection step
    if vs_analysis is not None:
        # Get feature names
        if feature_names is None:
            # what follows creates [feat_0, feat_1, ..., feat_d]
            # feature_names = 'feat_' + np.arange(
            #     labels.size).astype(str).astype(object)
            raise ValueError(
                "Variable selection analysis was specified, but no feature "
                "names were provided.")
        feature_names = np.array(feature_names)  # force feature names to array
        if threshold is None:
            threshold = .75

        selected = {}
        # Init variable selection containers
        selected['regular'] = dict(
            zip(feature_names, np.zeros(len(feature_names))))
        selected['permutation'] = selected['regular'].copy()

        # dict.values() is a view in Python 3, so materialise it before
        # indexing the first entry
        n_splits_regular = len(list(
            regular_cv_results.values() or [[]])[0])
        n_splits_permutation = len(list(
            permutation_cv_results.values() or [[]])[0])
        n_jobs = {'regular': n_splits_regular,
                  'permutation': n_splits_permutation}

        names_ = ('regular', 'permutation')
        cv_results_ = (regular_cv_results, permutation_cv_results)
        for batch_name, cv_result in zip(names_, cv_results_):
            # cv_result['estimator'] is a list containing
            # the grid-search estimators
            estimators = cv_result.get('estimator', None)
            if estimators is None:
                continue  # in case of no permutations skip this iteration

            for estimator in estimators:
                selected_list = get_selected_list(
                    estimator, vs_analysis)
                if len(selected_list) < 1:
                    continue
                selected_variables = feature_names[selected_list]

                for var in selected_variables:
                    selected[batch_name][var] += 1. / n_jobs[batch_name]

            # Save selected variables textual summary
            if analysis_folder is not None:
                save_signature(os.path.join(
                    analysis_folder, 'signature_%s.txt' % batch_name),
                    selected[batch_name], threshold)

            # Also save the frequency list as an entry of the analysis
            # summary. Create an empty pandas dataframe to store the
            # frequencies
            df_tmp = pd.DataFrame(columns=['Frequency'])
            for k in reversed(sorted(
                    selected[batch_name],
                    key=selected[batch_name].__getitem__)):
                df_tmp.loc[k] = selected[batch_name][k] * 100

            # Add the dataframe to the analysis summary
            analysis_summary[
                'selection_frequency_{}'.format(batch_name)] = df_tmp

        feat_arr_r = np.array(
            list(iteritems(selected['regular'])), dtype=object)
        feat_arr_p = np.array(
            list(iteritems(selected['permutation'])), dtype=object)

        # sort by name
        feat_arr_r = feat_arr_r[feat_arr_r[:, 0].argsort()]
        feat_arr_p = feat_arr_p[feat_arr_p[:, 0].argsort()]

        # Save graphical summary
        plotting.feature_frequencies(
            feat_arr_r, analysis_folder, threshold=threshold)
        plotting.features_manhattan(
            feat_arr_r, feat_arr_p, analysis_folder, threshold=threshold)
        plotting.select_over_threshold(
            feat_arr_r, feat_arr_p, analysis_folder, threshold=threshold)

    # Generate distribution plots
    # and save distributions in analysis summary
    for i, metric in enumerate(performance_regular):
        plotting.distributions(
            v_regular=performance_regular[metric],
            v_permutation=performance_permutation.get(metric, []),
            base_folder=analysis_folder,
            metric=metric,
            first_run=i == 0,
            is_regression=is_regression)

        v_regular = performance_regular[metric]
        v_permutation = performance_permutation.get(metric, [])

        metric_values = dict()
        metric_values['values_regular'] = v_regular
        metric_values['values_permutation'] = v_permutation

        r_mean, r_sd = np.nanmean(v_regular), np.nanstd(v_regular)
        p_mean, p_sd = np.nanmean(v_permutation), np.nanstd(v_permutation)
        rstest = stats.ks_2samp(v_regular, v_permutation)

        metric_values['mean_regular'] = r_mean
        metric_values['sd_regular'] = r_sd
        metric_values['mean_permutation'] = p_mean
        metric_values['sd_permutation'] = p_sd
        metric_values['rstest'] = rstest

        analysis_summary['metric_{}'.format(metric)] = metric_values

    # Generate surfaces
    # This has meaning only if the estimator is an instance of GridSearchCV
    if isinstance(estimator, BaseSearchCV):
        if score_surfaces_options is None:
            score_surfaces_options = {}
        plotting.score_surfaces(
            param_grid=estimator.param_grid,
            results=regular_cv_results,
            base_folder=analysis_folder,
            is_regression=is_regression,
            **score_surfaces_options)

    # Finally, save the pickled summary in the analysis folder
    if analysis_folder is not None:
        # pickle requires a binary file handle
        with open(os.path.join(analysis_folder, 'summary.pkl'), 'wb') as af:
            pkl.dump(analysis_summary, af)
def __init__(self, dimensions, base_estimator="gp", n_random_starts=None, n_initial_points=10, acq_func="gp_hedge", acq_optimizer="auto", random_state=None, acq_func_kwargs=None, acq_optimizer_kwargs=None): self.rng = check_random_state(random_state) # Configure acquisition function # Store and creat acquisition function set self.acq_func = acq_func self.acq_func_kwargs = acq_func_kwargs allowed_acq_funcs = ["gp_hedge", "EI", "LCB", "PI", "EIps", "PIps"] if self.acq_func not in allowed_acq_funcs: raise ValueError("expected acq_func to be in %s, got %s" % (",".join(allowed_acq_funcs), self.acq_func)) # treat hedging method separately if self.acq_func == "gp_hedge": self.cand_acq_funcs_ = ["EI", "LCB", "PI"] self.gains_ = np.zeros(3) else: self.cand_acq_funcs_ = [self.acq_func] if acq_func_kwargs is None: acq_func_kwargs = dict() self.eta = acq_func_kwargs.get("eta", 1.0) # Configure counters of points # Check `n_random_starts` deprecation first if n_random_starts is not None: warnings.warn(("n_random_starts will be removed in favour of " "n_initial_points."), DeprecationWarning) n_initial_points = n_random_starts if n_initial_points < 0: raise ValueError( "Expected `n_initial_points` >= 0, got %d" % n_initial_points) self._n_initial_points = n_initial_points self.n_initial_points_ = n_initial_points # Configure estimator # build base_estimator if doesn't exist if isinstance(base_estimator, str): base_estimator = cook_estimator( base_estimator, space=dimensions, random_state=self.rng.randint(0, np.iinfo(np.int32).max)) # check if regressor if not is_regressor(base_estimator) and base_estimator is not None: raise ValueError( "%s has to be a regressor." % base_estimator) # treat per second acqusition function specially is_multi_regressor = isinstance(base_estimator, MultiOutputRegressor) if "ps" in self.acq_func and not is_multi_regressor: self.base_estimator_ = MultiOutputRegressor(base_estimator) else: self.base_estimator_ = base_estimator # Configure optimizer # decide optimizer based on gradient information if acq_optimizer == "auto": if has_gradients(self.base_estimator_): acq_optimizer = "lbfgs" else: acq_optimizer = "sampling" if acq_optimizer not in ["lbfgs", "sampling"]: raise ValueError("Expected acq_optimizer to be 'lbfgs' or " "'sampling', got {0}".format(acq_optimizer)) if (not has_gradients(self.base_estimator_) and acq_optimizer != "sampling"): raise ValueError("The regressor {0} should run with " "acq_optimizer" "='sampling'.".format(type(base_estimator))) self.acq_optimizer = acq_optimizer # record other arguments if acq_optimizer_kwargs is None: acq_optimizer_kwargs = dict() self.n_points = acq_optimizer_kwargs.get("n_points", 10000) self.n_restarts_optimizer = acq_optimizer_kwargs.get( "n_restarts_optimizer", 5) n_jobs = acq_optimizer_kwargs.get("n_jobs", 1) self.n_jobs = n_jobs self.acq_optimizer_kwargs = acq_optimizer_kwargs # Configure search space # normalize space if GP regressor if isinstance(self.base_estimator_, GaussianProcessRegressor): dimensions = normalize_dimensions(dimensions) self.space = Space(dimensions) # record categorical and non-categorical indices self._cat_inds = [] self._non_cat_inds = [] for ind, dim in enumerate(self.space.dimensions): if isinstance(dim, Categorical): self._cat_inds.append(ind) else: self._non_cat_inds.append(ind) # Initialize storage for optimization self.models = [] self.Xi = [] self.yi = [] # Initialize cache for `ask` method responses # This ensures that multiple calls to `ask` with n_points set # return same sets of points. 
Reset to {} at every call to `tell`. self.cache_ = {}
def forest_minimize(func, dimensions, base_estimator='rf', maxiter=100,
                    n_points=100, n_start=10, random_state=None):
    """Sequential optimisation using decision trees.

    A tree based regression model is used to model the expensive to evaluate
    function `func`. The model is improved by sequentially evaluating the
    expensive function at the next best point. Thereby finding the minimum
    of `func` with as few evaluations as possible.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take an array of parameters and return
        the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions. Each search dimension can be
        defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [string or `Regressor`, default=`"rf"`]:
        The regressor to use as surrogate model. Can be either

        - `"rf"` for random forest regressor
        - `"et"` for extra trees regressor
        - `"dt"` for single decision tree regressor
        - instance of regressor with support for `return_std` in its
          predict method

        The predefined models are initialized with good defaults. If you
        want to adjust the model parameters pass your own instance of a
        regressor which returns the mean and standard deviation when making
        predictions.

    * `maxiter` [int, default=100]:
        Number of iterations used to find the minimum. This corresponds to
        the total number of evaluations of `func`. If `n_start` > 0, only
        `maxiter - n_start` additional evaluations of `func` are made that
        are guided by the surrogate model.

    * `n_start` [int, default=10]:
        Number of random points to draw before fitting `base_estimator` for
        the first time. If `n_start = maxiter` this degrades to a random
        search for the minimum.

    * `n_points` [int, default=100]:
        Number of points to sample when minimizing the acquisition function.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as an OptimizeResult object.
        Important attributes are:

        - `x` [float]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
          iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimisation space.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html
    """
    rng = check_random_state(random_state)

    if isinstance(base_estimator, str):
        if base_estimator not in ("rf", "et", "dt"):
            raise ValueError("Valid values for the base_estimator parameter"
                             " are: 'rf', 'et' or 'dt', not '%s'"
                             % base_estimator)

        if base_estimator == "rf":
            base_estimator = RandomForestRegressor(min_samples_leaf=10,
                                                   random_state=rng)
        elif base_estimator == "et":
            base_estimator = ExtraTreesRegressor(min_samples_leaf=10,
                                                 random_state=rng)
        elif base_estimator == "dt":
            base_estimator = DecisionTreeRegressor(min_samples_leaf=10,
                                                   random_state=rng)
    else:
        if not is_regressor(base_estimator):
            raise ValueError("The base_estimator parameter has to either"
                             " be a string or a regressor instance."
                             " '%s' is neither." % base_estimator)

    return _tree_minimize(func, dimensions, base_estimator,
                          maxiter=maxiter, n_points=n_points,
                          n_start=n_start, random_state=random_state)