def handle_deprecated_train_score(results, return_train_score):
    if return_train_score == 'warn':
        results = DeprecationDict(results)
        message = ('You are accessing a training score ({!r}), '
                   'which will not be available by default any more in '
                   'sklearn 0.21. If you need training scores, please '
                   'set return_train_score=True')
        for key in results:
            if key.endswith('_train_score'):
                results.add_warning(key, message.format(key), FutureWarning)
    return results
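# Hedged usage sketch for the helper above (assumes `DeprecationDict` from
# sklearn.utils.deprecation is importable here, as in the rest of this
# module; the demo function name and the sample result keys are
# hypothetical):
def _example_handle_deprecated_train_score():
    import warnings

    results = {'mean_test_score': [0.9], 'mean_train_score': [1.0]}
    results = handle_deprecated_train_score(results,
                                            return_train_score='warn')
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        _ = results['mean_train_score']   # key lookup emits a FutureWarning
        _ = results['mean_test_score']    # test scores warn nothing
    return [w.category for w in caught]   # expect exactly one FutureWarning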
def cross_validate(estimator, X, mixed_y=None, groups=None, scoring=None,
                   cv=None, n_jobs=1, verbose=0, fit_params=None,
                   pre_dispatch='2*n_jobs', return_train_score="warn"):
    """Evaluate metric(s) by cross-validation and also record fit/score times."""
    # TODO: wrapper patch, key hard coding?
    _y = mixed_y['classifier'] if isinstance(mixed_y, dict) else mixed_y
    X, y, groups = indexable(X, _y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_fit_and_score)(
            clone(estimator), X, mixed_y, scorers, train, test, verbose, None,
            fit_params, return_train_score=return_train_score,
            return_times=True)
        for train, test in cv.split(X, y, groups))

    if return_train_score:
        train_scores, test_scores, fit_times, score_times = zip(*scores)
        train_scores = _aggregate_score_dicts(train_scores)
    else:
        test_scores, fit_times, score_times = zip(*scores)
    test_scores = _aggregate_score_dicts(test_scores)

    # TODO: replace by a dict in 0.21
    ret = DeprecationDict() if return_train_score == 'warn' else {}
    ret['fit_time'] = np.array(fit_times)
    ret['score_time'] = np.array(score_times)

    for name in scorers:
        ret['test_%s' % name] = np.array(test_scores[name])
        if return_train_score:
            key = 'train_%s' % name
            ret[key] = np.array(train_scores[name])
            if return_train_score == 'warn':
                message = (
                    'You are accessing a training score ({!r}), '
                    'which will not be available by default '
                    'any more in 0.21. If you need training scores, '
                    'please set return_train_score=True').format(key)
                # warn on key access
                ret.add_warning(key, message, FutureWarning)

    return ret
def test_deprecationdict():
    dd = DeprecationDict()
    dd.add_warning('a', 'hello')
    dd.add_warning('b', 'world', DeprecationWarning)
    assert 1 == assert_warns_message(UserWarning, 'hello', dd.get, 'a', 1)
    dd['a'] = 5
    dd['b'] = 6
    dd['c'] = 7
    assert 5 == assert_warns_message(UserWarning, 'hello',
                                     dd.__getitem__, 'a')
    assert 6 == assert_warns_message(DeprecationWarning, 'world',
                                     dd.__getitem__, 'b')
    assert 7 == assert_no_warnings(dd.get, 'c')
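# For context, a minimal sketch of the DeprecationDict behaviour the test
# above exercises. The real class lives in sklearn.utils.deprecation in the
# 0.19/0.20 series; this is an illustrative re-implementation under a
# different name, not a verbatim copy:
import warnings


class DeprecationDictSketch(dict):
    """dict that emits a registered warning when a key is looked up."""

    def __init__(self, *args, **kwargs):
        self._deprecations = {}
        super(DeprecationDictSketch, self).__init__(*args, **kwargs)

    def __getitem__(self, key):
        if key in self._deprecations:
            warn_args, warn_kwargs = self._deprecations[key]
            warnings.warn(*warn_args, **warn_kwargs)
        return super(DeprecationDictSketch, self).__getitem__(key)

    def get(self, key, default=None):
        # must be overridden: plain dict.get would bypass __getitem__
        try:
            return self[key]
        except KeyError:
            return default

    def add_warning(self, key, *args, **kwargs):
        """Register a warning to be triggered when ``key`` is read."""
        self._deprecations[key] = (args, kwargs)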
def _format_results(self, candidate_params, scorers, n_splits, out):
    n_candidates = len(candidate_params)

    # if one chooses to see train score, "out" will contain train score info
    if self.return_train_score:
        (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
         score_time) = zip(*out)
    else:
        (test_score_dicts, test_sample_counts, fit_time,
         score_time) = zip(*out)

    # test_score_dicts and train_score_dicts are lists of dictionaries and
    # we make them into dict of lists
    test_scores = _aggregate_score_dicts(test_score_dicts)
    if self.return_train_score:
        train_scores = _aggregate_score_dicts(train_score_dicts)

    # TODO: replace by a dict in 0.21
    results = (DeprecationDict()
               if self.return_train_score == 'warn' else {})

    def _store(key_name, array, weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        # When iterated first by splits, then by parameters
        # We want `array` to have `n_candidates` rows and `n_splits` cols.
        array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                          n_splits)
        if splits:
            for split_i in range(n_splits):
                # Uses closure to alter the results
                results["split%d_%s"
                        % (split_i, key_name)] = array[:, split_i]

        array_means = np.average(array, axis=1, weights=weights)
        results['mean_%s' % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(np.average((array -
                                         array_means[:, np.newaxis]) ** 2,
                                        axis=1, weights=weights))
        results['std_%s' % key_name] = array_stds

        if rank:
            results["rank_%s" % key_name] = np.asarray(
                rankdata(-array_means, method='min'), dtype=np.int32)

    _store('fit_time', fit_time)
    _store('score_time', score_time)

    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(partial(MaskedArray,
                                        np.empty(n_candidates,),
                                        mask=True,
                                        dtype=object))
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            # An all masked empty array gets created for the key
            # `"param_%s" % name` at the first occurrence of `name`.
            # Setting the value at an index also unmasks that index
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    # Store a list of param dicts at the key 'params'
    results['params'] = candidate_params

    # NOTE test_sample counts (weights) remain the same for all candidates
    test_sample_counts = np.array(test_sample_counts[:n_splits],
                                  dtype=np.int)

    iid = self.iid
    if self.iid == 'warn':
        warn = False
        for scorer_name in scorers.keys():
            scores = test_scores[scorer_name].reshape(n_candidates,
                                                      n_splits)
            means_weighted = np.average(scores, axis=1,
                                        weights=test_sample_counts)
            means_unweighted = np.average(scores, axis=1)
            if not np.allclose(means_weighted, means_unweighted,
                               rtol=1e-4, atol=1e-4):
                warn = True
                break

        if warn:
            warnings.warn("The default of the `iid` parameter will change "
                          "from True to False in version 0.22 and will be"
                          " removed in 0.24. This will change numeric"
                          " results when test-set sizes are unequal.",
                          DeprecationWarning)
        iid = True

    for scorer_name in scorers.keys():
        # Compute the (weighted) mean and std for test scores alone
        _store('test_%s' % scorer_name, test_scores[scorer_name],
               splits=True, rank=True,
               weights=test_sample_counts if iid else None)
        if self.return_train_score:
            prev_keys = set(results.keys())
            _store('train_%s' % scorer_name, train_scores[scorer_name],
                   splits=True)

            if self.return_train_score == 'warn':
                for key in set(results.keys()) - prev_keys:
                    message = (
                        'You are accessing a training score ({!r}), '
                        'which will not be available by default '
                        'any more in 0.21. If you need training scores, '
                        'please set return_train_score=True').format(key)
                    # warn on key access
                    results.add_warning(key, message, FutureWarning)

    return results
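# Standalone numpy sketch of the weighted mean/std computation used by
# `_store` above (the helper name here is illustrative, not part of the
# original module):
import numpy as np


def _weighted_mean_std(array, weights=None):
    # array: shape (n_candidates, n_splits); weights: per-split test-set
    # sizes, or None for an unweighted average across splits.
    means = np.average(array, axis=1, weights=weights)
    # np.average has no "std" counterpart, so the weighted std is built from
    # the weighted average of squared deviations, exactly as in _store.
    stds = np.sqrt(np.average((array - means[:, np.newaxis]) ** 2,
                              axis=1, weights=weights))
    return means, stds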
def fit(self, X, y=None, groups=None, **fit_params):
    """Run fit with all sets of parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.

    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset into
        train/test set.

    **fit_params : dict of string -> object
        Parameters passed to the ``fit`` method of the estimator
    """
    if self.fit_params is not None:
        warnings.warn('"fit_params" as a constructor argument was '
                      'deprecated in version 0.19 and will be removed '
                      'in version 0.21. Pass fit parameters to the '
                      '"fit" method instead.', DeprecationWarning)
        if fit_params:
            warnings.warn('Ignoring fit_params passed as a constructor '
                          'argument in favor of keyword arguments to '
                          'the "fit" method.', RuntimeWarning)
        else:
            fit_params = self.fit_params
    estimator = self.estimator
    cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

    scorers, self.multimetric_ = _check_multimetric_scoring(
        self.estimator, scoring=self.scoring)

    if self.multimetric_:
        if self.refit is not False and (
                not isinstance(self.refit, six.string_types) or
                # This will work for both dict / list (tuple)
                self.refit not in scorers):
            raise ValueError("For multi-metric scoring, the parameter "
                             "refit must be set to a scorer key "
                             "to refit an estimator with the best "
                             "parameter setting on the whole data and "
                             "make the best_* attributes "
                             "available for that metric. If this is not "
                             "needed, refit should be set to False "
                             "explicitly. %r was passed." % self.refit)
        else:
            refit_metric = self.refit
    else:
        refit_metric = 'score'

    # X, y, groups = indexable(X, y, groups)
    if groups is not None:
        raise NotImplementedError("groups are not supported")
    # n_splits = cv.get_n_splits(X, y, groups)
    n_splits = min(
        cv.get_n_splits(X_.transpose(1, 2, 0), y_, None)
        for X_, y_ in zip(X, y))

    def generate_index(X_list, y_list):
        split = [
            cv.split(X.transpose(1, 2, 0), y)
            for X, y in zip(X_list, y_list)
        ]
        for i in range(n_splits):
            yield zip(*[next(s) for s in split])

    generate_index_iter = generate_index(X, y)

    # Regenerate parameter iterable for each fit
    candidate_params = list(self._get_param_iterator())
    n_candidates = len(candidate_params)
    if self.verbose > 0:
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(n_splits, n_candidates,
                                 n_candidates * n_splits))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(delayed(_fit_and_score)(
        clone(base_estimator), X, y, scorers, train, test, self.verbose,
        parameters, fit_params=fit_params,
        return_train_score=self.return_train_score,
        return_n_test_samples=True, return_times=True,
        return_parameters=False, error_score=self.error_score,
        return_estimator=True, return_idx=True)
      for parameters, (train, test) in product(candidate_params,
                                               generate_index_iter))

    # if one chooses to see train score, "out" will contain train score info
    if self.return_train_score:
        (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
         score_time, estimators, train_idxs, test_idxs) = zip(*out)
    else:
        (test_score_dicts, test_sample_counts, fit_time, score_time,
         estimators, train_idxs, test_idxs) = zip(*out)

    # test_score_dicts and train_score_dicts are lists of dictionaries and
    # we make them into dict of lists
    test_scores = _aggregate_score_dicts(test_score_dicts)
    if self.return_train_score:
        train_scores = _aggregate_score_dicts(train_score_dicts)

    # TODO: replace by a dict in 0.21
    results = (DeprecationDict()
               if self.return_train_score == 'warn' else {})

    def _store(key_name, array, weights=None, splits=False, rank=False):
        """Store the scores/times to the cv_results_."""
        # When iterated first by splits, then by parameters
        # We want `array` to have `n_candidates` rows and `n_splits` cols.
        array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                          n_splits)
        if splits:
            for split_i in range(n_splits):
                # Uses closure to alter the results
                results["split%d_%s"
                        % (split_i, key_name)] = array[:, split_i]

        array_means = np.average(array, axis=1, weights=weights)
        results['mean_%s' % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(
            np.average((array - array_means[:, np.newaxis]) ** 2,
                       axis=1, weights=weights))
        results['std_%s' % key_name] = array_stds

        if rank:
            results["rank_%s" % key_name] = np.asarray(rankdata(
                -array_means, method='min'), dtype=np.int32)

    _store('fit_time', fit_time)
    _store('score_time', score_time)
    results['estimators'] = estimators
    results['train_index'] = train_idxs
    results['test_index'] = test_idxs
    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(
        partial(MaskedArray, np.empty(n_candidates,), mask=True,
                dtype=object))
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            # An all masked empty array gets created for the key
            # `"param_%s" % name` at the first occurrence of `name`.
            # Setting the value at an index also unmasks that index
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    # Store a list of param dicts at the key 'params'
    results['params'] = candidate_params

    # NOTE test_sample counts (weights) remain the same for all candidates
    test_sample_counts = np.array(test_sample_counts[:n_splits],
                                  dtype=np.int)

    for scorer_name in scorers.keys():
        # Compute the (weighted) mean and std for test scores alone
        _store('test_%s' % scorer_name, test_scores[scorer_name],
               splits=True, rank=True,
               weights=test_sample_counts if self.iid else None)
        if self.return_train_score:
            prev_keys = set(results.keys())
            _store('train_%s' % scorer_name, train_scores[scorer_name],
                   splits=True)

            if self.return_train_score == 'warn':
                for key in set(results.keys()) - prev_keys:
                    message = (
                        'You are accessing a training score ({!r}), '
                        'which will not be available by default '
                        'any more in 0.21. If you need training scores, '
                        'please set return_train_score=True').format(key)
                    # warn on key access
                    results.add_warning(key, message, FutureWarning)

    # For multi-metric evaluation, store the best_index_, best_params_ and
    # best_score_ iff refit is one of the scorer names
    # In single metric evaluation, refit_metric is "score"
    if self.refit or not self.multimetric_:
        self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
        self.best_params_ = candidate_params[self.best_index_]
        self.best_score_ = results["mean_test_%s"
                                   % refit_metric][self.best_index_]

    if self.refit:
        self.best_estimator_ = clone(base_estimator).set_params(
            **self.best_params_)
        if y is not None:
            self.best_estimator_.fit(X, y, **fit_params)
        else:
            self.best_estimator_.fit(X, **fit_params)

    # Store the only scorer not as a dict for single metric evaluation
    self.scorer_ = scorers if self.multimetric_ else scorers['score']

    self.cv_results_ = results
    self.n_splits_ = n_splits

    return self
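# Illustrative standalone sketch (not part of the class above) of how
# `generate_index` pairs per-array splits so that fold i of every 3-D array
# in X is consumed together. The array shape (4, 9, 5) and the assumption
# that axis 1 becomes the sample axis after transpose(1, 2, 0) are
# hypothetical choices for the demo:
def _demo_generate_index():
    import numpy as np
    from sklearn.model_selection import KFold

    cv = KFold(n_splits=3)
    X_list = [np.random.rand(4, 9, 5), np.random.rand(4, 9, 5)]
    y_list = [np.arange(9), np.arange(9)]
    n_splits = min(cv.get_n_splits(X_.transpose(1, 2, 0), y_, None)
                   for X_, y_ in zip(X_list, y_list))
    split = [cv.split(X_.transpose(1, 2, 0), y_)
             for X_, y_ in zip(X_list, y_list)]
    for i in range(n_splits):
        # one (train, test) index pair per array, all belonging to fold i
        train_folds, test_folds = zip(*[next(s) for s in split])
        print(i, [idx.shape for idx in train_folds],
              [idx.shape for idx in test_folds])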
def repeated_cross_validate(estimator, X, y=None, groups=None, scoring=None,
                            cv=None, n_jobs=1, n_reps=1, verbose=0,
                            fit_params=None, pre_dispatch='2*n_jobs',
                            return_train_score="warn"):
    """Evaluate metric(s) by repeated cross-validation and also record
    fit/score times.

    Read more in the :ref:`User Guide <multimetric_cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like
        The data to fit. Can be for example a list, or an array.

    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset into
        train/test set.

    scoring : string, callable, list/tuple, dict or None, default: None
        A single string (see :ref:`scoring_parameter`) or a callable
        (see :ref:`scoring`) to evaluate the predictions on the test set.

        For evaluating multiple metrics, either give a list of (unique)
        strings or a dict with names as keys and callables as values.

        NOTE that when using custom scorers, each scorer should return a
        single value. Metric functions returning a list/array of values
        can be wrapped into multiple scorers that return one value each.

        See :ref:`multimetric_grid_search` for an example.

        If None, the estimator's default scorer (if available) is used.

    cv : array-like, a collection of cross-validation generators, with
        length n_reps
        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

    n_jobs : integer, optional
        The number of CPUs to use to do the computation.
        -1 means 'all CPUs'.

    n_reps : integer, optional
        Number of repetitions; must equal ``len(cv)``.

    verbose : integer, optional
        The verbosity level.

    fit_params : dict, optional
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int, or string, optional
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - None, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A string, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    return_train_score : boolean, optional
        Whether to include train scores.

        Current default is ``'warn'``, which behaves as ``True`` in addition
        to raising a warning when a training score is looked up.
        That default will be changed to ``False`` in 0.21.
        Computing training scores is used to get insights on how different
        parameter settings impact the overfitting/underfitting trade-off.
        However computing the scores on the training set can be
        computationally expensive and is not strictly required to select
        the parameters that yield the best generalization performance.

    Returns
    -------
    repeated_decision_scores : dict of `decision_scores` dicts, of
        shape=(n_reps,)
        A dict mapping each repetition index ``0 .. n_reps - 1`` to the score
        dict of that repetition; each score dict also stores the
        cross-validator that produced it under the key ``'cross_validator'``.
    """
    if len(cv) != n_reps:
        raise ValueError(
            "Set n_reps = {}. Got only {} cross validators.".format(
                n_reps, len(cv)))
    n_folds = np.unique(
        [cross_validator.get_n_splits() for cross_validator in cv])
    if len(n_folds) != 1:
        raise ValueError(
            "Cross validators are not unified in fold number: {}".format(
                n_folds))
    n_folds = n_folds[0]

    X, y, groups = indexable(X, y, groups)

    # cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)

    # ---------------------- My Hack ----------------------- #
    # 1) Set parameter `error_score=-1` to `_fit_and_score`  #
    # 2) Created an argument `return_estimator` to           #
    #    `_fit_and_score`                                    #
    # ------------------------------------------------------ #
    tasks = [[
        delayed(_fit_and_score)(clone(estimator), X, y, scorers, train, test,
                                verbose, None, fit_params,
                                return_train_score=return_train_score,
                                return_times=True, return_estimator=True,
                                error_score=-1)
        for train, test in cross_validator.split(X, y, groups)
    ] for cross_validator in cv]
    # Flatten this list of lists into a simple list
    tasks = itertools.chain.from_iterable(tasks)
    scores = parallel(tasks)

    if return_train_score:
        train_scores, test_scores, fit_times, score_times, estimators = zip(
            *scores)
        train_scores = _aggregate_score_dicts(train_scores)
    else:
        test_scores, fit_times, score_times, estimators = zip(*scores)
    test_scores = _aggregate_score_dicts(test_scores)

    # TODO: replace by a dict in 0.21
    ret = DeprecationDict() if return_train_score == 'warn' else {}
    ret['fit_time'] = np.array(fit_times)
    ret['score_time'] = np.array(score_times)
    ret['estimator'] = list(estimators)

    for name in scorers:
        ret['test_%s' % name] = np.array(test_scores[name])
        if return_train_score:
            key = 'train_%s' % name
            ret[key] = np.array(train_scores[name])
            if return_train_score == 'warn':
                message = (
                    'You are accessing a training score ({!r}), '
                    'which will not be available by default '
                    'any more in 0.21. If you need training scores, '
                    'please set return_train_score=True').format(key)
                # warn on key access
                ret.add_warning(key, message, FutureWarning)

    # Now `ret` is a dictionary whose values are all sequences of length
    # `n_folds * n_reps`. Split it into `n_reps` sub-dictionaries whose
    # values are of length `n_folds`.
    rep_rets = list(_split_dict(ret, chunk_size=n_folds))

    assert len(rep_rets) == n_reps

    for i in range(0, n_reps):
        rep_rets[i]["cross_validator"] = cv[i]

    result = dict(zip(range(0, n_reps), rep_rets))
    return result
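# Hedged usage sketch for `repeated_cross_validate` (assumes the patched
# `_fit_and_score` with `return_estimator`/`error_score` and the
# `_split_dict` helper referenced above are available in this module; the
# demo function name is hypothetical):
def _example_repeated_cross_validate():
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import KFold

    X, y = load_iris(return_X_y=True)
    cv_list = [KFold(n_splits=5, shuffle=True, random_state=seed)
               for seed in range(3)]
    reps = repeated_cross_validate(LogisticRegression(), X, y,
                                   scoring='accuracy', cv=cv_list, n_reps=3,
                                   return_train_score=False)
    # reps[0]['test_score'] holds the 5 fold scores of the first repetition;
    # reps[0]['cross_validator'] is the KFold object that produced them.
    return reps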
def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None,
                   n_jobs=1, verbose=0, fit_params=None,
                   pre_dispatch='2*n_jobs', return_train_score="warn"):
    """Evaluate metric(s) by cross-validation and also record fit/score times.

    Read more in the :ref:`User Guide <multimetric_cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like
        The data to fit. Can be for example a list, or an array.

    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset into
        train/test set.

    scoring : string, callable, list/tuple, dict or None, default: None
        A single string (see :ref:`scoring_parameter`) or a callable
        (see :ref:`scoring`) to evaluate the predictions on the test set.

        For evaluating multiple metrics, either give a list of (unique)
        strings or a dict with names as keys and callables as values.

        NOTE that when using custom scorers, each scorer should return a
        single value. Metric functions returning a list/array of values
        can be wrapped into multiple scorers that return one value each.

        See :ref:`multimetric_grid_search` for an example.

        If None, the estimator's default scorer (if available) is used.

    cv : int, cross-validation generator or an iterable, optional
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 3-fold cross validation,
        - integer, to specify the number of folds in a `(Stratified)KFold`,
        - An object to be used as a cross-validation generator.
        - An iterable yielding train, test splits.

        For integer/None inputs, if the estimator is a classifier and ``y``
        is either binary or multiclass, :class:`StratifiedKFold` is used.
        In all other cases, :class:`KFold` is used.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

    n_jobs : integer, optional
        The number of CPUs to use to do the computation.
        -1 means 'all CPUs'.

    verbose : integer, optional
        The verbosity level.

    fit_params : dict, optional
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int, or string, optional
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - None, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A string, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    return_train_score : boolean, optional
        Whether to include train scores.

        Current default is ``'warn'``, which behaves as ``True`` in addition
        to raising a warning when a training score is looked up.
        That default will be changed to ``False`` in 0.21.
        Computing training scores is used to get insights on how different
        parameter settings impact the overfitting/underfitting trade-off.
        However computing the scores on the training set can be
        computationally expensive and is not strictly required to select
        the parameters that yield the best generalization performance.

    Returns
    -------
    decision_scores : dict of float arrays of shape=(n_splits,)
        Array of results of the estimator for each run of the cross
        validation.

        A dict of arrays containing the score/time arrays for each scorer is
        returned. The possible keys for this ``dict`` are:

            ``test_score``
                The score array for test scores on each cv split.
            ``train_score``
                The score array for train scores on each cv split.
                This is available only if ``return_train_score`` parameter
                is ``True``.
            ``fit_time``
                The time for fitting the estimator on the train
                set for each cv split.
            ``score_time``
                The time for scoring the estimator on the test set for each
                cv split. (Note time for scoring on the train set is not
                included even if ``return_train_score`` is set to ``True``.)
            ``estimator``
                A list of estimator objects, one for each training dataset.

    Examples
    --------
    >>> from sklearn import datasets, linear_model
    >>> from sklearn.model_selection import cross_validate
    >>> from sklearn.metrics.scorer import make_scorer
    >>> from sklearn.metrics import confusion_matrix
    >>> from sklearn.svm import LinearSVC
    >>> diabetes = datasets.load_diabetes()
    >>> X = diabetes.data[:150]
    >>> y = diabetes.target[:150]
    >>> lasso = linear_model.Lasso()

    Single metric evaluation using ``cross_validate``

    >>> cv_results = cross_validate(lasso, X, y, return_train_score=False)
    >>> sorted(cv_results.keys())                         # doctest: +ELLIPSIS
    ['fit_time', 'score_time', 'test_score']
    >>> cv_results['test_score']    # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    array([ 0.33...,  0.08...,  0.03...])

    Multiple metric evaluation using ``cross_validate``
    (please refer the ``scoring`` parameter doc for more information)

    >>> decision_scores = cross_validate(lasso, X, y,
    ...                                  scoring=('r2',
    ...                                           'neg_mean_squared_error'))
    >>> print(decision_scores['test_neg_mean_squared_error'])
    ... # doctest: +ELLIPSIS
    [-3635.5... -3573.3... -6114.7...]
    >>> print(decision_scores['train_r2'])                # doctest: +ELLIPSIS
    [ 0.28...  0.39...  0.22...]

    See Also
    ---------
    :func:`sklearn.model_selection.cross_val_score`:
        Run cross-validation for single metric evaluation.

    :func:`sklearn.metrics.make_scorer`:
        Make a scorer from a performance metric or loss function.

    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)

    # ---------------------- My Hack ----------------------- #
    # 1) Set parameter `error_score=-1` to `_fit_and_score`  #
    # 2) Created an argument `return_estimator` to           #
    #    `_fit_and_score`                                    #
    # ------------------------------------------------------ #
    scores = parallel(
        delayed(_fit_and_score)(clone(estimator), X, y, scorers, train, test,
                                verbose, None, fit_params,
                                return_train_score=return_train_score,
                                return_times=True, return_estimator=True,
                                error_score=-1)
        for train, test in cv.split(X, y, groups))

    if return_train_score:
        train_scores, test_scores, fit_times, score_times, estimators = zip(
            *scores)
        train_scores = _aggregate_score_dicts(train_scores)
    else:
        test_scores, fit_times, score_times, estimators = zip(*scores)
    test_scores = _aggregate_score_dicts(test_scores)

    # TODO: replace by a dict in 0.21
    ret = DeprecationDict() if return_train_score == 'warn' else {}
    ret['fit_time'] = np.array(fit_times)
    ret['score_time'] = np.array(score_times)
    ret['estimator'] = list(estimators)

    for name in scorers:
        ret['test_%s' % name] = np.array(test_scores[name])
        if return_train_score:
            key = 'train_%s' % name
            ret[key] = np.array(train_scores[name])
            if return_train_score == 'warn':
                message = (
                    'You are accessing a training score ({!r}), '
                    'which will not be available by default '
                    'any more in 0.21. If you need training scores, '
                    'please set return_train_score=True').format(key)
                # warn on key access
                ret.add_warning(key, message, FutureWarning)

    ret['cross_validator'] = cv

    return ret
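# Hedged usage sketch for the hacked `cross_validate` above (assumes the
# patched `_fit_and_score` accepting `return_estimator`/`error_score` is
# available in this module; the demo function name is hypothetical):
def _example_cross_validate_with_estimators():
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import KFold

    X, y = load_iris(return_X_y=True)
    res = cross_validate(LogisticRegression(), X, y,
                         cv=KFold(n_splits=3, shuffle=True, random_state=0),
                         return_train_score=False)
    # In addition to the stock keys, this variant exposes the fitted
    # estimator of each fold and the splitter that was used.
    fold_models = res['estimator']       # list of 3 fitted LogisticRegression
    splitter = res['cross_validator']    # the KFold instance passed in
    return fold_models, splitter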
def monkeypatch_fit(self, X, y=None, groups=None, **fit_params):
    if self.fit_params is not None:
        warnings.warn('"fit_params" as a constructor argument was '
                      'deprecated in version 0.19 and will be removed '
                      'in version 0.21. Pass fit parameters to the '
                      '"fit" method instead.', DeprecationWarning)
        if fit_params:
            warnings.warn('Ignoring fit_params passed as a constructor '
                          'argument in favor of keyword arguments to '
                          'the "fit" method.', RuntimeWarning)
        else:
            fit_params = self.fit_params
    estimator = self.estimator
    cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

    scorers, self.multimetric_ = _check_multimetric_scoring(
        self.estimator, scoring=self.scoring)

    if self.multimetric_:
        if self.refit is not False and (
                not isinstance(self.refit, six.string_types) or
                # This will work for both dict / list (tuple)
                self.refit not in scorers):
            raise ValueError("For multi-metric scoring, the parameter "
                             "refit must be set to a scorer key "
                             "to refit an estimator with the best "
                             "parameter setting on the whole data and "
                             "make the best_* attributes "
                             "available for that metric. If this is not "
                             "needed, refit should be set to False "
                             "explicitly. %r was passed." % self.refit)
        else:
            refit_metric = self.refit
    else:
        refit_metric = 'score'

    X, y, groups = indexable(X, y, groups)
    n_splits = cv.get_n_splits(X, y, groups)
    # Regenerate parameter iterable for each fit
    candidate_params = list(self._get_param_iterator())
    n_candidates = len(candidate_params)
    if self.verbose > 0:
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(n_splits, n_candidates,
                                 n_candidates * n_splits))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    # ===================================================================
    # BEGIN MONKEYPATCH MODIFICATION
    # ===================================================================
    parallel_cv = cv.split(X, y, groups)

    if type(self.pipeline_split_idx) == int and isinstance(base_estimator,
                                                           Pipeline):
        split_idx = self.pipeline_split_idx

        pre_pipe_steps = base_estimator.steps[:split_idx]
        new_pipe_steps = base_estimator.steps[split_idx:]
        memory = base_estimator.memory

        pre_pipe = Pipeline(pre_pipe_steps, memory)

        if len(new_pipe_steps) == 1:
            est_name, base_estimator = new_pipe_steps[0]
        else:
            est_name = None
            base_estimator = Pipeline(new_pipe_steps, memory)

        fit_params_pre_pipe = {}
        steps_pre_pipe = [tup[0] for tup in pre_pipe_steps]
        # materialize the keys so we can pop while iterating (Python 3)
        fit_param_keys = list(fit_params.keys())

        for pname in fit_param_keys:
            step, param = pname.split('__', 1)
            if step in steps_pre_pipe:
                fit_params_pre_pipe[pname] = fit_params.pop(pname)
            elif step == est_name:
                fit_params[param] = fit_params.pop(pname)

        if est_name is not None:
            for dic in candidate_params:
                # list() so we can pop keys while iterating (Python 3)
                for k in list(dic):
                    step, param = k.split('__', 1)
                    if step == est_name:
                        dic.update({param: dic.pop(k)})

        try:
            X = pre_pipe.fit_transform(X, **fit_params_pre_pipe)
        except TypeError:
            raise RuntimeError('Pipeline before pipeline_split_idx requires '
                               'fitting to y. Please initialize with an '
                               'earlier index.')

    if self.transform_before_grid and isinstance(base_estimator, Pipeline):
        pipe = base_estimator
        est_name, base_estimator = pipe.steps.pop()

        X_cv, y_cv, parallel_cv = [], [], []
        sample_count = 0

        fit_params_est = {}
        # materialize the keys so we can pop while iterating (Python 3)
        fit_param_keys = list(fit_params.keys())

        for pname in fit_param_keys:
            step, param = pname.split('__', 1)
            if step == est_name:
                fit_params_est[param] = fit_params.pop(pname)

        for dic in candidate_params:
            # list() so we can pop keys while iterating (Python 3)
            for k in list(dic):
                step, param = k.split('__', 1)
                if step == est_name:
                    dic.update({param: dic.pop(k)})

        for (train, test) in cv.split(X, y, groups):
            if y is not None:
                if isinstance(X, pd.DataFrame):
                    pipe.fit(X.iloc[train], y.iloc[train], **fit_params)
                else:
                    pipe.fit(X[train], y[train], **fit_params)
                y_cv.append(y)
            else:
                if isinstance(X, pd.DataFrame):
                    pipe.fit(X.iloc[train], **fit_params)
                else:
                    pipe.fit(X[train], **fit_params)

            X_cv.append(pipe.transform(X))
            train = train + sample_count
            test = test + sample_count
            sample_count += len(train)
            sample_count += len(test)
            parallel_cv.append((train, test))

        if isinstance(X, pd.DataFrame):
            X = pd.concat(tuple(X_cv))
        else:
            X = np.vstack(tuple(X_cv))

        if y is not None:
            if isinstance(y, pd.Series):
                y = pd.concat(tuple(y_cv))
            else:
                y = np.hstack(tuple(y_cv))

        if 'sample_weight' in fit_params_est:
            samp_weight = fit_params_est['sample_weight']
            fit_params_est['sample_weight'] = np.tile(samp_weight,
                                                      len(y_cv))

        fit_params = fit_params_est

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(delayed(monkeypatch_fit_and_score)(
        clone(base_estimator), X, y, scorers, train, test, self.verbose,
        parameters, fit_params=fit_params,
        return_train_score=self.return_train_score,
        return_n_test_samples=True, return_times=True,
        return_parameters=False, error_score=self.error_score)
      for parameters, (train, test) in product(candidate_params,
                                               parallel_cv))
    # ===================================================================
    # END MONKEYPATCH MODIFICATION
    # ===================================================================

    # if one chooses to see train score, "out" will contain train score info
    if self.return_train_score:
        (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
         score_time) = zip(*out)
    else:
        (test_score_dicts, test_sample_counts, fit_time,
         score_time) = zip(*out)

    # test_score_dicts and train_score_dicts are lists of dictionaries and
    # we make them into dict of lists
    test_scores = _aggregate_score_dicts(test_score_dicts)
    if self.return_train_score:
        train_scores = _aggregate_score_dicts(train_score_dicts)

    # TODO: replace by a dict in 0.21
    results = (DeprecationDict()
               if self.return_train_score == 'warn' else {})

    def _store(key_name, array, weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        # When iterated first by splits, then by parameters
        # We want `array` to have `n_candidates` rows and `n_splits` cols.
        array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                          n_splits)
        if splits:
            for split_i in range(n_splits):
                # Uses closure to alter the results
                results["split%d_%s"
                        % (split_i, key_name)] = array[:, split_i]

        array_means = np.average(array, axis=1, weights=weights)
        results['mean_%s' % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(np.average((array -
                                         array_means[:, np.newaxis]) ** 2,
                                        axis=1, weights=weights))
        results['std_%s' % key_name] = array_stds

        if rank:
            results["rank_%s" % key_name] = np.asarray(
                rankdata(-array_means, method='min'), dtype=np.int32)

    _store('fit_time', fit_time)
    _store('score_time', score_time)
    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(partial(MaskedArray,
                                        np.empty(n_candidates,),
                                        mask=True,
                                        dtype=object))
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            # An all masked empty array gets created for the key
            # `"param_%s" % name` at the first occurrence of `name`.
            # Setting the value at an index also unmasks that index
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    # Store a list of param dicts at the key 'params'
    results['params'] = candidate_params

    # NOTE test_sample counts (weights) remain the same for all candidates
    test_sample_counts = np.array(test_sample_counts[:n_splits],
                                  dtype=np.int)

    for scorer_name in scorers.keys():
        # Compute the (weighted) mean and std for test scores alone
        _store('test_%s' % scorer_name, test_scores[scorer_name],
               splits=True, rank=True,
               weights=test_sample_counts if self.iid else None)
        if self.return_train_score:
            prev_keys = set(results.keys())
            _store('train_%s' % scorer_name, train_scores[scorer_name],
                   splits=True)

            if self.return_train_score == 'warn':
                for key in set(results.keys()) - prev_keys:
                    message = (
                        'You are accessing a training score ({!r}), '
                        'which will not be available by default '
                        'any more in 0.21. If you need training scores, '
                        'please set return_train_score=True').format(key)
                    # warn on key access
                    results.add_warning(key, message, FutureWarning)

    # For multi-metric evaluation, store the best_index_, best_params_ and
    # best_score_ iff refit is one of the scorer names
    # In single metric evaluation, refit_metric is "score"
    if self.refit or not self.multimetric_:
        self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
        self.best_params_ = candidate_params[self.best_index_]
        self.best_score_ = results["mean_test_%s" % refit_metric][
            self.best_index_]

    if self.refit:
        self.best_estimator_ = clone(base_estimator).set_params(
            **self.best_params_)
        if y is not None:
            self.best_estimator_.fit(X, y, **fit_params)
        else:
            self.best_estimator_.fit(X, **fit_params)

    # Store the only scorer not as a dict for single metric evaluation
    self.scorer_ = scorers if self.multimetric_ else scorers['score']

    self.cv_results_ = results
    self.n_splits_ = n_splits

    return self
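# Illustrative sketch (hypothetical step names) of the fit_params routing the
# monkeypatch performs when splitting a Pipeline: keys that belong to steps
# before the split stay with the pre-pipeline, keys addressed to the final
# estimator lose their "step__" prefix so they can be passed to it directly.
def _example_split_fit_params():
    fit_params = {'scaler__copy': True, 'clf__sample_weight': [1, 1, 2]}
    steps_pre_pipe = ['scaler']
    est_name = 'clf'
    fit_params_pre_pipe = {}
    for pname in list(fit_params.keys()):  # list() so we can pop in the loop
        step, param = pname.split('__', 1)
        if step in steps_pre_pipe:
            fit_params_pre_pipe[pname] = fit_params.pop(pname)
        elif step == est_name:
            fit_params[param] = fit_params.pop(pname)
    # fit_params_pre_pipe == {'scaler__copy': True}
    # fit_params == {'sample_weight': [1, 1, 2]}
    return fit_params_pre_pipe, fit_params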
def my_cross_validate(estimator, X, y, groups=None, scoring=None, cv=None,
                      n_jobs=1, verbose=0, fit_params=None,
                      pre_dispatch='2*n_jobs', return_train_score="warn"):
    """
    In this project, data is pre-split, and the estimator is always a
    classifier, so:

    cv: None (do not use)
    groups: None (do not use)
    X: ((X_train1, X_test1), (X_train2, X_test2), ...)
    y: ((y_train1, y_test1), (y_train2, y_test2), ...)
    """
    # X, y, groups = indexable(X, y, groups)
    # cv = check_cv(cv, y, classifier=is_classifier(estimator))

    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_my_fit_and_score)(clone(estimator), Xi, yi, scorers,
                                   verbose, None, fit_params,
                                   return_train_score=return_train_score,
                                   return_times=True)
        for Xi, yi in zip(X, y))

    if return_train_score:
        train_scores, test_scores, fit_times, score_times = zip(*scores)
        train_scores = _aggregate_score_dicts(train_scores)
    else:
        test_scores, fit_times, score_times = zip(*scores)
    test_scores = _aggregate_score_dicts(test_scores)

    # TODO: replace by a dict in 0.21
    ret = DeprecationDict() if return_train_score == 'warn' else {}
    ret['fit_time'] = np.array(fit_times)
    ret['score_time'] = np.array(score_times)

    for name in scorers:
        ret['test_%s' % name] = np.array(test_scores[name])
        if return_train_score:
            key = 'train_%s' % name
            ret[key] = np.array(train_scores[name])
            if return_train_score == 'warn':
                message = ('You are accessing a training score ({!r}), '
                           'which will not be available by default '
                           'any more in 0.21. If you need training scores, '
                           'please set return_train_score=True').format(key)
                # warn on key access
                ret.add_warning(key, message, FutureWarning)

    return ret
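# Hedged usage sketch for `my_cross_validate` (assumes `_my_fit_and_score`,
# referenced above, consumes pre-split (X_train, X_test) / (y_train, y_test)
# pairs exactly as the docstring describes; the demo function name is
# hypothetical):
def _example_my_cross_validate():
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    data, target = load_iris(return_X_y=True)
    # Build two pre-split folds by hand: X[i] = (X_train, X_test),
    # y[i] = (y_train, y_test), matching the docstring's layout.
    folds = [train_test_split(data, target, test_size=0.3, random_state=seed)
             for seed in (0, 1)]
    X = [(tr_X, te_X) for tr_X, te_X, _, _ in folds]
    y = [(tr_y, te_y) for _, _, tr_y, te_y in folds]
    return my_cross_validate(LogisticRegression(), X, y,
                             scoring='accuracy',
                             return_train_score=False)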