def cross_val_score(estimator, X, y=None, groups=None, scoring=None, cv=None,
                    n_jobs=1, verbose=0, fit_params=None,
                    pre_dispatch='2*n_jobs'):
    """Evaluate a score by cross-validation."""
    if not isinstance(scoring, (list, tuple)):
        scoring = [scoring]

    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    splits = list(cv.split(X, y, groups))
    scorer = [check_scoring(estimator, scoring=s) for s in scoring]
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(delayed(_fit_and_score)(clone(estimator), X, y, scorer,
                                              train, test, verbose, None,
                                              fit_params)
                      for train, test in splits)

    group_order = []
    if hasattr(cv, 'groups'):
        group_order = [np.array(cv.groups)[test].tolist()[0]
                       for _, test in splits]
    return np.squeeze(np.array(scores)), group_order
def split(self, X, y=None, groups=None):
    """Generate indices to split data into training and test set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.
    y : array-like, shape (n_samples,)
        The target variable for supervised learning problems.
    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset
        into train/test set.

    Yields
    ------
    train : ndarray
        The training set indices for that split.
    test : ndarray
        The testing set indices for that split.
    """
    # check arguments
    X, y, groups = indexable(X, y, groups)

    for train, test in super(ShuffleSplitWithinGroups, self).split(X, y, groups):
        yield train, test
def permutation_test_score(estimator, X, y, groups=None, cv=None,
                           n_permutations=100, n_jobs=1, random_state=0,
                           verbose=0, scoring=None):
    """Evaluate the significance of a cross-validated score with permutations,
    as in test 1 of [Ojala2010]_.

    A modification of original sklearn's permutation test score function
    to evaluate p-value outside this function, so that the score can be
    reused from outside.

    .. [Ojala2010] Ojala and Garriga. Permutation Tests for Studying Classifier
        Performance. The Journal of Machine Learning Research (2010) vol. 11
    """
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    random_state = check_random_state(random_state)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(_permutation_test_score)(
            clone(estimator), X, _shuffle(y, groups, random_state),
            groups, cv, scorer)
        for _ in range(n_permutations))
    permutation_scores = np.array(permutation_scores)
    return permutation_scores
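# Hedged usage sketch (not part of the original code): the docstring above says
# the p-value is meant to be computed outside this function, by comparing a
# separately obtained cross-validated score against the returned permutation
# scores. The helper name and the placeholder arguments are assumptions.
def _example_permutation_pvalue(estimator, X, y, cv=None, scoring=None,
                                n_permutations=100):
    import numpy as np
    from sklearn.model_selection import cross_val_score as sk_cross_val_score

    # Observed (unpermuted) cross-validated score.
    observed = sk_cross_val_score(estimator, X, y, cv=cv, scoring=scoring).mean()
    # Scores under the permutation null, from the function above.
    permutation_scores = permutation_test_score(estimator, X, y, cv=cv,
                                                n_permutations=n_permutations,
                                                scoring=scoring)
    # One-sided p-value as in Ojala & Garriga (2010), test 1.
    return (np.sum(permutation_scores >= observed) + 1.0) / (n_permutations + 1.0)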
def _wrapped_cross_val_score(sklearn_pipeline, features, target, cv,
                             scoring_function, sample_weight=None, groups=None):
    """Fit estimator and compute scores for a given dataset split.

    Parameters
    ----------
    sklearn_pipeline : pipeline object implementing 'fit'
        The object to use to fit the data.
    features : array-like of shape at least 2D
        The data to fit.
    target : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.
    cv : int or cross-validation generator
        If CV is a number, then it is the number of folds to evaluate each
        pipeline over in k-fold cross-validation during the TPOT optimization
        process. If it is an object then it is an object to be used as a
        cross-validation generator.
    scoring_function : callable
        A scorer callable object / function with signature
        ``scorer(estimator, X, y)``.
    sample_weight : array-like, optional
        List of sample weights to balance (or un-balance) the dataset target
        as needed.
    groups : array-like {n_samples,}, optional
        Group labels for the samples used while splitting the dataset into
        train/test set.
    """
    sample_weight_dict = set_sample_weight(sklearn_pipeline.steps, sample_weight)

    features, target, groups = indexable(features, target, groups)

    cv = check_cv(cv, target, classifier=is_classifier(sklearn_pipeline))
    cv_iter = list(cv.split(features, target, groups))
    scorer = check_scoring(sklearn_pipeline, scoring=scoring_function)

    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            scores = [_fit_and_score(estimator=clone(sklearn_pipeline),
                                     X=features,
                                     y=target,
                                     scorer=scorer,
                                     train=train,
                                     test=test,
                                     verbose=0,
                                     parameters=None,
                                     fit_params=sample_weight_dict)
                      for train, test in cv_iter]
            CV_score = np.array(scores)[:, 0]
            return np.nanmean(CV_score)
    except TimeoutException:
        return "Timeout"
    except Exception as e:
        return -float('inf')
def _generate_sample(self, zscored=False, full=False):
    from sklearn.utils import indexable
    X = self._Xtr_zs.copy() if zscored else self._Xtrain.copy()

    sample_x = [tuple(x) for x in X[self._ftnames].values]
    labels_y = X[[self._rate_column]].values.ravel().tolist()

    if full:
        X = self._Xtest.copy()
        LOG.warning('Requested fitting in both train and test '
                    'datasets, appending %d examples', len(X))
        sample_x += [tuple(x) for x in X[self._ftnames].values]
        labels_y += X[[self._rate_column]].values.ravel().tolist()

    groups = None
    if not full:
        groups = self.get_groups()

    return indexable(np.array(sample_x), labels_y, groups)
def my_cross_val_predict(estimator, X, y=None, groups=None, cv=None, n_jobs=1,
                         verbose=0, fit_params=None, pre_dispatch='2*n_jobs',
                         method='predict'):
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))

    # Ensure the estimator has implemented the passed decision function
    if not callable(getattr(estimator, method)):
        raise AttributeError('{} not implemented in estimator'.format(method))

    if method in ['decision_function', 'predict_proba', 'predict_log_proba']:
        le = LabelEncoder()
        y = le.fit_transform(y)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    prediction_blocks = parallel(delayed(_my_fit_and_predict)(
        clone(estimator), X, y, train, test, verbose, fit_params, method)
        for train, test in cv.split(X, y, groups))

    # Concatenate the predictions
    predictions = [pred_block_i for pred_block_i, _, _ in prediction_blocks]
    test_indices = np.concatenate([indices_i
                                   for _, indices_i, _ in prediction_blocks])
    scores = np.concatenate([score_i for _, _, score_i in prediction_blocks])

    if not _check_is_permutation(test_indices, _num_samples(X)):
        raise ValueError('cross_val_predict only works for partitions')

    inv_test_indices = np.empty(len(test_indices), dtype=int)
    inv_test_indices[test_indices] = np.arange(len(test_indices))

    # Check for sparse predictions
    if sp.issparse(predictions[0]):
        predictions = sp.vstack(predictions, format=predictions[0].format)
    else:
        predictions = np.concatenate(predictions)
    return predictions[inv_test_indices], scores
def cross_val_score_filter_feature_selection(model, filter_function,
                                             filter_criteria, X, y,
                                             scoring=None, cv=None, n_jobs=1,
                                             verbose=0, fit_params=None,
                                             pre_dispatch='2*n_jobs'):
    X, y = indexable(X, y)
    cv = check_cv(cv, X, y, classifier=is_classifier(model))
    scorer = check_scoring(model, scoring=scoring)
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    # Apply the filter-based feature selection to each training fold before
    # fitting and scoring that split.
    scores = parallel(delayed(_fit_and_score)(clone(model),
                                              filter_function(X, y, train, filter_criteria),
                                              y, scorer, train, test,
                                              verbose, None, fit_params)
                      for train, test in cv)

    return np.array(scores)[:, 0]
def split(self, X, y=None, groups=None):
    """Generate indices to split data into training and test set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.
    y : array-like, shape (n_samples,)
        The target variable for supervised learning problems.
    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset
        into train/test set.

    Yields
    ------
    train : ndarray
        The training set indices for that split.
    test : ndarray
        The testing set indices for that split.
    """
    # check arguments
    X, y, groups = indexable(X, y, groups)

    # get the number of samples
    n_samples = X.shape[0]
    if self.n_splits > n_samples:
        raise ValueError(
            ("Cannot have number of splits n_splits={0} greater"
             " than the number of samples: n_samples={1}.")
            .format(self.n_splits, n_samples))

    for train, test in super(KFoldWithinGroups, self).split(X, y, groups):
        yield train, test
def split(self, X, y=None, groups=None):
    """Generate indices to split data into training and test set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.
    y : array-like, shape (n_samples,)
        The target variable for supervised learning problems.
    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset
        into train/test set.

    Yields
    ------
    train : ndarray
        The training set indices for that split.
    test : ndarray
        The testing set indices for that split.
    """
    X, y, groups = indexable(X, y, groups)
    n_samples = X.shape[0]
    if self.n_splits > n_samples:
        raise ValueError(
            ("Cannot have number of splits n_splits={0} greater"
             " than the number of samples: n_samples={1}.")
            .format(self.n_splits, n_samples))

    # generate test fold
    test_fold = np.arange(n_samples, dtype=int) % self.n_splits
    cv = PredefinedSplit(test_fold)
    return cv.split()
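# Hedged illustration (not from the original source): the modulo-based test
# fold above assigns samples to folds in an interleaved, round-robin fashion.
# This stand-alone snippet shows the same assignment with plain scikit-learn;
# the helper name and default sizes are placeholders.
def _example_interleaved_folds(n_samples=10, n_splits=3):
    import numpy as np
    from sklearn.model_selection import PredefinedSplit

    # Sample i is placed in test fold i % n_splits, e.g. [0, 1, 2, 0, 1, ...]
    test_fold = np.arange(n_samples, dtype=int) % n_splits
    return list(PredefinedSplit(test_fold).split())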
def cross_val_multiscore(estimator, X, y=None, groups=None, scoring=None,
                         cv=None, n_jobs=1, verbose=0, fit_params=None,
                         pre_dispatch='2*n_jobs'):
    """Evaluate a score by cross-validation.

    Parameters
    ----------
    estimator : instance of sklearn.base.BaseEstimator
        The object to use to fit the data.
        Must implement the 'fit' method.
    X : array-like, shape (n_samples, n_dimensional_features,)
        The data to fit. Can be, for example a list, or an array at least 2d.
    y : array-like, shape (n_samples, n_targets,)
        The target variable to try to predict in the case of
        supervised learning.
    groups : array-like, with shape (n_samples,)
        Group labels for the samples used while splitting the dataset into
        train/test set.
    scoring : str, callable | None
        A string (see model evaluation documentation) or
        a scorer callable object / function with signature
        ``scorer(estimator, X, y)``.
        Note that when using an estimator which inherently returns
        multidimensional output - in particular, SlidingEstimator
        or GeneralizingEstimator - you should set the scorer
        there, not here.
    cv : int, cross-validation generator | iterable
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 3-fold cross validation,
        - integer, to specify the number of folds in a ``(Stratified)KFold``,
        - An object to be used as a cross-validation generator.
        - An iterable yielding train, test splits.

        For integer/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass,
        :class:`sklearn.model_selection.StratifiedKFold` is used. In all
        other cases, :class:`sklearn.model_selection.KFold` is used.
    %(n_jobs)s
    verbose : int, optional
        The verbosity level.
    fit_params : dict, optional
        Parameters to pass to the fit method of the estimator.
    pre_dispatch : int, or str, optional
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

        - None, in which case all the jobs are immediately
          created and spawned. Use this for lightweight and
          fast-running jobs, to avoid delays due to on-demand
          spawning of the jobs
        - An int, giving the exact number of total jobs that are
          spawned
        - A string, giving an expression as a function of n_jobs,
          as in '2*n_jobs'

    Returns
    -------
    scores : array of float, shape (n_splits,) | shape (n_splits, n_scores)
        Array of scores of the estimator for each run of the cross validation.
    """
    # This code is copied from sklearn
    from sklearn.base import clone
    from sklearn.utils import indexable
    from sklearn.model_selection._split import check_cv

    check_scoring = _get_check_scoring()

    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    cv_iter = list(cv.split(X, y, groups))
    scorer = check_scoring(estimator, scoring=scoring)
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    # Note: this parallelization is implemented using MNE Parallel
    parallel, p_func, n_jobs = parallel_func(_fit_and_score, n_jobs,
                                             pre_dispatch=pre_dispatch)
    scores = parallel(p_func(clone(estimator), X, y, scorer, train, test,
                             verbose, None, fit_params)
                      for train, test in cv_iter)
    return np.array(scores)[:, 0, ...]  # flatten over joblib output.
def fit(self, X, y=None, groups=None, **fit_params):
    """Run fit with all sets of parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.
    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.
    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset into
        train/test set.
    **fit_params : dict of string -> object
        Parameters passed to the ``fit`` method of the estimator
    """
    if self.fit_params is not None:
        warnings.warn('"fit_params" as a constructor argument was '
                      'deprecated in version 0.19 and will be removed '
                      'in version 0.21. Pass fit parameters to the '
                      '"fit" method instead.', DeprecationWarning)
        if fit_params:
            warnings.warn('Ignoring fit_params passed as a constructor '
                          'argument in favor of keyword arguments to '
                          'the "fit" method.', RuntimeWarning)
        else:
            fit_params = self.fit_params
    estimator = self.estimator
    cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

    scorers, self.multimetric_ = _check_multimetric_scoring(
        self.estimator, scoring=self.scoring)

    if self.multimetric_:
        if self.refit is not False and (
                not isinstance(self.refit, six.string_types) or
                # This will work for both dict / list (tuple)
                self.refit not in scorers):
            raise ValueError("For multi-metric scoring, the parameter "
                             "refit must be set to a scorer key "
                             "to refit an estimator with the best "
                             "parameter setting on the whole data and "
                             "make the best_* attributes "
                             "available for that metric. If this is not "
                             "needed, refit should be set to False "
                             "explicitly. %r was passed." % self.refit)
        else:
            refit_metric = self.refit
    else:
        refit_metric = 'score'

    X, y, groups = indexable(X, y, groups)
    n_splits = cv.get_n_splits(X, y, groups)
    # Regenerate parameter iterable for each fit
    candidate_params = list(self._get_param_iterator())
    n_candidates = len(candidate_params)
    if self.verbose > 0:
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(n_splits, n_candidates,
                                 n_candidates * n_splits))

    base_estimator = clone(self.estimator)

    if self.spark is None:
        out = self._run_sklearn_fit(base_estimator, X, y, scorers,
                                    fit_params, candidate_params, cv, groups)
    else:
        out = self._run_skspark_fit(base_estimator, X, y, scorers,
                                    fit_params, candidate_params, cv, groups)

    # if one choose to see train score, "out" will contain train score info
    if self.return_train_score:
        (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
         score_time) = zip(*out)
    else:
        (test_score_dicts, test_sample_counts, fit_time,
         score_time) = zip(*out)

    # test_score_dicts and train_score dicts are lists of dictionaries and
    # we make them into dict of lists
    test_scores = _aggregate_score_dicts(test_score_dicts)
    if self.return_train_score:
        train_scores = _aggregate_score_dicts(train_score_dicts)

    # TODO: replace by a dict in 0.21
    from sklearn.utils.deprecation import DeprecationDict
    results = (DeprecationDict() if self.return_train_score == 'warn'
               else {})

    def _store(key_name, array, weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        # When iterated first by splits, then by parameters
        # We want `array` to have `n_candidates` rows and `n_splits` cols.
        array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                          n_splits)
        if splits:
            for split_i in range(n_splits):
                # Uses closure to alter the results
                results["split%d_%s" % (split_i, key_name)] = array[:, split_i]

        array_means = np.average(array, axis=1, weights=weights)
        results['mean_%s' % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(
            np.average((array - array_means[:, np.newaxis]) ** 2,
                       axis=1, weights=weights))
        results['std_%s' % key_name] = array_stds

        if rank:
            results["rank_%s" % key_name] = np.asarray(
                rankdata(-array_means, method='min'), dtype=np.int32)

    _store('fit_time', fit_time)
    _store('score_time', score_time)
    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(partial(MaskedArray,
                                        np.empty(n_candidates,),
                                        mask=True,
                                        dtype=object))
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            # An all masked empty array gets created for the key
            # `"param_%s" % name` at the first occurence of `name`.
            # Setting the value at an index also unmasks that index
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    # Store a list of param dicts at the key 'params'
    results['params'] = candidate_params

    # NOTE test_sample counts (weights) remain the same for all candidates
    test_sample_counts = np.array(test_sample_counts[:n_splits],
                                  dtype=np.int)

    for scorer_name in scorers.keys():
        # Computed the (weighted) mean and std for test scores alone
        _store('test_%s' % scorer_name, test_scores[scorer_name],
               splits=True, rank=True,
               weights=test_sample_counts if self.iid else None)
        if self.return_train_score:
            prev_keys = set(results.keys())
            _store('train_%s' % scorer_name, train_scores[scorer_name],
                   splits=True)

            if self.return_train_score == 'warn':
                for key in set(results.keys()) - prev_keys:
                    message = (
                        'You are accessing a training score ({!r}), '
                        'which will not be available by default '
                        'any more in 0.21. If you need training scores, '
                        'please set return_train_score=True').format(key)
                    # warn on key access
                    results.add_warning(key, message, FutureWarning)

    # For multi-metric evaluation, store the best_index_, best_params_ and
    # best_score_ iff refit is one of the scorer names
    # In single metric evaluation, refit_metric is "score"
    if self.refit or not self.multimetric_:
        self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
        self.best_params_ = candidate_params[self.best_index_]
        self.best_score_ = results["mean_test_%s" % refit_metric][
            self.best_index_]

    if self.refit:
        self.best_estimator_ = clone(base_estimator).set_params(
            **self.best_params_)
        if y is not None:
            self.best_estimator_.fit(X, y, **fit_params)
        else:
            self.best_estimator_.fit(X, **fit_params)

    # Store the only scorer not as a dict for single metric evaluation
    self.scorer_ = scorers if self.multimetric_ else scorers['score']

    self.cv_results_ = results
    self.n_splits_ = n_splits

    return self
def one_class_learning_curve(estimator, X, y, groups=None,
                             train_sizes=np.linspace(0.1, 1.0, 5), cv=None,
                             scoring=None, n_jobs=1, pre_dispatch="all",
                             verbose=0, shuffle=False, random_state=None):
    """One-class learning curve.

    Determines cross-validated training and test scores for different
    one-class training set sizes. This should help choosing the best
    downsampling ratio.

    A cross-validation generator splits the whole dataset k times in training
    and test data. Subsets of the training set with varying sizes will be used
    to train the estimator and a score for each training subset size and the
    test set will be computed. Afterwards, the scores will be averaged over
    all k runs for each training subset size.

    Read more in the :ref:`User Guide <learning_curve>`.

    Parameters
    ----------
    estimator : object type that implements the "fit" and "predict" methods
        An object of that type which is cloned for each validation.

    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape (n_samples) or (n_samples, n_features), optional
        Target relative to X for classification or regression;
        None for unsupervised learning.

    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset into
        train/test set.

    train_sizes : array-like, shape (n_ticks,), dtype float or int
        Relative or absolute numbers of training examples that will be used to
        generate the learning curve. If the dtype is float, it is regarded as
        a fraction of the maximum size of the training set (that is determined
        by the selected validation method), i.e. it has to be within (0, 1].
        Otherwise it is interpreted as absolute sizes of the training sets.
        Note that for classification the number of samples usually have to
        be big enough to contain at least one sample from each class.
        (default: np.linspace(0.1, 1.0, 5))

    cv : int, cross-validation generator or an iterable, optional
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 3-fold cross validation,
        - integer, to specify the number of folds in a `(Stratified)KFold`,
        - An object to be used as a cross-validation generator.
        - An iterable yielding train, test splits.

        For integer/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

    scoring : string, callable or None, optional, default: None
        A string (see model evaluation documentation) or
        a scorer callable object / function with signature
        ``scorer(estimator, X, y)``.

    n_jobs : integer, optional
        Number of jobs to run in parallel (default 1).

    pre_dispatch : integer or string, optional
        Number of predispatched jobs for parallel execution (default is
        all). The option can reduce the allocated memory. The string can
        be an expression like '2*n_jobs'.

    verbose : integer, optional
        Controls the verbosity: the higher, the more messages.

    shuffle : boolean, optional
        Whether to shuffle training data before taking prefixes of it
        based on ``train_sizes``.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`. Used when ``shuffle`` is True.

    Returns
    -------
    train_sizes_abs : array, shape = (n_unique_ticks,), dtype int
        Numbers of training examples that has been used to generate the
        learning curve. Note that the number of ticks might be less
        than n_ticks because duplicate entries will be removed.

    train_scores : array, shape (n_ticks, n_cv_folds)
        Scores on training sets.

    test_scores : array, shape (n_ticks, n_cv_folds)
        Scores on test set.

    Notes
    -----
    See :ref:`examples/model_selection/plot_learning_curve.py
    <sphx_glr_auto_examples_model_selection_plot_learning_curve.py>`
    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    # Store it as list as we will be iterating over the list multiple times
    cv_iter = list(cv.split(X, y, groups))

    scorer = check_scoring(estimator, scoring=scoring)

    n_max_training_samples = len(cv_iter[0][0])
    # Because the lengths of folds can be significantly different, it is
    # not guaranteed that we use all of the available training data when we
    # use the first 'n_max_training_samples' samples.
    train_sizes_abs = _translate_train_sizes(train_sizes,
                                             n_max_training_samples)
    n_unique_ticks = train_sizes_abs.shape[0]
    if verbose > 0:
        print("[learning_curve] Training set sizes: " + str(train_sizes_abs))

    parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch,
                        verbose=verbose)

    if shuffle:
        rng = check_random_state(random_state)
        cv_iter = ((rng.permutation(train), test) for train, test in cv_iter)

    one_class_sizes = list()
    train_test_proportions = []
    for train, test in cv_iter:
        pos = train[y.iloc[train] == 1]
        for n_train_samples in train_sizes_abs:
            train_split = train[:n_train_samples]
            neg = train_split[y.iloc[train_split] == 0]
            selected = np.concatenate((pos, neg), axis=0)
            train_test_proportions.append((selected, test))
            if len(one_class_sizes) < train_sizes_abs.shape[0]:
                one_class_sizes.append(neg.shape[0])

    out = parallel(delayed(_fit_and_score)(
        clone(estimator), X, y, scorer, train, test, verbose,
        parameters=None, fit_params=None, return_train_score=True)
        for train, test in train_test_proportions)
    out = np.array(out)
    n_cv_folds = out.shape[0] // n_unique_ticks
    out = out.reshape(n_cv_folds, n_unique_ticks, 2)

    out = np.asarray(out).transpose((2, 1, 0))

    return np.array(one_class_sizes), out[0], out[1]
def cross_val_decision_function(estimator, X, y=None, cv=None, n_jobs=1,
                                verbose=0, fit_params=None,
                                pre_dispatch='2*n_jobs'):
    """Generate cross-validated estimates for each input data point

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit' and 'predict'
        The object to use to fit the data.

    X : array-like
        The data to fit. Can be, for example a list, or an array at least 2d.

    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.

    cv : int, cross-validation generator or an iterable, optional
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 3-fold cross-validation,
        - integer, to specify the number of folds.
        - An object to be used as a cross-validation generator.
        - An iterable yielding train/test splits.

        For integer/None inputs, if ``y`` is binary or multiclass,
        :class:`StratifiedKFold` used. If the estimator is a classifier
        or if ``y`` is neither binary nor multiclass, :class:`KFold` is used.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

    n_jobs : integer, optional
        The number of CPUs to use to do the computation. -1 means
        'all CPUs'.

    verbose : integer, optional
        The verbosity level.

    fit_params : dict, optional
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int, or string, optional
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

        - None, in which case all the jobs are immediately
          created and spawned. Use this for lightweight and
          fast-running jobs, to avoid delays due to on-demand
          spawning of the jobs
        - An int, giving the exact number of total jobs that are
          spawned
        - A string, giving an expression as a function of n_jobs,
          as in '2*n_jobs'

    Returns
    -------
    preds : ndarray
        This is the result of calling 'predict'
    """
    X, y = indexable(X, y)

    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    preds_blocks = parallel(delayed(_fit_and_predict)(clone(estimator), X, y,
                                                      train, test, verbose,
                                                      fit_params)
                            for train, test in cv)
    preds = [p for p, _ in preds_blocks]
    locs = np.concatenate([loc for _, loc in preds_blocks])
    if not _check_is_partition(locs, _num_samples(X)):
        raise ValueError('cross_val_predict only works for partitions')
    inv_locs = np.empty(len(locs), dtype=int)
    inv_locs[locs] = np.arange(len(locs))

    # Check for sparse predictions
    if sp.issparse(preds[0]):
        preds = sp.vstack(preds, format=preds[0].format)
    else:
        preds = np.concatenate(preds)
    return preds[inv_locs]
def fit(self, X, y, **kwargs):
    """
    Fit is the entry point for the visualizer. Given instances described by
    X and binary classes described in the target y, fit performs n trials
    by shuffling and splitting the dataset then computing the precision,
    recall, f1, and queue rate scores for each trial. The scores are
    aggregated by the quantiles expressed then drawn.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values. The target y must
        be a binary classification target.

    kwargs: dict
        keyword arguments passed to Scikit-Learn API.

    Returns
    -------
    self : instance
        Returns the instance of the visualizer

    raises: YellowbrickValueError
        If the target y is not a binary classification target.
    """
    # Check target before metrics raise crazy exceptions
    if type_of_target(y) != 'binary':
        raise YellowbrickValueError("multiclass format is not supported")

    # Make arrays indexable for cross validation
    X, y = indexable(X, y)

    # TODO: parallelize trials with joblib (using sklearn utility)
    # NOTE: parallelization with matplotlib is tricky at best!
    trials = [
        metric
        for idx in range(self.n_trials)
        for metric in self._split_fit_score_trial(X, y, idx)
    ]

    # Compute maximum number of uniform thresholds across all trials
    n_thresholds = np.array([len(t['thresholds']) for t in trials]).min()
    self.thresholds_ = np.linspace(0.0, 1.0, num=n_thresholds)

    # Filter metrics and collect values for uniform thresholds
    metrics = frozenset(METRICS) - self._check_exclude(self.exclude)
    uniform_metrics = defaultdict(list)

    for trial in trials:
        rows = defaultdict(list)
        for t in self.thresholds_:
            idx = bisect.bisect_left(trial['thresholds'], t)
            for metric in metrics:
                rows[metric].append(trial[metric][idx])

        for metric, row in rows.items():
            uniform_metrics[metric].append(row)

    # Convert metrics to metric arrays
    uniform_metrics = {
        metric: np.array(values)
        for metric, values in uniform_metrics.items()
    }

    # Perform aggregation and store cv_scores_
    quantiles = self._check_quantiles(self.quantiles)
    self.cv_scores_ = {}

    for metric, values in uniform_metrics.items():
        # Compute the lower, median, and upper plots
        lower, median, upper = mstats.mquantiles(values, prob=quantiles,
                                                 axis=0)

        # Store the aggregates in cv scores
        self.cv_scores_[metric] = median
        self.cv_scores_["{}_lower".format(metric)] = lower
        self.cv_scores_["{}_upper".format(metric)] = upper

    # Draw and always return self
    self.draw()
    return self
def fit(self, X, y, sample_weight=None):
    """Fit the calibrated model

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data.

    y : array-like, shape (n_samples,)
        Target values.

    sample_weight : array-like, shape = [n_samples] or None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    self : object
        Returns an instance of self.
    """
    X, y = check_X_y(X, y, accept_sparse=['csc', 'csr', 'coo'],
                     force_all_finite=False)
    X, y = indexable(X, y)

    df = self._preproc(X)
    weights = None
    if self.platts_trick:
        # Bayesian priors (see Platt end of section 2.2)
        prior0 = float(np.sum(y <= 0))
        prior1 = y.shape[0] - prior0

        weights = np.zeros_like(y).astype(float)
        weights[y > 0] = (prior1 + 1.) / (prior1 + 2.)
        weights[y <= 0] = 1. / (prior0 + 2.)
        y = np.append(np.ones_like(y), np.zeros_like(y))
        weights = np.append(weights, 1.0 - weights)
        df = np.append(df, df)

    if self.method is None:
        self.calibrator = _DummyCalibration()
    elif self.method == 'isotonic':
        self.calibrator = IsotonicRegression(out_of_bounds='clip')
    elif self.method == 'sksigmoid':
        self.calibrator = sk_sigmoid()
    elif self.method == 'sksigmoid_notrick':
        self.calibrator = sk_sigmoid_notrick()
    elif self.method == 'sigmoid':
        self.calibrator = _SigmoidCalibration()
    elif self.method == 'beta':
        self.calibrator = BetaCalibration(parameters="abm")
    elif self.method == 'beta_am':
        self.calibrator = BetaCalibration(parameters="am")
    elif self.method == 'beta_ab':
        self.calibrator = BetaCalibration(parameters="ab")
    elif self.method == 'beta_test_strict':
        self.calibrator = BetaCalibration(parameters="abm")
    elif self.method == 'beta_test_relaxed':
        self.calibrator = BetaCalibration(parameters="abm")
    elif self.method == 'beta_test':
        self.calibrator = _BetaTestedCalibration()
    else:
        raise ValueError('method should be None, "sigmoid", '
                         '"isotonic", "beta", "beta_am" or "beta_ab". '
                         'Got %s.' % self.method)
    self.calibrator.fit(df, y, weights)
    if self.method == 'beta':
        df_pos = df[y == 1]
        df_neg = df[y == 0]

        # alpha_pos_nll, beta_pos_nll = fit_beta_nll(df_pos)
        # alpha_neg_nll, beta_neg_nll = fit_beta_nll(df_neg)
        #
        # a_nll = alpha_pos_nll - alpha_neg_nll
        # b_nll = beta_neg_nll - beta_pos_nll
        # m_nll = fit_beta_midpoint(alpha_pos_nll, beta_pos_nll,
        #                           alpha_neg_nll, beta_neg_nll)

        alpha_pos_mmt, beta_pos_mmt = fit_beta_moments(df_pos)
        alpha_neg_mmt, beta_neg_mmt = fit_beta_moments(df_neg)

        a_mmt = alpha_pos_mmt - alpha_neg_mmt
        if a_mmt < 0 or np.isnan(a_mmt):
            a_mmt = 0
        b_mmt = beta_neg_mmt - beta_pos_mmt
        if b_mmt < 0 or np.isnan(b_mmt):
            b_mmt = 0
        prior_pos = len(df_pos) / len(df)
        prior_neg = len(df_neg) / len(df)
        m_mmt = fit_beta_midpoint(prior_pos, alpha_pos_mmt, beta_pos_mmt,
                                  prior_neg, alpha_neg_mmt, beta_neg_mmt)
        map = self.calibrator.calibrator_.map_
        # if a_mmt > 4 and map[0] < 2:
        #     print [a_mmt, map[0]]
        #     print [b_mmt, map[1]]
        #     print [m_mmt, map[2]]
        #     exit()
        # if b_mmt > 4 and map[1] < 2:
        #     print [a_mmt, map[0]]
        #     print [b_mmt, map[1]]
        #     print [m_mmt, map[2]]
        #     exit()
        self.a = [a_mmt, map[0]]
        self.b = [b_mmt, map[1]]
        self.m = [m_mmt, map[2]]
        self.df_pos = df_pos
        self.df_neg = df_neg
    return self
def split(self, X, y=None, groups=None):
    """Generate indices to split data into training and test set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.
    y : array-like, shape (n_samples,)
        Always ignored, exists for compatibility.
    groups : array-like, with shape (n_samples,)
        Always ignored, exists for compatibility.

    Yields
    ------
    train : ndarray
        The training set indices for that split.
    test : ndarray
        The testing set indices for that split.
    """
    X, y, groups = indexable(X, y, groups)
    n_samples = _num_samples(X)
    n_splits = self.n_splits
    n_folds = n_splits + 1
    gap_size = self.gap_size
    rollback_size = self.rollback_size
    if self.test_size is not None:
        test_size = self.test_size
    else:
        test_size = n_samples // n_folds

    # Make sure we have enough samples for the given split parameters
    if n_folds > n_samples:
        raise ValueError(
            (f"Cannot have number of folds={n_folds} greater"
             f" than the number of samples={n_samples}."))
    if rollback_size >= test_size:
        raise ValueError(
            (f"test_size={test_size} should be strictly "
             f"larger than rollback_size={rollback_size}"))
    first_test = n_samples - (test_size - rollback_size) * n_splits
    first_test -= rollback_size
    if first_test < 0:
        raise ValueError(
            (f"Too many splits={n_splits} for number of samples"
             f"={n_samples} with test_size={test_size} and "
             f"rollback_size={rollback_size}."))

    indices = np.arange(n_samples)
    test_starts = range(first_test, n_samples, test_size - rollback_size)
    test_starts = test_starts[0:n_splits]

    for test_start in test_starts:
        train_end = test_start - gap_size
        if self.max_train_size and self.max_train_size < train_end:
            yield (indices[train_end - self.max_train_size:train_end],
                   indices[test_start:test_start + test_size])
        else:
            yield (indices[:max(train_end, 0)],
                   indices[test_start:test_start + test_size])
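# Hedged sketch (not from the original source): the same walk-forward window
# arithmetic as in the split() above, written as a stand-alone function so the
# resulting train/test index ranges can be inspected without the surrounding
# class. The helper name and the default sizes are placeholders.
def _example_rolling_windows(n_samples=20, n_splits=3, test_size=5,
                             gap_size=1, rollback_size=2):
    import numpy as np
    indices = np.arange(n_samples)
    # Each new test window rolls back `rollback_size` samples into the
    # previous one, and a gap of `gap_size` samples separates train and test.
    first_test = n_samples - (test_size - rollback_size) * n_splits - rollback_size
    test_starts = range(first_test, n_samples, test_size - rollback_size)[:n_splits]
    return [(indices[:max(test_start - gap_size, 0)],
             indices[test_start:test_start + test_size])
            for test_start in test_starts]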
def cross_val_predict_proba(estimator, X, y=None, cv=None, n_jobs=1,
                            verbose=0, fit_params=None,
                            pre_dispatch='2*n_jobs'):
    """Generate cross-validated estimates for each input data point

    Parameters
    ----------
    estimator : estimator object implementing 'fit' and 'predict'
        The object to use to fit the data.

    X : array-like
        The data to fit. Can be, for example a list, or an array at least 2d.

    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.

    cv : cross-validation generator or int, optional, default: None
        A cross-validation generator to use. If int, determines the number
        of folds in StratifiedKFold if y is binary or multiclass and
        estimator is a classifier, or the number of folds in KFold otherwise.
        If None, it is equivalent to cv=3.
        This generator must include all elements in the test set exactly once.
        Otherwise, a ValueError is raised.

    n_jobs : integer, optional
        The number of CPUs to use to do the computation. -1 means
        'all CPUs'.

    verbose : integer, optional
        The verbosity level.

    fit_params : dict, optional
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int, or string, optional
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

        - None, in which case all the jobs are immediately
          created and spawned. Use this for lightweight and
          fast-running jobs, to avoid delays due to on-demand
          spawning of the jobs
        - An int, giving the exact number of total jobs that are
          spawned
        - A string, giving an expression as a function of n_jobs,
          as in '2*n_jobs'

    Returns
    -------
    probs : ndarray
        This is the result of calling 'predict_proba'
    """
    X, y = indexable(X, y)

    cv = _check_cv(cv, X, y, classifier=is_classifier(estimator))
    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    probs_blocks = parallel(delayed(_fit_and_predict_proba)(clone(estimator),
                                                            X, y, train, test,
                                                            verbose,
                                                            fit_params)
                            for train, test in cv)
    p = np.concatenate([p for p, _ in probs_blocks])
    locs = np.concatenate([loc for _, loc in probs_blocks])
    if not _check_is_partition(locs, X.shape[0]):
        raise ValueError('cross_val_predict_proba only works for partitions')
    probs = p.copy()
    probs[locs] = p
    return probs
def train_test_split(*arrays, **options):
    """Extend sklearn.model_selection.train_test_split to have group split.

    Parameters
    ----------
    *arrays : sequence of indexables with same length / shape[0]
        Allowed inputs are lists, numpy arrays, scipy-sparse
        matrices or pandas dataframes.

    test_size : float, int or None, optional (default=None)
        If float, should be between 0.0 and 1.0 and represent the proportion
        of the dataset to include in the test split. If int, represents the
        absolute number of test samples. If None, the value is set to the
        complement of the train size. If ``train_size`` is also None, it will
        be set to 0.25.

    train_size : float, int, or None, (default=None)
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the dataset to include in the train split. If
        int, represents the absolute number of train samples. If None,
        the value is automatically set to the complement of the test size.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    shuffle : None or str (default='simple')
        How to shuffle the data before splitting.
        None, no shuffle.
        For str, one of 'simple', 'stratified' and 'group', corresponding to
        `ShuffleSplit`, `StratifiedShuffleSplit` and `GroupShuffleSplit`,
        respectively.

    labels : array-like or None (default=None)
        Ignored if shuffle is None or 'simple'.
        When shuffle='stratified', this array is used as class labels.
        When shuffle='group', this array is used as groups.

    Returns
    -------
    splitting : list, length=2 * len(arrays)
        List containing train-test split of inputs.
    """
    n_arrays = len(arrays)
    if n_arrays == 0:
        raise ValueError("At least one array required as input")

    test_size = options.pop('test_size', None)
    train_size = options.pop('train_size', None)
    random_state = options.pop('random_state', None)
    shuffle = options.pop('shuffle', 'simple')
    labels = options.pop('labels', None)

    if options:
        raise TypeError("Invalid parameters passed: %s" % str(options))

    arrays = indexable(*arrays)

    n_samples = _num_samples(arrays[0])
    if shuffle == 'group':
        if labels is None:
            raise ValueError("When shuffle='group', "
                             "labels should not be None!")
        labels = check_array(labels, ensure_2d=False, dtype=None)
        uniques = np.unique(labels)
        n_samples = uniques.size

    n_train, n_test = _validate_shuffle_split(n_samples, test_size, train_size,
                                              default_test_size=0.25)

    shuffle_options = dict(test_size=n_test,
                           train_size=n_train,
                           random_state=random_state)

    if shuffle is None:
        if labels is not None:
            warnings.warn("The `labels` is ignored for "
                          "shuffle being None!")
        train = np.arange(n_train)
        test = np.arange(n_train, n_train + n_test)

    elif shuffle == 'simple':
        if labels is not None:
            warnings.warn("The `labels` is not needed and therefore "
                          "ignored for ShuffleSplit, as shuffle='simple'!")
        cv = ShuffleSplit(**shuffle_options)
        train, test = next(cv.split(X=arrays[0], y=None))

    elif shuffle == 'stratified':
        cv = StratifiedShuffleSplit(**shuffle_options)
        train, test = next(cv.split(X=arrays[0], y=labels))

    elif shuffle == 'group':
        cv = GroupShuffleSplit(**shuffle_options)
        train, test = next(cv.split(X=arrays[0], y=None, groups=labels))

    else:
        raise ValueError("The argument `shuffle` only supports None, "
                         "'simple', 'stratified' and 'group', but got `%s`!"
                         % shuffle)

    return list(chain.from_iterable((safe_indexing(a, train),
                                     safe_indexing(a, test))
                                    for a in arrays))
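# Hedged usage sketch (not part of the original source): splitting feature and
# target arrays so that all samples sharing a group label land on the same side
# of the split, via the shuffle='group' option above. The toy arrays and the
# helper name are illustrative only.
def _example_group_split():
    import numpy as np
    X = np.arange(20).reshape(10, 2)
    y = np.array([0, 1] * 5)
    groups = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
    # Returns [X_train, X_test, y_train, y_test]; the test set holds one
    # whole group (20% of the 5 unique groups, rounded up).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, shuffle='group', labels=groups, test_size=0.2, random_state=0)
    return X_train, X_test, y_train, y_test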
def fit(self, X, y, sample_weight=None):
    """Fit the calibrated model

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data.

    y : array-like, shape (n_samples,)
        Target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    self : object
        Returns an instance of self.
    """
    X, y = indexable(X, y)
    le = LabelBinarizer().fit(y)
    self.classes_ = le.classes_

    # Check that each cross-validation fold can have at least one
    # example per class
    n_folds = self.cv if isinstance(self.cv, int) \
        else self.cv.n_folds if hasattr(self.cv, "n_folds") else None
    if n_folds and np.any([np.sum(y == class_) < n_folds
                           for class_ in self.classes_]):
        raise ValueError("Requesting %d-fold cross-validation but provided"
                         " less than %d examples for at least one class."
                         % (n_folds, n_folds))

    self.calibrated_classifiers_ = []
    if self.base_estimator is None:
        # we want all classifiers that don't expose a random_state
        # to be deterministic (and we don't want to expose this one).
        base_estimator = LinearSVC(random_state=0)
    else:
        base_estimator = self.base_estimator

    if self.cv == "prefit":
        calibrated_classifier = _CalibratedClassifier(
            base_estimator, method=self.method)
        calibrated_classifier.fit(X, y, sample_weight)
        self.calibrated_classifiers_.append(calibrated_classifier)
    else:
        cv = check_cv(self.cv, y, classifier=True)
        fit_parameters = signature(base_estimator.fit).parameters
        base_estimator_supports_sw = "sample_weight" in fit_parameters

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X)

            if not base_estimator_supports_sw:
                estimator_name = type(base_estimator).__name__
                warnings.warn("Since %s does not support sample_weights, "
                              "sample weights will only be used for the "
                              "calibration itself." % estimator_name)

        if self.ensemble:
            for train, test in cv.split(X, y):
                this_estimator = clone(base_estimator)

                if sample_weight is not None and base_estimator_supports_sw:
                    this_estimator.fit(X[train], y[train],
                                       sample_weight=sample_weight[train])
                else:
                    this_estimator.fit(X[train], y[train])

                calibrated_classifier = _CalibratedClassifier(
                    this_estimator, method=self.method,
                    classes=self.classes_)
                sw = None if sample_weight is None else sample_weight[test]
                calibrated_classifier.fit(X[test], y[test], sample_weight=sw)
                self.calibrated_classifiers_.append(calibrated_classifier)
        else:
            if hasattr(base_estimator, "decision_function"):
                base_estimator_method = "decision_function"
            elif hasattr(base_estimator, "predict_proba"):
                base_estimator_method = "predict_proba"
            else:
                raise RuntimeError('classifier has no decision_function '
                                   'or predict_proba method.')
            predictions = cross_val_predict(base_estimator, X, y, cv=cv,
                                            method=base_estimator_method)

            this_estimator = clone(base_estimator)
            if sample_weight is not None and base_estimator_supports_sw:
                this_estimator.fit(X, y, sample_weight=sample_weight)
            else:
                this_estimator.fit(X, y)

            calibrated_classifier = _CalibratedClassifier(
                this_estimator, method=self.method, classes=self.classes_,
                predictions_in_X=True)

            if hasattr(this_estimator, "decision_function"):
                if predictions.ndim == 1:
                    predictions = predictions[:, np.newaxis]
            elif hasattr(this_estimator, "predict_proba"):
                if len(self.classes_) == 2:
                    predictions = predictions[:, 1:]

            calibrated_classifier.fit(predictions, y, sample_weight)
            self.calibrated_classifiers_.append(calibrated_classifier)

    return self
def cross_validate_checkpoint(
    estimator,
    X,
    y=None,
    *,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=None,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
    return_train_score=False,
    return_estimator=False,
    error_score=np.nan,
    workdir=None,
    checkpoint=True,
    force_refresh=False,
    serialize_cv=False,
):
    """Evaluate metric(s) by cross-validation and also record fit/score times.

    This is a copy of :func:`sklearn:sklearn.model_selection.cross_validate`
    that uses :func:`_fit_and_score_ckpt` to checkpoint scores and estimators
    for each CV split.
    Read more in the :ref:`sklearn user guide
    <sklearn:multimetric_cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like of shape (n_samples, n_features)
        The data to fit. Can be for example a list, or an array.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like of shape (n_samples,), default=None
        Group labels for the samples used while splitting the dataset into
        train/test set. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`sklearn:GroupKFold`).

    scoring : str, callable, list/tuple, or dict, default=None
        A single str (see :ref:`sklearn:scoring_parameter`) or a callable
        (see :ref:`sklearn:scoring`) to evaluate the predictions on the test
        set. For evaluating multiple metrics, either give a list of (unique)
        strings or a dict with names as keys and callables as values.
        NOTE that when using custom scorers, each scorer should return a
        single value. Metric functions returning a list/array of values can
        be wrapped into multiple scorers that return one value each.
        See :ref:`sklearn:multimetric_grid_search` for an example.
        If None, the estimator's score method is used.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - an sklearn `CV splitter
          <https://scikit-learn.org/stable/glossary.html#term-cv-splitter>`_,
        - An iterable yielding (train, test) splits as arrays of indices.

        For int/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass,
        :class:`sklearn.model_selection.StratifiedKFold` is used. In all
        other cases, :class:`sklearn.model_selection.KFold` is used.
        Refer :ref:`sklearn user guide <sklearn:cross_validation>` for the
        various cross-validation strategies that can be used here.

    n_jobs : int, default=None
        The number of CPUs to use to do the computation.
        ``None`` means 1 unless in a :obj:`joblib:joblib.parallel_backend`
        context. ``-1`` means using all processors. See
        :term:`sklearn Glossary <sklearn:n_jobs>` for more details.

    verbose : int, default=0
        The verbosity level.

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int or str, default='2*n_jobs'
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

        - None, in which case all the jobs are immediately
          created and spawned. Use this for lightweight and
          fast-running jobs, to avoid delays due to on-demand
          spawning of the jobs
        - An int, giving the exact number of total jobs that are
          spawned
        - A str, giving an expression as a function of n_jobs,
          as in '2*n_jobs'

    return_train_score : bool, default=False
        Whether to include train scores.
        Computing training scores is used to get insights on how different
        parameter settings impact the overfitting/underfitting trade-off.
        However computing the scores on the training set can be
        computationally expensive and is not strictly required to select
        the parameters that yield the best generalization performance.

    return_estimator : bool, default=False
        Whether to return the estimators fitted on each split.

    error_score : 'raise' or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised. This
        parameter does not affect the refit step, which will always raise
        the error.

    workdir : path-like object, default=None
        A string or :term:`python:path-like-object` indicating the directory
        in which to store checkpoint files.

    checkpoint : bool, default=True
        If True, checkpoint the parameters, estimators, and scores.

    force_refresh : bool, default=False
        If True, recompute scores even if the checkpoint file already exists.
        Otherwise, load scores from checkpoint files and return.

    serialize_cv : bool, default=False
        If True, do not use joblib.Parallel to evaluate each CV split.

    Returns
    -------
    scores : dict of float arrays of shape (n_splits,)
        Array of scores of the estimator for each run of the cross validation.

        A dict of arrays containing the score/time arrays for each scorer is
        returned. The possible keys for this ``dict`` are:

            ``test_score``
                The score array for test scores on each cv split.
                Suffix ``_score`` in ``test_score`` changes to a specific
                metric like ``test_r2`` or ``test_auc`` if there are
                multiple scoring metrics in the scoring parameter.

            ``train_score``
                The score array for train scores on each cv split.
                Suffix ``_score`` in ``train_score`` changes to a specific
                metric like ``train_r2`` or ``train_auc`` if there are
                multiple scoring metrics in the scoring parameter.
                This is available only if ``return_train_score`` parameter
                is ``True``.

            ``fit_time``
                The time for fitting the estimator on the train
                set for each cv split.

            ``score_time``
                The time for scoring the estimator on the test set for each
                cv split. (Note time for scoring on the train set is not
                included even if ``return_train_score`` is set to ``True``.)

            ``estimator``
                The estimator objects for each cv split.
                This is available only if ``return_estimator`` parameter
                is set to ``True``.

    Examples
    --------
    >>> import shutil
    >>> import tempfile
    >>> from sklearn import datasets, linear_model
    >>> from afqinsight import cross_validate_checkpoint
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> diabetes = datasets.load_diabetes()
    >>> X = diabetes.data[:150]
    >>> y = diabetes.target[:150]
    >>> lasso = linear_model.Lasso()

    Single metric evaluation using ``cross_validate``

    >>> cv_results = cross_validate_checkpoint(lasso, X, y, cv=3, checkpoint=False)
    >>> sorted(cv_results.keys())
    ['fit_time', 'score_time', 'test_score']
    >>> cv_results['test_score']
    array([0.33150734, 0.08022311, 0.03531764])

    Multiple metric evaluation using ``cross_validate``, an estimator
    pipeline, and checkpointing
    (please refer the ``scoring`` parameter doc for more information)

    >>> tempdir = tempfile.mkdtemp()
    >>> scaler = StandardScaler()
    >>> pipeline = make_pipeline(scaler, lasso)
    >>> scores = cross_validate_checkpoint(pipeline, X, y, cv=3,
    ...                                    scoring=('r2', 'neg_mean_squared_error'),
    ...                                    return_train_score=True, checkpoint=True,
    ...                                    workdir=tempdir, return_estimator=True)
    >>> shutil.rmtree(tempdir)
    >>> print(scores['test_neg_mean_squared_error'])
    [-2479.2... -3281.2... -3466.7...]
    >>> print(scores['train_r2'])
    [0.507... 0.602... 0.478...]

    See Also
    --------
    sklearn.model_selection.cross_val_score:
        Run cross-validation for single metric evaluation.
    sklearn.model_selection.cross_val_predict:
        Get predictions from each split of cross-validation for diagnostic
        purposes.
    sklearn.metrics.make_scorer:
        Make a scorer from a performance metric or loss function.
    """
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    if serialize_cv:
        scores = [
            _fit_and_score_ckpt(
                workdir=workdir,
                checkpoint=checkpoint,
                force_refresh=force_refresh,
                estimator=clone(estimator),
                X=X,
                y=y,
                scorer=scorers,
                train=train,
                test=test,
                verbose=verbose,
                parameters=None,
                fit_params=fit_params,
                return_train_score=return_train_score,
                return_times=True,
                return_estimator=return_estimator,
                error_score=error_score,
            )
            for train, test in cv.split(X, y, groups)
        ]
    else:
        parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                            pre_dispatch=pre_dispatch)
        scores = parallel(
            delayed(_fit_and_score_ckpt)(
                workdir=workdir,
                checkpoint=checkpoint,
                force_refresh=force_refresh,
                estimator=clone(estimator),
                X=X,
                y=y,
                scorer=scorers,
                train=train,
                test=test,
                verbose=verbose,
                parameters=None,
                fit_params=fit_params,
                return_train_score=return_train_score,
                return_times=True,
                return_estimator=return_estimator,
                error_score=error_score,
            )
            for train, test in cv.split(X, y, groups))

    zipped_scores = list(zip(*scores))
    if return_train_score:
        train_scores = zipped_scores.pop(0)
        train_scores = _aggregate_score_dicts(train_scores)
    if return_estimator:
        fitted_estimators = zipped_scores.pop()
    test_scores, fit_times, score_times = zipped_scores
    test_scores = _aggregate_score_dicts(test_scores)

    ret = {}
    ret["fit_time"] = np.array(fit_times)
    ret["score_time"] = np.array(score_times)

    if return_estimator:
        ret["estimator"] = fitted_estimators

    for name in scorers:
        ret["test_%s" % name] = np.array(test_scores[name])
        if return_train_score:
            key = "train_%s" % name
            ret[key] = np.array(train_scores[name])

    return ret
def _cross_val_predict(estimator, X, y=None, *, groups=None, cv=None, n_jobs=None, verbose=0, fit_params=None, pre_dispatch='2*n_jobs', method='predict', safe=True): """This is a fork from :meth:`~sklearn.model_selection.cross_val_predict` to allow for non-safe cloning of the models for each fold. Parameters ---------- estimator : estimator object implementing 'fit' and 'predict' The object to use to fit the data. X : array-like of shape (n_samples, n_features) The data to fit. Can be, for example a list, or an array at least 2d. y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ default=None The target variable to try to predict in the case of supervised learning. groups : array-like of shape (n_samples,), default=None Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a "Group" :term:`cv` instance (e.g., :class:`GroupKFold`). cv : int, cross-validation generator or an iterable, default=None Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - CV splitter, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold. n_jobs : int, default=None The number of CPUs to use to do the computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary <n_jobs>` for more details. verbose : int, default=0 The verbosity level. fit_params : dict, defualt=None Parameters to pass to the fit method of the estimator. pre_dispatch : int or str, default='2*n_jobs' Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs' method : str, default='predict' Invokes the passed method name of the passed estimator. For method='predict_proba', the columns correspond to the classes in sorted order. safe : bool, default=True Whether to clone with safe option. Returns ------- predictions : ndarray This is the result of calling ``method`` """ X, y, groups = indexable(X, y, groups) cv = check_cv(cv, y, classifier=is_classifier(estimator)) splits = list(cv.split(X, y, groups)) test_indices = np.concatenate([test for _, test in splits]) if not _check_is_permutation(test_indices, _num_samples(X)): raise ValueError('cross_val_predict only works for partitions') # If classification methods produce multiple columns of output, # we need to manually encode classes to ensure consistent column ordering. 
encode = method in [ 'decision_function', 'predict_proba', 'predict_log_proba' ] and y is not None if encode: y = np.asarray(y) if y.ndim == 1: le = LabelEncoder() y = le.fit_transform(y) elif y.ndim == 2: y_enc = np.zeros_like(y, dtype=int) for i_label in range(y.shape[1]): y_enc[:, i_label] = LabelEncoder().fit_transform(y[:, i_label]) y = y_enc # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) predictions = parallel( delayed(_fit_and_predict)(clone(estimator, safe=safe), X, y, train, test, verbose, fit_params, method) for train, test in splits) from pkg_resources import parse_version if parse_version(sklearn.__version__) < parse_version("0.24.0"): # Prior to 0.24.0, this private scikit-learn method returned a tuple of two values predictions = [p[0] for p in predictions] inv_test_indices = np.empty(len(test_indices), dtype=int) inv_test_indices[test_indices] = np.arange(len(test_indices)) if sp.issparse(predictions[0]): predictions = sp.vstack(predictions, format=predictions[0].format) elif encode and isinstance(predictions[0], list): # `predictions` is a list of method outputs from each fold. # If each of those is also a list, then treat this as a # multioutput-multiclass task. We need to separately concatenate # the method outputs for each label into an `n_labels` long list. n_labels = y.shape[1] concat_pred = [] for i_label in range(n_labels): label_preds = np.concatenate([p[i_label] for p in predictions]) concat_pred.append(label_preds) predictions = concat_pred else: predictions = np.concatenate(predictions) if isinstance(predictions, list): return [p[inv_test_indices] for p in predictions] else: return predictions[inv_test_indices]
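The index bookkeeping at the end of _cross_val_predict is easy to miss: fold predictions are concatenated in split order, and inv_test_indices maps them back to the original sample order. A self-contained toy illustration:

import numpy as np

splits = [(np.array([0, 1]), np.array([2, 3])),   # (train, test) for fold 0
          (np.array([2, 3]), np.array([0, 1]))]   # (train, test) for fold 1
fold_predictions = [np.array([10, 11]),            # predictions for samples 2 and 3
                    np.array([12, 13])]            # predictions for samples 0 and 1

test_indices = np.concatenate([test for _, test in splits])     # [2 3 0 1]
inv_test_indices = np.empty(len(test_indices), dtype=int)
inv_test_indices[test_indices] = np.arange(len(test_indices))   # [2 3 0 1]

predictions = np.concatenate(fold_predictions)                   # [10 11 12 13]
print(predictions[inv_test_indices])  # [12 13 10 11], i.e. samples 0..3 in order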
def fit(self, X, y, sample_weight=None): """Fit the calibrated model Parameters ---------- X : array-like, shape (n_samples, n_features) Training data. y : array-like, shape (n_samples,) Target values. sample_weight : array-like, shape = [n_samples] or None Sample weights. If None, then samples are equally weighted. Returns ------- self : object Returns an instance of self. """ # X, y = check_X_y(X, y, accept_sparse=['csc', 'csr', 'coo'], # force_all_finite=False) X, y = indexable(X, y) lb = LabelBinarizer().fit(y) self.classes_ = lb.classes_ # Check that each cross-validation fold can have at least one # example per class n_folds = (self.cv if isinstance(self.cv, int) else self.cv.n_folds if hasattr(self.cv, 'n_folds') else None) if n_folds and np.any( [np.sum(y == class_) < n_folds for class_ in self.classes_]): raise ValueError('Requesting %d-fold cross-validation but provided' ' less than %d examples for at least one class.' % (n_folds, n_folds)) self.calibrated_classifiers_ = [] if self.base_estimator is None: # we want all classifiers that don't expose a random_state # to be deterministic (and we don't want to expose this one). base_estimator = LinearSVC(random_state=0) else: base_estimator = self.base_estimator if self.cv == 'prefit': calibrated_classifier = _CalibratedClassifier( base_estimator, method=self.method, score_type=self.score_type) if sample_weight is not None: calibrated_classifier.fit(X, y, sample_weight) else: calibrated_classifier.fit(X, y) self.calibrated_classifiers_.append(calibrated_classifier) else: cv = check_cv(self.cv, X, y, classifier=True) fit_parameters = signature(base_estimator.fit).parameters estimator_name = type(base_estimator).__name__ if (sample_weight is not None and 'sample_weight' not in fit_parameters): warnings.warn('%s does not support sample_weight. Samples' ' weights are only used for the calibration' ' itself.' % estimator_name) base_estimator_sample_weight = None else: base_estimator_sample_weight = sample_weight for train, test in cv: this_estimator = clone(base_estimator) if base_estimator_sample_weight is not None: this_estimator.fit( X[train], y[train], sample_weight=base_estimator_sample_weight[train], ) else: this_estimator.fit(X[train], y[train]) calibrated_classifier = _CalibratedClassifier( this_estimator, method=self.method, score_type=self.score_type, ) if sample_weight is not None: calibrated_classifier.fit(X[test], y[test], sample_weight[test]) else: calibrated_classifier.fit(X[test], y[test]) self.calibrated_classifiers_.append(calibrated_classifier) return self
def cross_val_predict(estimator, y, X=None, cv=None, verbose=0, averaging="mean", **kwargs): # TODO: remove kwargs """Generate cross-validated estimates for each input data point Parameters ---------- estimator : estimator An estimator object that implements the ``fit`` method y : array-like or iterable, shape=(n_samples,) The time-series array. X : array-like, shape=[n_obs, n_vars], optional (default=None) An optional 2-d array of exogenous variables. cv : BaseTSCrossValidator or None, optional (default=None) An instance of cross-validation. If None, will use a RollingForecastCV. Note that for cross-validation predictions, the CV step cannot exceed the CV horizon, or there will be a gap between fold predictions. verbose : integer, optional The verbosity level. averaging : str or callable, one of ["median", "mean"] (default="mean") Unlike normal CV, time series CV might have different folds (windows) forecasting the same time step. After all forecast windows are made, we build a matrix of y x n_folds, populating each fold's forecasts like so:: nan nan nan # training samples nan nan nan nan nan nan nan nan nan 1 nan nan # test samples 4 3 nan 3 2.5 3.5 nan 6 5 nan nan 4 We then average each time step's forecasts to end up with our final prediction results. Examples -------- >>> import pmdarima as pm >>> from pmdarima.model_selection import cross_val_predict,\ ... RollingForecastCV >>> y = pm.datasets.load_wineind() >>> cv = RollingForecastCV(h=14, step=12) >>> preds = cross_val_predict( ... pm.ARIMA((1, 1, 2), seasonal_order=(0, 1, 1, 12)), y, cv=cv) >>> preds[:5] array([30710.45743168, 34902.94929722, 17994.16587163, 22127.71167249, 25473.60876435]) """ # Temporary shim until we remove `exogenous` support completely X, _ = pm_compat.get_X(X, **kwargs) y, X = indexable(y, X) y = check_endog(y, copy=False) cv = check_cv(cv) avgfunc = _check_averaging(averaging) # need to be careful here: # >>> cv = RollingForecastCV(step=6, h=4) # >>> cv_generator = cv.split(wineind) # >>> next(cv_generator) # (array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, # 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, # 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, # 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57]), # array([58, 59, 60, 61])) # >>> next(cv_generator) # (array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, # 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, # 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, # 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, # 60, 61, 62, 63]), # array([64, 65, 66, 67])) <~~ 64 vs. 61 if cv.step > cv.horizon: raise ValueError("CV step cannot be > CV horizon, or there will be a " "gap in predictions between folds") # clone estimator to make sure all folds are independent prediction_blocks = [ _fit_and_predict(fold, base.clone(estimator), y, X, train=train, test=test, verbose=verbose,) # TODO: fit params? for fold, (train, test) in enumerate(cv.split(y, X))] # Unlike normal CV, time series CV might have different folds (windows) # forecasting the same time step. In this stage, we build a matrix of # y x n_folds, populating each fold's forecasts like so: pred_matrix = np.ones((y.shape[0], len(prediction_blocks))) * np.nan for i, (pred_block, test_indices) in enumerate(prediction_blocks): pred_matrix[test_indices, i] = pred_block # from there, we need to apply nanmean (or some other metric) along rows # to agree on a forecast for a sample. 
test_mask = ~(np.isnan(pred_matrix).all(axis=1)) predictions = pred_matrix[test_mask] return avgfunc(predictions, axis=1)
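A small numeric sketch of the fold-averaging described in the docstring above: each fold fills one column of an (n_samples, n_folds) matrix, and overlapping test steps are reconciled with a NaN-aware mean. The shapes and values here are made up.

import numpy as np

n_samples, n_folds = 8, 2
pred_matrix = np.full((n_samples, n_folds), np.nan)
pred_matrix[np.array([4, 5, 6]), 0] = [1.0, 4.0, 3.0]   # fold 0 forecasts
pred_matrix[np.array([5, 6, 7]), 1] = [3.0, 2.5, 6.0]   # fold 1 forecasts

test_mask = ~np.isnan(pred_matrix).all(axis=1)           # drop pure-training rows
print(np.nanmean(pred_matrix[test_mask], axis=1))        # [1.   3.5  2.75 6.  ]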
def repeated_cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None, n_jobs=1, n_reps=1, verbose=0, fit_params=None, pre_dispatch='2*n_jobs', return_train_score="warn"): if len(cv) != n_reps: raise ValueError( "Set n_reps = {}. Got only {} cross validators.".format( n_reps, len(cv))) n_folds = np.unique( [cross_validator.get_n_splits() for cross_validator in cv]) if len(n_folds) != 1: raise ValueError( "Cross validators are not unified in fold number: {}".format( n_folds)) n_folds = n_folds[0] """Evaluate metric(s) by cross-validation and also record fit/score times. Read more in the :ref:`User Guide <multimetric_cross_validation>`. Parameters ---------- estimator : estimator object implementing 'fit' The object to use to fit the data. X : array-like The data to fit. Can be for example a list, or an array. y : array-like, optional, default: None The target variable to try to predict in the case of supervised learning. groups : array-like, with shape (n_samples,), optional Group labels for the samples used while splitting the dataset into train/test set. scoring : string, callable, list/tuple, dict or None, default: None A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values. NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. See :ref:`multimetric_grid_search` for an example. If None, the estimator's default scorer (if available) is used. cv : array-like, a collection of cross-validation generators, with length n_reps Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. n_jobs : integer, optional The number of CPUs to use to do the computation. -1 means 'all CPUs'. verbose : integer, optional The verbosity level. fit_params : dict, optional Parameters to pass to the fit method of the estimator. pre_dispatch : int, or string, optional Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' return_train_score : boolean, optional Whether to include train decision_scores. Current default is ``'warn'``, which behaves as ``True`` in addition to raising a warning when a training score is looked up. That default will be changed to ``False`` in 0.21. Computing training decision_scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the decision_scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. 
Returns ------- repeated_decision_scores : dict of `decision_scores` dicts, of shape=(n_reps,) """ X, y, groups = indexable(X, y, groups) # cv = check_cv(cv, y, classifier=is_classifier(estimator)) scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) # ---------------------- My Hack ----------------------- # # 1) Set parameter `error_score=-1` to `_fit_and_score` # # 2) Created an argument `return_estimator` to # # `_fit_and_score` # # ------------------------------------------------------ # tasks = [[ delayed(_fit_and_score)(clone(estimator), X, y, scorers, train, test, verbose, None, fit_params, return_train_score=return_train_score, return_times=True, return_estimator=True, error_score=-1) for train, test in cross_validator.split(X, y, groups) ] for cross_validator in cv] # Flatten this list of lists into a simple list tasks = itertools.chain.from_iterable(tasks) scores = parallel(tasks) if return_train_score: train_scores, test_scores, fit_times, score_times, estimators = zip( *scores) train_scores = _aggregate_score_dicts(train_scores) else: test_scores, fit_times, score_times, estimators = zip(*scores) test_scores = _aggregate_score_dicts(test_scores) # TODO: replace by a dict in 0.21 ret = DeprecationDict() if return_train_score == 'warn' else {} ret['fit_time'] = np.array(fit_times) ret['score_time'] = np.array(score_times) ret['estimator'] = list(estimators) for name in scorers: ret['test_%s' % name] = np.array(test_scores[name]) if return_train_score: key = 'train_%s' % name ret[key] = np.array(train_scores[name]) if return_train_score == 'warn': message = ( 'You are accessing a training score ({!r}), ' 'which will not be available by default ' 'any more in 0.21. If you need training decision_scores, ' 'please set return_train_score=True').format(key) # warn on key access ret.add_warning(key, message, FutureWarning) """ Now `ret` is a dictionary whose values are all sequences of length `n_folds * n_reps`. Split it into `n_reps` sub-dictionaries whose values are of length `n_folds` """ rep_rets = list(_split_dict(ret, chunk_size=n_folds)) assert len(rep_rets) == n_reps for i in range(0, n_reps): rep_rets[i]["cross_validator"] = cv[i] result = dict(zip(range(0, n_reps), rep_rets)) return result
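``_split_dict`` is not shown in this excerpt; the sketch below is an assumption about its behaviour based on how repeated_cross_validate uses it: a dict whose values have length n_folds * n_reps is split into n_reps dicts whose values each have length n_folds.

import numpy as np

def split_dict_sketch(d, chunk_size):
    """Yield successive sub-dicts whose values are consecutive chunks of the originals."""
    n_chunks = len(next(iter(d.values()))) // chunk_size
    for i in range(n_chunks):
        yield {k: np.asarray(v)[i * chunk_size:(i + 1) * chunk_size]
               for k, v in d.items()}

ret = {'fit_time': np.arange(6), 'test_score': np.linspace(0, 1, 6)}
for rep in split_dict_sketch(ret, chunk_size=3):   # two repetitions of three folds
    print(rep)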
def cross_val_predict(estimator, X, y=None, *, groups=None, cv=None, n_jobs=None, verbose=0, fit_params=None, pre_dispatch='2*n_jobs', method='predict'): """Generate cross-validated estimates for each input data point The data is split according to the cv parameter. Each sample belongs to exactly one test set, and its prediction is computed with an estimator fitted on the corresponding training set. Passing these predictions into an evaluation metric may not be a valid way to measure generalization performance. Results can differ from :func:`cross_validate` and :func:`cross_val_score` unless all tests sets have equal size and the metric decomposes over samples. Read more in the :ref:`User Guide <cross_validation>`. Parameters ---------- estimator : estimator object implementing 'fit' and 'predict' The object to use to fit the data. X : array-like of shape (n_samples, n_features) The data to fit. Can be, for example a list, or an array at least 2d. y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ default=None The target variable to try to predict in the case of supervised learning. groups : array-like of shape (n_samples,), default=None Group labels for the samples used while splitting the dataset into train/test set. Only used in conjunction with a "Group" :term:`cv` instance (e.g., :class:`GroupKFold`). cv : int, cross-validation generator or an iterable, default=None Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross validation, - int, to specify the number of folds in a `(Stratified)KFold`, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For int/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold. n_jobs : int, default=None The number of CPUs to use to do the computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary <n_jobs>` for more details. verbose : int, default=0 The verbosity level. fit_params : dict, defualt=None Parameters to pass to the fit method of the estimator. pre_dispatch : int or str, default='2*n_jobs' Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A str, giving an expression as a function of n_jobs, as in '2*n_jobs' method : str, default='predict' Invokes the passed method name of the passed estimator. For method='predict_proba', the columns correspond to the classes in sorted order. 
Returns ------- predictions : ndarray This is the result of calling ``method`` See also -------- cross_val_score : calculate score for each CV split cross_validate : calculate one or more scores and timings for each CV split Notes ----- In the case that one or more classes are absent in a training portion, a default score needs to be assigned to all instances for that class if ``method`` produces columns per class, as in {'decision_function', 'predict_proba', 'predict_log_proba'}. For ``predict_proba`` this value is 0. In order to ensure finite output, we approximate negative infinity by the minimum finite float value for the dtype in other cases. Examples -------- >>> from sklearn import datasets, linear_model >>> from sklearn.model_selection import cross_val_predict >>> diabetes = datasets.load_diabetes() >>> X = diabetes.data[:150] >>> y = diabetes.target[:150] >>> lasso = linear_model.Lasso() >>> y_pred = cross_val_predict(lasso, X, y, cv=3) """ X, y, groups = indexable(X, y, groups) cv = check_cv(cv, y, classifier=is_classifier(estimator)) # If classification methods produce multiple columns of output, # we need to manually encode classes to ensure consistent column ordering. encode = method in [ 'decision_function', 'predict_proba', 'predict_log_proba' ] and y is not None if encode: y = np.asarray(y) if y.ndim == 1: le = LabelEncoder() y = le.fit_transform(y) elif y.ndim == 2: y_enc = np.zeros_like(y, dtype=np.int) for i_label in range(y.shape[1]): y_enc[:, i_label] = LabelEncoder().fit_transform(y[:, i_label]) y = y_enc # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) prediction_blocks = parallel( delayed(_fit_and_predict)(clone(estimator), X, y, train, test, verbose, fit_params, method) for train, test in cv.split(X, y, groups)) # Concatenate the predictions predictions = [pred_block_i for pred_block_i, _, _ in prediction_blocks] test_indices = np.concatenate( [indices_i for _, indices_i, _ in prediction_blocks]) estimators = [e for _, _, e in prediction_blocks] if not _check_is_permutation(test_indices, _num_samples(X)): raise ValueError('cross_val_predict only works for partitions') inv_test_indices = np.empty(len(test_indices), dtype=int) inv_test_indices[test_indices] = np.arange(len(test_indices)) if sp.issparse(predictions[0]): predictions = sp.vstack(predictions, format=predictions[0].format) elif encode and isinstance(predictions[0], list): # `predictions` is a list of method outputs from each fold. # If each of those is also a list, then treat this as a # multioutput-multiclass task. We need to separately concatenate # the method outputs for each label into an `n_labels` long list. n_labels = y.shape[1] concat_pred = [] for i_label in range(n_labels): label_preds = np.concatenate([p[i_label] for p in predictions]) concat_pred.append(label_preds) predictions = concat_pred else: predictions = np.concatenate(predictions) if isinstance(predictions, list): return [p[inv_test_indices] for p in predictions], estimators else: return predictions[inv_test_indices], estimators
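Usage sketch for the modified cross_val_predict above. Unlike stock scikit-learn it also returns the per-fold fitted estimators, and it assumes the matching private helpers (_fit_and_predict and friends) are in scope; the dataset and estimator below are only illustrative.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
preds, estimators = cross_val_predict(LogisticRegression(max_iter=1000), X, y, cv=5)
print(preds.shape, len(estimators))  # (150,) 5 -- one fitted model per fold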
def cross_val_multiscore(estimator, X, y=None, groups=None, scoring=None, cv=None, n_jobs=1, verbose=0, fit_params=None, pre_dispatch='2*n_jobs'): """Evaluate a score by cross-validation. Parameters ---------- estimator : instance of sklearn.base.BaseEstimator The object to use to fit the data. Must implement the 'fit' method. X : array-like, shape (n_samples, n_dimensional_features,) The data to fit. Can be, for example a list, or an array at least 2d. y : array-like, shape (n_samples, n_targets,) The target variable to try to predict in the case of supervised learning. groups : array-like, with shape (n_samples,) Group labels for the samples used while splitting the dataset into train/test set. scoring : string, callable | None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. cv : int, cross-validation generator | iterable Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross validation, - integer, to specify the number of folds in a ``(Stratified)KFold``, - An object to be used as a cross-validation generator. - An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`sklearn.model_selection.StratifiedKFold` is used. In all other cases, :class:`sklearn.model_selection.KFold` is used. n_jobs : int, optional The number of CPUs to use to do the computation. -1 means 'all CPUs'. verbose : int, optional The verbosity level. fit_params : dict, optional Parameters to pass to the fit method of the estimator. pre_dispatch : int, or string, optional Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' Returns ------- scores : array of float, shape (n_splits,) | shape (n_splits, n_scores) Array of scores of the estimator for each run of the cross validation. """ # This code is copied from sklearn from sklearn.base import clone from sklearn.utils import indexable from sklearn.metrics.scorer import check_scoring from sklearn.model_selection._split import check_cv X, y, groups = indexable(X, y, groups) cv = check_cv(cv, y, classifier=is_classifier(estimator)) cv_iter = list(cv.split(X, y, groups)) scorer = check_scoring(estimator, scoring=scoring) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. # Note: this parallelization is implemented using MNE Parallel parallel, p_func, n_jobs = parallel_func(_fit_and_score, n_jobs, pre_dispatch=pre_dispatch) scores = parallel(p_func(clone(estimator), X, y, scorer, train, test, verbose, None, fit_params) for train, test in cv_iter) return np.array(scores)[:, 0, ...] # flatten over joblib output.
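A usage sketch for cross_val_multiscore, assuming the function above and the (older) scikit-learn/MNE imports it relies on resolve in your environment. With a scalar scorer it behaves like cross_val_score; its value in MNE is that scorers returning one score per time point yield a (n_splits, n_scores) array.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=60, n_features=5, random_state=0)
scores = cross_val_multiscore(LogisticRegression(max_iter=1000), X, y, cv=3)
print(scores.shape)  # (3,) -- one score per split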
def split(self, X, y=None, groups=None): X, y, groups = indexable(X, y, groups) for train, test, date_range in self._iter_indices(X, y, groups): yield train, test, date_range
def fit(self, X, y, sample_weight=None): """Fit the calibrated model Parameters ---------- X : array-like, shape (n_samples, n_features) Training data. y : array-like, shape (n_samples,) Target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. If None, then samples are equally weighted. Returns ------- self : object Returns an instance of self. """ X, y = check_X_y(X, y, accept_sparse=['csc', 'csr', 'coo'], force_all_finite=False, allow_nd=True) X, y = indexable(X, y) le = LabelBinarizer().fit(y) self.classes_ = le.classes_ # Check that each cross-validation fold can have at least one # example per class n_folds = self.cv if isinstance(self.cv, int) \ else self.cv.n_folds if hasattr(self.cv, "n_folds") else None if n_folds and \ np.any([np.sum(y == class_) < n_folds for class_ in self.classes_]): raise ValueError("Requesting %d-fold cross-validation but provided" " less than %d examples for at least one class." % (n_folds, n_folds)) self.calibrated_classifiers_ = [] cv = check_cv(self.cv, y, classifier=True) fit_parameters = signature(self.base_estimator.fit).parameters estimator_name = type(self.base_estimator).__name__ # Restructured to match the method for Platt (1999). Train an # estimator per fold. Collect the predictions into a single list # Train the calibration model. parallel = Parallel(n_jobs=self.n_jobs) self.fit_estimators_ = parallel( delayed(_fit)(clone(self.base_estimator), X[train], y[train]) for train, _ in cv.split(X, y)) results = parallel( delayed(_predict)(estimator, X[test], y[test]) for estimator, (_, test) in zip(self.fit_estimators_, cv.split(X, y))) cv_predictions = [item[0] for item in results] cv_targets = [item[1] for item in results] cv_predictions = list(itertools.chain.from_iterable(cv_predictions)) cv_targets = list(itertools.chain.from_iterable(cv_targets)) this_estimator = clone(self.base_estimator) # Re-fit base_estimator on the whole dataset refit_estimator = this_estimator.fit(X, y) calibrated_classifier = _CalibratedClassifier(refit_estimator, method=self.method, classes=self.classes_) # Fit the isotonic regression model. calibrated_classifier.fit(cv_predictions, cv_targets) self.calibrated_classifiers_.append(calibrated_classifier) return self
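A minimal sketch of the pooled-calibration idea restructured into the fit above: collect out-of-fold decision values from every split, fit a single calibration model on that pool, and refit the base estimator on the full data. IsotonicRegression stands in for whatever calibrator ``self.method`` selects; this is not the class's actual implementation.

import numpy as np
from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.isotonic import IsotonicRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, random_state=0)
base = LinearSVC(random_state=0)

pooled_scores, pooled_targets = [], []
for train, test in StratifiedKFold(n_splits=3).split(X, y):
    est = clone(base).fit(X[train], y[train])          # one estimator per fold
    pooled_scores.append(est.decision_function(X[test]))
    pooled_targets.append(y[test])

calibrator = IsotonicRegression(out_of_bounds='clip').fit(
    np.concatenate(pooled_scores), np.concatenate(pooled_targets))
final_estimator = clone(base).fit(X, y)                 # refit on the whole dataset
print(calibrator.predict(final_estimator.decision_function(X[:5])))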
def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv='warn', n_jobs=None, verbose=0, fit_params=None, pre_dispatch='2*n_jobs', return_train_score=False, return_estimator=False, error_score='raise-deprecating'): X, y, groups = indexable(X, y, groups) cv = check_cv(cv, y, classifier=is_classifier(estimator)) scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring) def _score(estimator, X_test, y_test, scorer, is_multimetric=False): if is_multimetric: return _multimetric_score(estimator, X_test, y_test, scorer) else: if y_test is None: score = scorer(estimator, X_test) else: score = scorer(estimator, X_test, y_test) if hasattr(score, 'item'): try: # e.g. unwrap memmapped scalars score = score.item() except ValueError: # non-scalar? pass if not isinstance(score, numbers.Number): raise ValueError( "scoring must return a number, got %s (%s) " "instead. (scorer=%r)" % (str(score), type(score), scorer)) return score def _multimetric_score(estimator, X_test, y_test, scorers): """Return a dict of score for multimetric scoring.""" scores = {} for name, scorer in scorers.items(): if y_test is None: score = scorer(estimator, X_test) else: score = scorer(estimator, X_test, y_test) if hasattr(score, 'item'): try: # e.g. unwrap memmapped scalars score = score.item() except ValueError: # non-scalar? pass scores[name] = score if not isinstance(score, numbers.Number): raise ValueError( "scoring must return a number, got %s (%s) " "instead. (scorer=%s)" % (str(score), type(score), name)) return scores def _aggregate_score_dicts(scores): out = {} for key in scores[0]: out[key] = np.asarray([score[key] for score in scores]) return out def _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score=False, return_parameters=False, return_n_test_samples=False, return_times=False, return_estimator=False, error_score='raise-deprecating'): start_time = time.time() if verbose > 1: if parameters is None: msg = '' else: msg = '%s' % (', '.join( '%s=%s' % (k, v) for k, v in parameters.items())) print("[CV] %s %s" % (msg, (64 - len(msg)) * '.')) # Adjust length of sample weights fit_params = fit_params if fit_params is not None else {} fit_params = dict([(k, _index_param_value(X, v, train)) for k, v in fit_params.items()]) train_scores = {} if parameters is not None: estimator.set_params(**parameters) X_train, y_train = _safe_split(estimator, X, y, train) X_test, y_test = _safe_split(estimator, X, y, test, train) is_multimetric = not callable(scorer) n_scorers = len(scorer.keys()) if is_multimetric else 1 try: ######################################### ############ FIT CALLED HERE ############ ######################################### if y_train is None: estimator.fit(X_train, **fit_params) else: estimator.fit(X_train, y_train, **fit_params) ######################################### except Exception as e: # Note fit time as time until error fit_time = time.time() - start_time score_time = 0.0 if error_score == 'raise': raise elif error_score == 'raise-deprecating': warnings.warn( "From version 0.22, errors during fit will result " "in a cross validation score of NaN by default. 
Use " "error_score='raise' if you want an exception " "raised or error_score=np.nan to adopt the " "behavior from version 0.22.", FutureWarning) raise elif isinstance(error_score, numbers.Number): if is_multimetric: test_scores = dict( zip(scorer.keys(), [ error_score, ] * n_scorers)) if return_train_score: train_scores = dict( zip(scorer.keys(), [ error_score, ] * n_scorers)) else: test_scores = error_score if return_train_score: train_scores = error_score warnings.warn( "Estimator fit failed. The score on this train-test" " partition for these parameters will be set to %f. " "Details: \n%s" % (error_score, format_exception_only( type(e), e)[0]), FitFailedWarning) else: raise ValueError( "error_score must be the string 'raise' or a" " numeric value. (Hint: if using 'raise', please" " make sure that it has been spelled correctly.)" ) else: fit_time = time.time() - start_time # _score will return dict if is_multimetric is True test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric) score_time = time.time() - start_time - fit_time if return_train_score: train_scores = _score(estimator, X_train, y_train, scorer, is_multimetric) if verbose > 2: if is_multimetric: for scorer_name, score in test_scores.items(): msg += ", %s=%s" % (scorer_name, score) else: msg += ", score=%s" % test_scores if verbose > 1: total_time = score_time + fit_time end_msg = "%s, total=%s" % ( msg, logger.short_format_time(total_time)) print("[CV] %s %s" % ((64 - len(end_msg)) * '.', end_msg)) ret = [train_scores, test_scores ] if return_train_score else [test_scores] if return_n_test_samples: ret.append(_num_samples(X_test)) if return_times: ret.extend([fit_time, score_time]) if return_parameters: ret.append(parameters) if return_estimator: ret.append(estimator) return ret if not context: parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) else: parallel = cls.Parallel() # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. scores = parallel( delayed(_fit_and_score)( clone(estimator), X, y, scorers, train, test, verbose, None, fit_params, return_train_score=return_train_score, return_times=True, return_estimator=return_estimator, error_score=error_score) for train, test in cv.split(X, y, groups)) zipped_scores = list(zip(*scores)) if return_train_score: train_scores = zipped_scores.pop(0) train_scores = _aggregate_score_dicts(train_scores) if return_estimator: fitted_estimators = zipped_scores.pop() test_scores, fit_times, score_times = zipped_scores test_scores = _aggregate_score_dicts(test_scores) ret = {} ret['fit_time'] = np.array(fit_times) ret['score_time'] = np.array(score_times) if return_estimator: ret['estimator'] = fitted_estimators for name in scorers: ret['test_%s' % name] = np.array(test_scores[name]) if return_train_score: key = 'train_%s' % name ret[key] = np.array(train_scores[name]) return ret
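The error_score branch in the inlined _fit_and_score above fills every requested metric with the numeric fallback instead of raising. A tiny illustration with hypothetical scorer names:

scorer_names = ['r2', 'neg_mean_squared_error']   # hypothetical multimetric scorers
error_score = -1
test_scores = dict(zip(scorer_names, [error_score] * len(scorer_names)))
print(test_scores)  # {'r2': -1, 'neg_mean_squared_error': -1}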
def _wrapped_cross_val_score(sklearn_pipeline, features, target, cv, scoring_function, sample_weight=None, groups=None, use_dask=False): """Fit estimator and compute scores for a given dataset split. Parameters ---------- sklearn_pipeline : pipeline object implementing 'fit' The object to use to fit the data. features : array-like of shape at least 2D The data to fit. target : array-like, optional, default: None The target variable to try to predict in the case of supervised learning. cv: cross-validation generator Object to be used as a cross-validation generator. scoring_function : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. sample_weight : array-like, optional List of sample weights to balance (or un-balance) the dataset target as needed groups: array-like {n_samples, }, optional Group labels for the samples used while splitting the dataset into train/test set use_dask : bool, default False Whether to use dask """ sample_weight_dict = set_sample_weight(sklearn_pipeline.steps, sample_weight) features, target, groups = indexable(features, target, groups) cv_iter = list(cv.split(features, target, groups)) scorer = check_scoring(sklearn_pipeline, scoring=scoring_function) if use_dask: try: import dask_ml.model_selection # noqa import dask # noqa from dask.delayed import Delayed except Exception as e: msg = "'use_dask' requires the optional dask and dask-ml dependencies.\n{}".format( e) raise ImportError(msg) dsk, keys, n_splits = dask_ml.model_selection._search.build_graph( estimator=sklearn_pipeline, cv=cv, scorer=scorer, candidate_params=[{}], X=features, y=target, groups=groups, fit_params=sample_weight_dict, refit=False, error_score=float('-inf'), ) cv_results = Delayed(keys[0], dsk) scores = [ cv_results['split{}_test_score'.format(i)] for i in range(n_splits) ] CV_score = dask.delayed(np.array)(scores)[:, 0] return dask.delayed(np.nanmean)(CV_score) else: try: with warnings.catch_warnings(): warnings.simplefilter('ignore') scores = [ _fit_and_score(estimator=clone(sklearn_pipeline), X=features, y=target, scorer=scorer, train=train, test=test, verbose=0, parameters=None, error_score='raise', fit_params=sample_weight_dict) for train, test in cv_iter ] CV_score = np.array(scores)[:, 0] CV_score_mean = np.nanmean(CV_score) return CV_score_mean except TimeoutException: return "Timeout" except Exception as e: return -float('inf')
def validation_curve(estimator, X, y, param_name, param_range, labels=None, cv=None, scoring=None, n_jobs=1, pre_dispatch="all", verbose=0): """Validation curve. Determine training and test scores for varying parameter values. Compute scores for an estimator with different values of a specified parameter. This is similar to grid search with one parameter. However, this will also compute training scores and is merely a utility for plotting the results. Read more in the :ref:`User Guide <learning_curve>`. Parameters ---------- estimator : object type that implements the "fit" and "predict" methods An object of that type which is cloned for each validation. X : array-like, shape (n_samples, n_features) Training vector, where n_samples is the number of samples and n_features is the number of features. y : array-like, shape (n_samples) or (n_samples, n_features), optional Target relative to X for classification or regression; None for unsupervised learning. param_name : string Name of the parameter that will be varied. param_range : array-like, shape (n_values,) The values of the parameter that will be evaluated. labels : array-like, with shape (n_samples,), optional Group labels for the samples used while splitting the dataset into train/test set. cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - An object to be used as a cross-validation generator. - An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. n_jobs : integer, optional Number of jobs to run in parallel (default 1). pre_dispatch : integer or string, optional Number of predispatched jobs for parallel execution (default is all). The option can reduce the allocated memory. The string can be an expression like '2*n_jobs'. verbose : integer, optional Controls the verbosity: the higher, the more messages. Returns ------- train_scores : array, shape (n_ticks, n_cv_folds) Scores on training sets. test_scores : array, shape (n_ticks, n_cv_folds) Scores on test set. Notes ----- See :ref:`examples/model_selection/plot_validation_curve.py <example_model_selection_plot_validation_curve.py>` """ X, y, labels = indexable(X, y, labels) cv = check_cv(cv, y, classifier=is_classifier(estimator)) scorer = check_scoring(estimator, scoring=scoring) parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) out = parallel(delayed(_fit_and_score)( estimator, X, y, scorer, train, test, verbose, parameters={param_name: v}, fit_params=None, return_train_score=True) for train, test in cv.split(X, y, labels) for v in param_range) out = np.asarray(out)[:, :2] n_params = len(param_range) n_cv_folds = out.shape[0] // n_params out = out.reshape(n_cv_folds, n_params, 2).transpose((2, 1, 0)) return out[0], out[1]
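Usage sketch for the validation_curve variant above, assuming it and the private scikit-learn helpers it calls are importable; the estimator and parameter grid are illustrative. Each returned array has one row per parameter value and one column per CV fold.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
train_scores, test_scores = validation_curve(
    SVC(), X, y, param_name='gamma', param_range=np.logspace(-4, 0, 3), cv=3)
print(train_scores.shape, test_scores.shape)  # (3, 3) each: n_values x n_cv_folds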
def y(self): if self._y is None: self._x, self._y = indexable(*self._load_training_data()) return self._y
def permutation_test_score(estimator, X, y, labels=None, cv=None, n_permutations=100, n_jobs=1, random_state=0, verbose=0, scoring=None): """Evaluate the significance of a cross-validated score with permutations Read more in the :ref:`User Guide <cross_validation>`. Parameters ---------- estimator : estimator object implementing 'fit' The object to use to fit the data. X : array-like of shape at least 2D The data to fit. y : array-like The target variable to try to predict in the case of supervised learning. labels : array-like, with shape (n_samples,), optional Group labels for the samples used while splitting the dataset into train/test set. scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - An object to be used as a cross-validation generator. - An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. n_permutations : integer, optional Number of times to permute ``y``. n_jobs : integer, optional The number of CPUs to use to do the computation. -1 means 'all CPUs'. random_state : RandomState or an int seed (0 by default) A random number generator instance to define the state of the random permutations generator. verbose : integer, optional The verbosity level. Returns ------- score : float The true score without permuting targets. permutation_scores : array, shape (n_permutations,) The scores obtained for each permutations. pvalue : float The returned value equals p-value if `scoring` returns bigger numbers for better scores (e.g., accuracy_score). If `scoring` is rather a loss function (i.e. when lower is better such as with `mean_squared_error`) then this is actually the complement of the p-value: 1 - p-value. Notes ----- This function implements Test 1 in: Ojala and Garriga. Permutation Tests for Studying Classifier Performance. The Journal of Machine Learning Research (2010) vol. 11 """ X, y, labels = indexable(X, y, labels) cv = check_cv(cv, y, classifier=is_classifier(estimator)) scorer = check_scoring(estimator, scoring=scoring) random_state = check_random_state(random_state) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. score = _permutation_test_score(clone(estimator), X, y, labels, cv, scorer) permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)( delayed(_permutation_test_score)(clone(estimator), X, _shuffle(y, labels, random_state), labels, cv, scorer) for _ in range(n_permutations)) permutation_scores = np.array(permutation_scores) pvalue = (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1) return score, permutation_scores, pvalue
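A small worked example of the p-value formula used above, pvalue = (#{permutation scores >= observed score} + 1) / (n_permutations + 1), with made-up scores:

import numpy as np

score = 0.82
permutation_scores = np.array([0.48, 0.55, 0.61, 0.83, 0.50])
pvalue = (np.sum(permutation_scores >= score) + 1.0) / (len(permutation_scores) + 1.0)
print(pvalue)  # (1 + 1) / (5 + 1) = 0.333...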
def _wrapped_cross_val_score(sklearn_pipeline, features, target, cv, scoring_function, sample_weight=None, groups=None, index=None, print_individual_scores=False): """Fit estimator and compute scores for a given dataset split. Parameters ---------- sklearn_pipeline : pipeline object implementing 'fit' The object to use to fit the data. features : array-like of shape at least 2D The data to fit. target : array-like, optional, default: None The target variable to try to predict in the case of supervised learning. cv: int or cross-validation generator If CV is a number, then it is the number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. If it is an object then it is an object to be used as a cross-validation generator. scoring_function : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. sample_weight : array-like, optional List of sample weights to balance (or un-balanace) the dataset target as needed groups: array-like {n_samples, }, optional Group labels for the samples used while splitting the dataset into train/test set """ # print(index, sklearn_pipeline.steps) sample_weight_dict = set_sample_weight(sklearn_pipeline.steps, sample_weight) features, target, groups = indexable(features, target, groups) cv = check_cv(cv, target, classifier=is_classifier(sklearn_pipeline)) cv_iter = list(cv.split(features, target, groups)) scorer = check_scoring(sklearn_pipeline, scoring=scoring_function) try: with warnings.catch_warnings(): warnings.simplefilter('ignore') scores = [] fold = 0 for train, test in cv_iter: estimator = clone(sklearn_pipeline) score = _fit_and_score(estimator=estimator, X=features, y=target, scorer=scorer, train=train, test=test, verbose=0, parameters=None, fit_params=sample_weight_dict) fold += 1 # if print_individual_scores: # print("%d (%d): %s" % (index, fold, score)) scores.append(score) CV_score = np.array(scores)[:, 0] return np.nanmean(CV_score) except TimeoutException: # _logger.info("Evaluation timeout on %s" % sklearn_pipeline.steps) return "Timeout" except Exception as e: _logger.info(traceback.format_exc()) return -float('inf')
def learning_curve(estimator, X, y, labels=None, train_sizes=np.linspace(0.1, 1.0, 5), cv=None, scoring=None, exploit_incremental_learning=False, n_jobs=1, pre_dispatch="all", verbose=0): """Learning curve. Determines cross-validated training and test scores for different training set sizes. A cross-validation generator splits the whole dataset k times in training and test data. Subsets of the training set with varying sizes will be used to train the estimator and a score for each training subset size and the test set will be computed. Afterwards, the scores will be averaged over all k runs for each training subset size. Read more in the :ref:`User Guide <learning_curve>`. Parameters ---------- estimator : object type that implements the "fit" and "predict" methods An object of that type which is cloned for each validation. X : array-like, shape (n_samples, n_features) Training vector, where n_samples is the number of samples and n_features is the number of features. y : array-like, shape (n_samples) or (n_samples, n_features), optional Target relative to X for classification or regression; None for unsupervised learning. labels : array-like, with shape (n_samples,), optional Group labels for the samples used while splitting the dataset into train/test set. train_sizes : array-like, shape (n_ticks,), dtype float or int Relative or absolute numbers of training examples that will be used to generate the learning curve. If the dtype is float, it is regarded as a fraction of the maximum size of the training set (that is determined by the selected validation method), i.e. it has to be within (0, 1]. Otherwise it is interpreted as absolute sizes of the training sets. Note that for classification the number of samples usually have to be big enough to contain at least one sample from each class. (default: np.linspace(0.1, 1.0, 5)) cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - An object to be used as a cross-validation generator. - An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. exploit_incremental_learning : boolean, optional, default: False If the estimator supports incremental learning, this will be used to speed up fitting for different training set sizes. n_jobs : integer, optional Number of jobs to run in parallel (default 1). pre_dispatch : integer or string, optional Number of predispatched jobs for parallel execution (default is all). The option can reduce the allocated memory. The string can be an expression like '2*n_jobs'. verbose : integer, optional Controls the verbosity: the higher, the more messages. Returns ------- train_sizes_abs : array, shape = (n_unique_ticks,), dtype int Numbers of training examples that has been used to generate the learning curve. Note that the number of ticks might be less than n_ticks because duplicate entries will be removed. 
train_scores : array, shape (n_ticks, n_cv_folds) Scores on training sets. test_scores : array, shape (n_ticks, n_cv_folds) Scores on test set. Notes ----- See :ref:`examples/model_selection/plot_learning_curve.py <example_model_selection_plot_learning_curve.py>` """ if exploit_incremental_learning and not hasattr(estimator, "partial_fit"): raise ValueError("An estimator must support the partial_fit interface " "to exploit incremental learning") X, y, labels = indexable(X, y, labels) cv = check_cv(cv, y, classifier=is_classifier(estimator)) cv_iter = cv.split(X, y, labels) # Make a list since we will be iterating multiple times over the folds cv_iter = list(cv_iter) scorer = check_scoring(estimator, scoring=scoring) n_max_training_samples = len(cv_iter[0][0]) # Because the lengths of folds can be significantly different, it is # not guaranteed that we use all of the available training data when we # use the first 'n_max_training_samples' samples. train_sizes_abs = _translate_train_sizes(train_sizes, n_max_training_samples) n_unique_ticks = train_sizes_abs.shape[0] if verbose > 0: print("[learning_curve] Training set sizes: " + str(train_sizes_abs)) parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) if exploit_incremental_learning: classes = np.unique(y) if is_classifier(estimator) else None out = parallel( delayed(_incremental_fit_estimator) (clone(estimator), X, y, classes, train, test, train_sizes_abs, scorer, verbose) for train, test in cv.split(X, y, labels)) else: out = parallel( delayed(_fit_and_score)(clone(estimator), X, y, scorer, train[:n_train_samples], test, verbose, parameters=None, fit_params=None, return_train_score=True) for train, test in cv_iter for n_train_samples in train_sizes_abs) out = np.array(out)[:, :2] n_cv_folds = out.shape[0] // n_unique_ticks out = out.reshape(n_cv_folds, n_unique_ticks, 2) out = np.asarray(out).transpose((2, 1, 0)) return train_sizes_abs, out[0], out[1]
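A sketch of how fractional train_sizes are interpreted, per the docstring above: floats in (0, 1] are fractions of the largest training fold, integers are absolute counts. This mirrors the documented behaviour, not the private _translate_train_sizes helper itself.

import numpy as np

n_max_training_samples = 100
train_sizes = np.linspace(0.1, 1.0, 5)                        # fractions of the largest fold
train_sizes_abs = (train_sizes * n_max_training_samples).astype(int)
print(train_sizes_abs)                                        # [ 10  32  55  77 100]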
def gbdt_feature_selector(data_dict,
                          gbdt_estimator,
                          feature_rank,
                          category_features,
                          cv_list=[StratifiedKFold(n_splits=5, shuffle=True, random_state=0)],
                          groups_list=[None],
                          weights_list=[1],
                          rounds=100,
                          step=1,
                          auc_diff_threshold=0,
                          auc_initial=0.55):
    '''Stepwise feature-selection routine that adds variables batch by batch, using a LightGBM-style GBDT model.

    data_dict: dict of datasets, containing keys such as 'train', 'test_xxx', etc.
    gbdt_estimator: a GBDT-type estimator
    feature_rank: list of candidate features, ordered by some ranking criterion
    category_features: features that should be treated as categorical
    cv_list: list of dataset-splitting strategies; defaults to a single 5-fold StratifiedKFold
    groups_list: list of `groups` arguments for the splitting strategies; defaults to None
    weights_list: weights applied to the final evaluation result of each splitting strategy
    rounds: total number of selection rounds
    step: number of variables added per round
    auc_diff_threshold: minimum AUC improvement required for a batch of features to be kept
    auc_initial: minimum AUC the first added variables must reach to enter the model

    return: feature_selected - the features finally kept in the model
            step_detail - per-round CV details, keyed 'roundX' where X is the round number
            step_outer_valid_statistic - outer_valid evaluation results of the rounds that were kept
    '''
    ## preliminary checks: the three lists must have equal length
    cv_list, groups_list, weights_list = indexable(cv_list, groups_list, weights_list)
    ## preparation
    print("Starting feature selection".center(50, '='))
    each_round_start = range(0, rounds * step, step)
    ## features already in the model before each round's evaluation
    feature_selected = []
    ## per-round detail data
    step_detail = {}
    ## evaluation results of the rounds whose features entered the model
    step_outer_valid_statistic = {}
    for i in each_round_start:
        print('*************rounds: %d****************' % (i / step + 1))
        ## pick the features for this round
        feature_added = feature_rank[i:i + step]
        feature_used = feature_selected + feature_added
        category_feature_used = [
            i for i in feature_used if i in category_features
        ]
        print('Number of features in the model: ' + str(len(feature_used)))
        print('Features examined in this round:', feature_added)
        ## store the results for each CV strategy
        cv_detail_result = {}
        cv_statistic_result = {}
        cv_best_iteration_result = {}
        cv_statistic = {}
        cv_outer_valid_statistic = {}
        for cv_index in range(len(cv_list)):
            fold_detail_result, fold_statistic_result, fold_best_iteration_result = gbdt_cv_evaluate_earlystop(
                data_dict=data_dict,
                gbdt_estimator=gbdt_estimator,
                total_features=feature_used,
                category_features=category_feature_used,
                cv=cv_list[cv_index],
                groups=groups_list[cv_index])
            cv_detail_result[cv_index] = fold_detail_result
            cv_statistic_result[cv_index] = fold_statistic_result
            cv_best_iteration_result[cv_index] = fold_best_iteration_result
            ## extract the evaluation statistics
            fold_statistic = pd.DataFrame()
            for fold_key in fold_statistic_result.keys():
                temp_statistic = pd.DataFrame(
                    fold_statistic_result[fold_key]).T
                temp_statistic.columns = [fold_key + '_'
                                          ] + temp_statistic.columns
                fold_statistic = pd.concat([fold_statistic, temp_statistic],
                                           axis=1)
            ## columns holding the various evaluation metrics
            evaluation_map = {}
            ## columns holding AUC values
            evaluation_map['auc'] = [
                i for i in fold_statistic.columns if '_auc' in i
            ]
            ## columns holding KS values
            evaluation_map['ks'] = [
                i for i in fold_statistic.columns if '_ks' in i
            ]
            ## compute the mean and std of each metric
            for evaluation_key in evaluation_map.keys():
                fold_statistic[evaluation_key + '_mean'] = fold_statistic[
                    evaluation_map[evaluation_key]].apply(lambda x: np.mean(x),
                                                          axis=1)
                fold_statistic[evaluation_key + '_std'] = fold_statistic[
                    evaluation_map[evaluation_key]].apply(lambda x: np.std(x),
                                                          axis=1)
            cv_statistic[cv_index] = fold_statistic
            cv_outer_valid_statistic[cv_index] = fold_statistic.loc[
                'outer_valid',
                [i + '_mean' for i in evaluation_map.keys()] +
                [i + '_std' for i in evaluation_map.keys()]]
        ## evaluate the CV results on the outer_valid data, weighting the different CV strategies
        outer_valid_statistic = pd.DataFrame(cv_outer_valid_statistic).apply(
            lambda x: np.dot(x, np.array(weights_list)), axis=1)
        ## store the detail data for this round
        current_step_detail = {}
        current_step_detail['auc_threshold'] = auc_initial
        current_step_detail['feature_initial'] = feature_selected
        current_step_detail['feature_added'] = feature_added
        current_step_detail['feature_used'] = feature_used
        current_step_detail['category_feature_used'] = category_feature_used
        current_step_detail['cv_detail_result'] = cv_detail_result
        current_step_detail[
            'cv_best_iteration_result'] = cv_best_iteration_result
        current_step_detail['cv_statistic'] = cv_statistic
        current_step_detail[
            'cv_outer_valid_statistic'] = cv_outer_valid_statistic
        current_step_detail['outer_valid_statistic'] = outer_valid_statistic
        current_step_detail['is_delete'] = (outer_valid_statistic['auc_mean'] -
                                            auc_initial) < auc_diff_threshold
        step_detail['round' + str(int(i / step + 1))] = current_step_detail
        print('Weighted evaluation result for the current step: {0}; AUC threshold: {1} '
              '(including the minimum per-step improvement of {2})'.format(
                  outer_valid_statistic, auc_initial + auc_diff_threshold,
                  auc_diff_threshold))
        ## condition not met: do not add the current batch of variables to the selected set
        if (outer_valid_statistic['auc_mean'] -
                auc_initial) < auc_diff_threshold:
            print('Dropping the features added in this batch')
            continue
        ## condition met: keep them in the selected set
        feature_selected = feature_used
        ## update the threshold
        auc_initial = outer_valid_statistic['auc_mean']
        ## store the evaluation result of each step whose features entered the model
        step_outer_valid_statistic['round' +
                                   str(int(i / step + 1))] = outer_valid_statistic
    return feature_selected, step_detail, step_outer_valid_statistic
def cross_val_score(estimator, X, y=None, labels=None, scoring=None, cv=None, n_jobs=1, verbose=0, fit_params=None, pre_dispatch='2*n_jobs'): """Evaluate a score by cross-validation Read more in the :ref:`User Guide <cross_validation>`. Parameters ---------- estimator : estimator object implementing 'fit' The object to use to fit the data. X : array-like The data to fit. Can be, for example a list, or an array at least 2d. y : array-like, optional, default: None The target variable to try to predict in the case of supervised learning. labels : array-like, with shape (n_samples,), optional Group labels for the samples used while splitting the dataset into train/test set. scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - An object to be used as a cross-validation generator. - An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. n_jobs : integer, optional The number of CPUs to use to do the computation. -1 means 'all CPUs'. verbose : integer, optional The verbosity level. fit_params : dict, optional Parameters to pass to the fit method of the estimator. pre_dispatch : int, or string, optional Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' Returns ------- scores : array of float, shape=(len(list(cv)),) Array of scores of the estimator for each run of the cross validation. See Also --------- :func:`sklearn.metrics.make_scorer`: Make a scorer from a performance metric or loss function. """ X, y, labels = indexable(X, y, labels) cv = check_cv(cv, y, classifier=is_classifier(estimator)) scorer = check_scoring(estimator, scoring=scoring) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) scores = parallel( delayed(_fit_and_score)(clone(estimator), X, y, scorer, train, test, verbose, None, fit_params) for train, test in cv.split(X, y, labels)) return np.array(scores)[:, 0]
def cross_validate(estimator, y, X=None, scoring=None, cv=None, verbose=0, error_score=np.nan, **kwargs): # TODO: remove kwargs """Evaluate metric(s) by cross-validation and also record fit/score times. Parameters ---------- estimator : estimator An estimator object that implements the ``fit`` method y : array-like or iterable, shape=(n_samples,) The time-series array. X : array-like, shape=[n_obs, n_vars], optional (default=None) An optional 2-d array of exogenous variables. scoring : str or callable, optional (default=None) The scoring metric to use. If a callable, must adhere to the signature ``metric(true, predicted)``. Valid string scoring metrics include: - 'smape' - 'mean_absolute_error' - 'mean_squared_error' cv : BaseTSCrossValidator or None, optional (default=None) An instance of cross-validation. If None, will use a RollingForecastCV verbose : integer, optional The verbosity level. error_score : 'raise' or numeric Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, ModelFitWarning is raised. This parameter does not affect the refit step, which will always raise the error. """ # Temporary shim until we remove `exogenous` support completely X, _ = pm_compat.get_X(X, **kwargs) y, X = indexable(y, X) y = check_endog(y, copy=False) cv = check_cv(cv) scoring = _check_scoring(scoring) # validate the error score if not (error_score == "raise" or isinstance(error_score, numbers.Number)): raise ValueError('error_score should be the string "raise" or a ' 'numeric value') # TODO: in the future we might consider joblib for parallelizing, but it # . could cause cross threads in parallelism.. results = [ _fit_and_score(fold, base.clone(estimator), y, X, scorer=scoring, train=train, test=test, verbose=verbose, error_score=error_score) for fold, (train, test) in enumerate(cv.split(y, X))] scores, fit_times, score_times = list(zip(*results)) ret = { 'test_score': np.array(scores), 'fit_time': np.array(fit_times), 'score_time': np.array(score_times), } return ret
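# Hedged usage sketch for the time-series cross_validate above. It assumes a
# pmdarima-style environment, i.e. that RollingForecastCV and an ARIMA estimator
# are importable as shown; the exact import paths are an assumption, not confirmed
# by this file.
import numpy as np
from pmdarima.arima import ARIMA
from pmdarima.model_selection import RollingForecastCV

rng = np.random.RandomState(0)
y = np.sin(np.linspace(0, 20, 120)) + rng.normal(scale=0.1, size=120)

cv = RollingForecastCV(h=6, step=6, initial=60)   # forecast 6 steps ahead per fold
results = cross_validate(ARIMA(order=(1, 0, 0)), y, scoring='smape', cv=cv)
print(results['test_score'].mean(), results['fit_time'].sum())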
def cross_validate(estimator, X, y=None, groups=None, scoring=None, cv=None, n_jobs=1, verbose=0, fit_params=None, pre_dispatch='2*n_jobs', return_train_score="warn"): """Evaluate metric(s) by cross-validation and also record fit/score times. Read more in the :ref:`User Guide <multimetric_cross_validation>`. Parameters ---------- estimator : estimator object implementing 'fit' The object to use to fit the data. X : array-like The data to fit. Can be for example a list, or an array. y : array-like, optional, default: None The target variable to try to predict in the case of supervised learning. groups : array-like, with shape (n_samples,), optional Group labels for the samples used while splitting the dataset into train/test set. scoring : string, callable, list/tuple, dict or None, default: None A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. For evaluating multiple metrics, either give a list of (unique) strings or a dict with names as keys and callables as values. NOTE that when using custom scorers, each scorer should return a single value. Metric functions returning a list/array of values can be wrapped into multiple scorers that return one value each. See :ref:`multimetric_grid_search` for an example. If None, the estimator's default scorer (if available) is used. cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cross_validators are: - None, to use the default 3-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - An object to be used as a cross-validation generator. - An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` is used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. n_jobs : integer, optional The number of CPUs to use to do the computation. -1 means 'all CPUs'. verbose : integer, optional The verbosity level. fit_params : dict, optional Parameters to pass to the fit method of the estimator. pre_dispatch : int, or string, optional Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' return_train_score : boolean, optional Whether to include train decision_scores. Current default is ``'warn'``, which behaves as ``True`` in addition to raising a warning when a training score is looked up. That default will be changed to ``False`` in 0.21. Computing training decision_scores is used to get insights on how different parameter settings impact the overfitting/underfitting trade-off. However computing the decision_scores on the training set can be computationally expensive and is not strictly required to select the parameters that yield the best generalization performance. 
Returns ------- decision_scores : dict of float arrays of shape=(n_splits,) Array of results of the estimator for each run of the cross validation. A dict of arrays containing the score/time arrays for each scorer is returned. The possible keys for this ``dict`` are: ``test_score`` The score array for test decision_scores on each cross_validators split. ``train_score`` The score array for train decision_scores on each cross_validators split. This is available only if ``return_train_score`` parameter is ``True``. ``fit_time`` The time for fitting the estimator on the train set for each cross_validators split. ``score_time`` The time for scoring the estimator on the test set for each cross_validators split. (Note time for scoring on the train set is not included even if ``return_train_score`` is set to ``True`` ``estimator`` A list of estimator objects, one for each training dataset. Examples -------- >>> from sklearn import datasets, linear_model >>> from sklearn.model_selection import cross_validate >>> from sklearn.metrics.scorer import make_scorer >>> from sklearn.metrics import confusion_matrix >>> from sklearn.svm import LinearSVC >>> diabetes = datasets.load_diabetes() >>> X = diabetes.data[:150] >>> y = diabetes.target[:150] >>> lasso = linear_model.Lasso() Single metric evaluation using ``cross_validate`` >>> cv_results = cross_validate(lasso, X, y, return_train_score=False) >>> sorted(cv_results.keys()) # doctest: +ELLIPSIS ['fit_time', 'score_time', 'test_score'] >>> cv_results['test_score'] # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE array([ 0.33..., 0.08..., 0.03...]) Multiple metric evaluation using ``cross_validate`` (please refer the ``scoring`` parameter doc for more information) >>> decision_scores = cross_validate(lasso, X, y, ... scoring=('r2', 'neg_mean_squared_error')) >>> print(decision_scores['test_neg_mean_squared_error']) # doctest: +ELLIPSIS [-3635.5... -3573.3... -6114.7...] >>> print(decision_scores['train_r2']) # doctest: +ELLIPSIS [ 0.28... 0.39... 0.22...] See Also --------- :func:`sklearn.model_selection.cross_val_score`: Run cross-validation for single metric evaluation. :func:`sklearn.metrics.make_scorer`: Make a scorer from a performance metric or loss function. """ X, y, groups = indexable(X, y, groups) cv = check_cv(cv, y, classifier=is_classifier(estimator)) scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. 
parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) # ---------------------- My Hack ----------------------- # # 1) Set parameter `error_score=-1` to `_fit_and_score` # # 2) Created an argument `return_estimator` to # # `_fit_and_score` # # ------------------------------------------------------ # scores = parallel( delayed(_fit_and_score)(clone(estimator), X, y, scorers, train, test, verbose, None, fit_params, return_train_score=return_train_score, return_times=True, return_estimator=True, error_score=-1) for train, test in cv.split(X, y, groups)) if return_train_score: train_scores, test_scores, fit_times, score_times, estimators = zip( *scores) train_scores = _aggregate_score_dicts(train_scores) else: test_scores, fit_times, score_times, estimators = zip(*scores) test_scores = _aggregate_score_dicts(test_scores) # TODO: replace by a dict in 0.21 ret = DeprecationDict() if return_train_score == 'warn' else {} ret['fit_time'] = np.array(fit_times) ret['score_time'] = np.array(score_times) ret['estimator'] = list(estimators) for name in scorers: ret['test_%s' % name] = np.array(test_scores[name]) if return_train_score: key = 'train_%s' % name ret[key] = np.array(train_scores[name]) if return_train_score == 'warn': message = ( 'You are accessing a training score ({!r}), ' 'which will not be available by default ' 'any more in 0.21. If you need training decision_scores, ' 'please set return_train_score=True').format(key) # warn on key access ret.add_warning(key, message, FutureWarning) ret['cross_validator'] = cv return ret
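# Hedged usage sketch for the patched cross_validate above (0.19-era scikit-learn
# API). It assumes the function is importable from this module; thanks to the
# `return_estimator=True` hack, the fitted estimator of every fold is returned.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
out = cross_validate(DecisionTreeClassifier(random_state=0), X, y,
                     scoring=('accuracy', 'f1_macro'), cv=5,
                     return_train_score=True)

# One fitted estimator per fold under 'estimator', the splitter under
# 'cross_validator', plus the usual per-scorer test/train arrays.
print(sorted(out.keys()))
print(len(out['estimator']), out['test_accuracy'].mean())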
def permutation_test_score(estimator, X, y, data_train=None, cv=None, n_permutations=100, n_jobs=1, labels=None, random_state=0, verbose=0, scoring=None): """Evaluate the significance of a cross-validated score with permutations Parameters ---------- estimator : estimator object implementing 'fit' The object to use to fit the data. X : array-like of shape at least 2D The data to fit. y : array-like The target variable to try to predict in the case of supervised learning. data_train : np.array, optional Data to train on, if data for training is different from X. scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. cv : integer or cross-validation generator, optional If an integer is passed, it is the number of fold (default 3). Specific cross-validation objects can be passed, see sklearn.cross_validation module for the list of possible objects. n_permutations : integer, optional Number of times to permute ``y``. n_jobs : integer, optional The number of CPUs to use to do the computation. -1 means 'all CPUs'. labels : array-like of shape [n_samples] (optional) Labels constrain the permutation among groups of samples with a same label. random_state : RandomState or an int seed (0 by default) A random number generator instance to define the state of the random permutations generator. verbose : integer, optional The verbosity level. Returns ------- score : float The true score without permuting targets. permutation_scores : array, shape = [n_permutations] The scores obtained for each permutations. pvalue : float The returned value equals p-value if `score_func` returns bigger numbers for better scores (e.g., accuracy_score). If `score_func` is rather a loss function (i.e. when lower is better such as with `mean_squared_error`) then this is actually the complement of the p-value: 1 - p-value. Notes ----- This function implements Test 1 in: Ojala and Garriga. Permutation Tests for Studying Classifier Performance. The Journal of Machine Learning Research (2010) vol. 11 """ X, y = indexable(X, y) cv = _check_cv(cv, X, y, classifier=is_classifier(estimator)) scorer = check_scoring(estimator, scoring=scoring) random_state = check_random_state(random_state) if data_train is None: # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. # Default behavior of sklearn permutation score score = _permutation_test_score(clone(estimator), X, y, cv, scorer) permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)( delayed(_permutation_test_score)( clone(estimator), X, _shuffle(y, labels, random_state), cv, scorer) for _ in range(n_permutations)) else: # Modification for 2pn # First get the real score, train on nii_optional (actor), test on nii_func (observer) score = [] for train, test in cv: estimator.fit(data_train[train], y[train]) score.append(scorer(estimator, X[test], y[test])) score = np.mean(score) # Then, get the prmutation scores permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)( delayed(_permutation_test_score)( clone(estimator), X, _shuffle(y, labels, random_state), cv, scorer, data_train) for _ in range(n_permutations)) permutation_scores = np.array(permutation_scores) pvalue = (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1) return score, permutation_scores, pvalue
def gap_train_test_split(*arrays, **options): """Split arrays or matrices into random train and test subsets (with a gap) Parameters ---------- *arrays : sequence of indexables with same length / shape[0] Allowed inputs are lists, numpy arrays, scipy-sparse matrices or pandas dataframes. gap_size : float or int, default=0 If float, should be between 0.0 and 1.0 and represent the proportion of the dataset between the training and the test set. If int, represents the absolute number of the dropped samples. test_size : float, int, or None, default=None If float, should be between 0.0 and 1.0 and equal to test / (train + test). If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size and the gap. If `train_size` is also None, it will be set to 0.25. train_size : float, int, or None, default=None If float, should be between 0.0 and 1.0 and equal to train / (train + test). If int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size and the gap size. Returns ------- splitting : list, length=2 * len(arrays) List containing train-test split of inputs. Examples -------- >>> import numpy as np >>> from tscv import gap_train_test_split >>> X, y = np.arange(10).reshape((5, 2)), range(5) >>> X array([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]) >>> list(y) [0, 1, 2, 3, 4] >>> X_train, X_test, y_train, y_test = gap_train_test_split( ... X, y, test_size=0.33, gap_size=1) ... >>> X_train array([[0, 1], [2, 3], [4, 5]]) >>> y_train [0, 1, 2] >>> X_test array([[8, 9]]) >>> y_test [4] >>> gap_train_test_split(list(range(10)), gap_size=0.1) [[0, 1, 2, 3, 4, 5, 6], [8, 9]] """ n_arrays = len(arrays) if n_arrays == 0: raise ValueError("At least one array required as input") check_consistent_length(*arrays) test_size = options.pop('test_size', None) train_size = options.pop('train_size', None) gap_size = options.pop('gap_size', 0) if not isinstance(gap_size, numbers.Real): raise TypeError("The gap size should be a real number.") if options: raise TypeError("Invalid parameters passed: %s. \n" "Check the spelling of keyword parameters." % str(options)) arrays = indexable(*arrays) n_samples = _num_samples(arrays[0]) def size_to_number(size, n): b, a = modf(size) return int(max(a, round(b * n))) n_gap = size_to_number(gap_size, n_samples) n_remain = n_samples - n_gap if test_size is None and train_size is None: test_size = 0.25 if train_size is None: n_test = size_to_number(test_size, n_remain) n_train = n_remain - n_test elif test_size is None: n_train = size_to_number(train_size, n_remain) n_test = n_remain - n_train else: warnings.warn( "The train_size argument is overridden by test_size; " "in case of nonzero gap_size, " "an explicit value should be provided " "and cannot be implied by 1 - train_size - test_size.", Warning) n_test = size_to_number(test_size, n_remain) n_train = n_remain - n_test train = np.arange(n_train) test = np.arange(n_train + n_gap, n_samples) return list( chain.from_iterable((_safe_indexing(a, train), _safe_indexing(a, test)) for a in arrays))
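# A brief hedged illustration of how the sizes resolve in gap_train_test_split:
# with 100 samples and gap_size=0.1, ten samples are dropped between train and
# test, and test_size defaults to 0.25 of the remaining 90.
import numpy as np

idx = np.arange(100)
train_idx, test_idx = gap_train_test_split(idx, gap_size=0.1)
print(len(train_idx), len(test_idx))   # 68 train, 22 test, 10 dropped in between
print(train_idx[-1], test_idx[0])      # 67 ... 78 -> indices 68-77 form the gap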
def learning_curve(estimator, X, y, labels=None, train_sizes=np.linspace(0.1, 1.0, 5), cv=None, scoring=None, exploit_incremental_learning=False, n_jobs=1, pre_dispatch="all", verbose=0): """Learning curve. Determines cross-validated training and test scores for different training set sizes. A cross-validation generator splits the whole dataset k times in training and test data. Subsets of the training set with varying sizes will be used to train the estimator and a score for each training subset size and the test set will be computed. Afterwards, the scores will be averaged over all k runs for each training subset size. Read more in the :ref:`User Guide <learning_curve>`. Parameters ---------- estimator : object type that implements the "fit" and "predict" methods An object of that type which is cloned for each validation. X : array-like, shape (n_samples, n_features) Training vector, where n_samples is the number of samples and n_features is the number of features. y : array-like, shape (n_samples) or (n_samples, n_features), optional Target relative to X for classification or regression; None for unsupervised learning. labels : array-like, with shape (n_samples,), optional Group labels for the samples used while splitting the dataset into train/test set. train_sizes : array-like, shape (n_ticks,), dtype float or int Relative or absolute numbers of training examples that will be used to generate the learning curve. If the dtype is float, it is regarded as a fraction of the maximum size of the training set (that is determined by the selected validation method), i.e. it has to be within (0, 1]. Otherwise it is interpreted as absolute sizes of the training sets. Note that for classification the number of samples usually have to be big enough to contain at least one sample from each class. (default: np.linspace(0.1, 1.0, 5)) cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - An object to be used as a cross-validation generator. - An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. exploit_incremental_learning : boolean, optional, default: False If the estimator supports incremental learning, this will be used to speed up fitting for different training set sizes. n_jobs : integer, optional Number of jobs to run in parallel (default 1). pre_dispatch : integer or string, optional Number of predispatched jobs for parallel execution (default is all). The option can reduce the allocated memory. The string can be an expression like '2*n_jobs'. verbose : integer, optional Controls the verbosity: the higher, the more messages. Returns ------- train_sizes_abs : array, shape = (n_unique_ticks,), dtype int Numbers of training examples that has been used to generate the learning curve. Note that the number of ticks might be less than n_ticks because duplicate entries will be removed. 
train_scores : array, shape (n_ticks, n_cv_folds) Scores on training sets. test_scores : array, shape (n_ticks, n_cv_folds) Scores on test set. Notes ----- See :ref:`examples/model_selection/plot_learning_curve.py <example_model_selection_plot_learning_curve.py>` """ if exploit_incremental_learning and not hasattr(estimator, "partial_fit"): raise ValueError("An estimator must support the partial_fit interface " "to exploit incremental learning") X, y, labels = indexable(X, y, labels) cv = check_cv(cv, y, classifier=is_classifier(estimator)) cv_iter = cv.split(X, y, labels) # Make a list since we will be iterating multiple times over the folds cv_iter = list(cv_iter) scorer = check_scoring(estimator, scoring=scoring) n_max_training_samples = len(cv_iter[0][0]) # Because the lengths of folds can be significantly different, it is # not guaranteed that we use all of the available training data when we # use the first 'n_max_training_samples' samples. train_sizes_abs = _translate_train_sizes(train_sizes, n_max_training_samples) n_unique_ticks = train_sizes_abs.shape[0] if verbose > 0: print("[learning_curve] Training set sizes: " + str(train_sizes_abs)) parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) if exploit_incremental_learning: classes = np.unique(y) if is_classifier(estimator) else None out = parallel(delayed(_incremental_fit_estimator)( clone(estimator), X, y, classes, train, test, train_sizes_abs, scorer, verbose) for train, test in cv.split(X, y, labels)) else: out = parallel(delayed(_fit_and_score)( clone(estimator), X, y, scorer, train[:n_train_samples], test, verbose, parameters=None, fit_params=None, return_train_score=True) for train, test in cv_iter for n_train_samples in train_sizes_abs) out = np.array(out)[:, :2] n_cv_folds = out.shape[0] // n_unique_ticks out = out.reshape(n_cv_folds, n_unique_ticks, 2) out = np.asarray(out).transpose((2, 1, 0)) return train_sizes_abs, out[0], out[1]
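# Hedged usage sketch for the `labels`-aware learning_curve above, assuming the
# function is importable from this module and scikit-learn supplies the dataset
# and estimator.
import numpy as np
from sklearn.datasets import load_digits
from sklearn.naive_bayes import GaussianNB

X, y = load_digits(return_X_y=True)
sizes, train_scores, test_scores = learning_curve(
    GaussianNB(), X, y, train_sizes=np.linspace(0.2, 1.0, 4), cv=5)

# One row per training-set size, one column per CV fold.
print(sizes)                       # absolute training-set sizes actually used
print(test_scores.mean(axis=1))    # mean validation score per size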
def permutation_test_score(estimator, X, y, labels=None, cv=None, n_permutations=100, n_jobs=1, random_state=0, verbose=0, scoring=None): """Evaluate the significance of a cross-validated score with permutations Read more in the :ref:`User Guide <cross_validation>`. Parameters ---------- estimator : estimator object implementing 'fit' The object to use to fit the data. X : array-like of shape at least 2D The data to fit. y : array-like The target variable to try to predict in the case of supervised learning. labels : array-like, with shape (n_samples,), optional Group labels for the samples used while splitting the dataset into train/test set. scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - An object to be used as a cross-validation generator. - An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. n_permutations : integer, optional Number of times to permute ``y``. n_jobs : integer, optional The number of CPUs to use to do the computation. -1 means 'all CPUs'. random_state : RandomState or an int seed (0 by default) A random number generator instance to define the state of the random permutations generator. verbose : integer, optional The verbosity level. Returns ------- score : float The true score without permuting targets. permutation_scores : array, shape (n_permutations,) The scores obtained for each permutations. pvalue : float The returned value equals p-value if `scoring` returns bigger numbers for better scores (e.g., accuracy_score). If `scoring` is rather a loss function (i.e. when lower is better such as with `mean_squared_error`) then this is actually the complement of the p-value: 1 - p-value. Notes ----- This function implements Test 1 in: Ojala and Garriga. Permutation Tests for Studying Classifier Performance. The Journal of Machine Learning Research (2010) vol. 11 """ X, y, labels = indexable(X, y, labels) cv = check_cv(cv, y, classifier=is_classifier(estimator)) scorer = check_scoring(estimator, scoring=scoring) random_state = check_random_state(random_state) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. score = _permutation_test_score(clone(estimator), X, y, labels, cv, scorer) permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)( delayed(_permutation_test_score)( clone(estimator), X, _shuffle(y, labels, random_state), labels, cv, scorer) for _ in range(n_permutations)) permutation_scores = np.array(permutation_scores) pvalue = (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1) return score, permutation_scores, pvalue
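# Hedged usage sketch for the permutation test above; it assumes the function is
# importable from this module. A small p-value indicates that the unpermuted score
# is unlikely under the null hypothesis that features and targets are independent.
from sklearn.datasets import load_iris
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
score, perm_scores, pvalue = permutation_test_score(
    SVC(kernel='linear'), X, y, cv=5, n_permutations=30, random_state=0)
print(score, perm_scores.mean(), pvalue)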
def _wrapped_cross_val_score(sklearn_pipeline, features, target, cv, scoring_function, sample_weight=None, groups=None, use_dask=False, predictions=None, pipelines=None, features_test=None, random_state=None): """Fit estimator and compute scores for a given dataset split. Parameters ---------- sklearn_pipeline : pipeline object implementing 'fit' The object to use to fit the data. features : array-like of shape at least 2D The data to fit. target : array-like, optional, default: None The target variable to try to predict in the case of supervised learning. cv: int or cross-validation generator If CV is a number, then it is the number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. If it is an object then it is an object to be used as a cross-validation generator. scoring_function : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. sample_weight : array-like, optional List of sample weights to balance (or un-balanace) the dataset target as needed groups: array-like {n_samples, }, optional Group labels for the samples used while splitting the dataset into train/test set use_dask : bool, default False Whether to use dask """ # Re-set random seeds inside the threads if random_state is not None: random.seed(random_state) # deap uses random np.random.seed(random_state) sample_weight_dict = set_sample_weight(sklearn_pipeline.steps, sample_weight) features, target, groups = indexable(features, target, groups) cv = check_cv(cv, target, classifier=is_classifier(sklearn_pipeline)) cv_iter = list(cv.split(features, target, groups)) scorer = check_scoring(sklearn_pipeline, scoring=scoring_function) # save the sklearn predictions. The model is trained with the training set (features) and validated with the test dataset # (features_test) # Note: because of the way TPOT is built, the fit function is called to see if the model is valid. try: tmp = sklearn_pipeline.fit(features, target) predictions.append(tmp.predict(features_test)) pipelines.append(sklearn_pipeline) except: pass if use_dask: try: import dask_ml.model_selection # noqa import dask # noqa from dask.delayed import Delayed except ImportError: msg = "'use_dask' requires the optional dask and dask-ml depedencies." raise ImportError(msg) dsk, keys, n_splits = dask_ml.model_selection._search.build_graph( estimator=sklearn_pipeline, cv=cv, scorer=scorer, candidate_params=[{}], X=features, y=target, groups=groups, fit_params=sample_weight_dict, refit=False, error_score=float('-inf'), ) cv_results = Delayed(keys[0], dsk) scores = [cv_results['split{}_test_score'.format(i)] for i in range(n_splits)] CV_score = dask.delayed(np.array)(scores)[:, 0] return dask.delayed(np.nanmean)(CV_score) else: try: with warnings.catch_warnings(): warnings.simplefilter('ignore') scores = [_fit_and_score(estimator=clone(sklearn_pipeline), X=features, y=target, scorer=scorer, train=train, test=test, verbose=0, parameters=None, fit_params=sample_weight_dict) for train, test in cv_iter] CV_score = np.array(scores)[:, 0] return np.nanmean(CV_score) except TimeoutException: return "Timeout" except Exception as e: return -float('inf')
def validation_curve(estimator, X, y, param_name, param_range, labels=None, cv=None, scoring=None, n_jobs=1, pre_dispatch="all", verbose=0): """Validation curve. Determine training and test scores for varying parameter values. Compute scores for an estimator with different values of a specified parameter. This is similar to grid search with one parameter. However, this will also compute training scores and is merely a utility for plotting the results. Read more in the :ref:`User Guide <learning_curve>`. Parameters ---------- estimator : object type that implements the "fit" and "predict" methods An object of that type which is cloned for each validation. X : array-like, shape (n_samples, n_features) Training vector, where n_samples is the number of samples and n_features is the number of features. y : array-like, shape (n_samples) or (n_samples, n_features), optional Target relative to X for classification or regression; None for unsupervised learning. param_name : string Name of the parameter that will be varied. param_range : array-like, shape (n_values,) The values of the parameter that will be evaluated. labels : array-like, with shape (n_samples,), optional Group labels for the samples used while splitting the dataset into train/test set. cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - An object to be used as a cross-validation generator. - An iterable yielding train, test splits. For integer/None inputs, if the estimator is a classifier and ``y`` is either binary or multiclass, :class:`StratifiedKFold` used. In all other cases, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. scoring : string, callable or None, optional, default: None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. n_jobs : integer, optional Number of jobs to run in parallel (default 1). pre_dispatch : integer or string, optional Number of predispatched jobs for parallel execution (default is all). The option can reduce the allocated memory. The string can be an expression like '2*n_jobs'. verbose : integer, optional Controls the verbosity: the higher, the more messages. Returns ------- train_scores : array, shape (n_ticks, n_cv_folds) Scores on training sets. test_scores : array, shape (n_ticks, n_cv_folds) Scores on test set. Notes ----- See :ref:`examples/model_selection/plot_validation_curve.py <example_model_selection_plot_validation_curve.py>` """ X, y, labels = indexable(X, y, labels) cv = check_cv(cv, y, classifier=is_classifier(estimator)) scorer = check_scoring(estimator, scoring=scoring) parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) out = parallel( delayed(_fit_and_score)(estimator, X, y, scorer, train, test, verbose, parameters={ param_name: v }, fit_params=None, return_train_score=True) for train, test in cv.split(X, y, labels) for v in param_range) out = np.asarray(out)[:, :2] n_params = len(param_range) n_cv_folds = out.shape[0] // n_params out = out.reshape(n_cv_folds, n_params, 2).transpose((2, 1, 0)) return out[0], out[1]
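# Hedged usage sketch for the `labels`-aware validation_curve above, assuming the
# function is importable from this module.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)
param_range = np.logspace(-3, 2, 6)
train_scores, test_scores = validation_curve(
    SVC(), X, y, param_name='gamma', param_range=param_range, cv=5)

# Shapes are (n_param_values, n_cv_folds); averaging over folds gives the curve.
print(train_scores.shape, test_scores.mean(axis=1))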
def cross_val_decision_function(estimator, X, y=None, cv=None, n_jobs=1, verbose=0, fit_params=None, pre_dispatch='2*n_jobs'): """Generate cross-validated estimates for each input data point Read more in the :ref:`User Guide <cross_validation>`. Parameters ---------- estimator : estimator object implementing 'fit' and 'predict' The object to use to fit the data. X : array-like The data to fit. Can be, for example a list, or an array at least 2d. y : array-like, optional, default: None The target variable to try to predict in the case of supervised learning. cv : int, cross-validation generator or an iterable, optional Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 3-fold cross-validation, - integer, to specify the number of folds. - An object to be used as a cross-validation generator. - An iterable yielding train/test splits. For integer/None inputs, if ``y`` is binary or multiclass, :class:`StratifiedKFold` used. If the estimator is a classifier or if ``y`` is neither binary nor multiclass, :class:`KFold` is used. Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. n_jobs : integer, optional The number of CPUs to use to do the computation. -1 means 'all CPUs'. verbose : integer, optional The verbosity level. fit_params : dict, optional Parameters to pass to the fit method of the estimator. pre_dispatch : int, or string, optional Controls the number of jobs that get dispatched during parallel execution. Reducing this number can be useful to avoid an explosion of memory consumption when more jobs get dispatched than CPUs can process. This parameter can be: - None, in which case all the jobs are immediately created and spawned. Use this for lightweight and fast-running jobs, to avoid delays due to on-demand spawning of the jobs - An int, giving the exact number of total jobs that are spawned - A string, giving an expression as a function of n_jobs, as in '2*n_jobs' Returns ------- preds : ndarray This is the result of calling 'predict' """ X, y = indexable(X, y) cv = check_cv(cv, X, y, classifier=is_classifier(estimator)) # We clone the estimator to make sure that all the folds are # independent, and that it is pickle-able. parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) preds_blocks = parallel(delayed(_fit_and_predict)(clone(estimator), X, y, train, test, verbose, fit_params) for train, test in cv) preds = [p for p, _ in preds_blocks] locs = np.concatenate([loc for _, loc in preds_blocks]) if not _check_is_partition(locs, _num_samples(X)): raise ValueError('cross_val_predict only works for partitions') inv_locs = np.empty(len(locs), dtype=int) inv_locs[locs] = np.arange(len(locs)) # Check for sparse predictions if sp.issparse(preds[0]): preds = sp.vstack(preds, format=preds[0].format) else: preds = np.concatenate(preds) return preds[inv_locs]
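# Hedged usage sketch for cross_val_decision_function above. It mirrors the old
# cross_val_predict API; the module's _fit_and_predict helper is assumed to call
# decision_function, so the estimator must expose that method.
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, n_features=10, random_state=0)

# Each sample gets the decision value produced by the fold in which it was held
# out, so the output aligns row-for-row with X.
decision = cross_val_decision_function(LinearSVC(), X, y, cv=3)
print(decision.shape)   # (n_samples,) for a binary problem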
def _wrapped_cross_val_score(sklearn_pipeline, features, target, cv, scoring_function, sample_weight=None, groups=None, use_dask=False): """Fit estimator and compute scores for a given dataset split. Parameters ---------- sklearn_pipeline : pipeline object implementing 'fit' The object to use to fit the data. features : array-like of shape at least 2D The data to fit. target : array-like, optional, default: None The target variable to try to predict in the case of supervised learning. cv: int or cross-validation generator If CV is a number, then it is the number of folds to evaluate each pipeline over in k-fold cross-validation during the TPOT optimization process. If it is an object then it is an object to be used as a cross-validation generator. scoring_function : callable A scorer callable object / function with signature ``scorer(estimator, X, y)``. sample_weight : array-like, optional List of sample weights to balance (or un-balanace) the dataset target as needed groups: array-like {n_samples, }, optional Group labels for the samples used while splitting the dataset into train/test set use_dask : bool, default False Whether to use dask """ sample_weight_dict = set_sample_weight(sklearn_pipeline.steps, sample_weight) features, target, groups = indexable(features, target, groups) cv = check_cv(cv, target, classifier=is_classifier(sklearn_pipeline)) cv_iter = list(cv.split(features, target, groups)) scorer = check_scoring(sklearn_pipeline, scoring=scoring_function) if use_dask: try: import dask_ml.model_selection # noqa import dask # noqa from dask.delayed import Delayed except ImportError: msg = "'use_dask' requires the optional dask and dask-ml depedencies." raise ImportError(msg) dsk, keys, n_splits = dask_ml.model_selection._search.build_graph( estimator=sklearn_pipeline, cv=cv, scorer=scorer, candidate_params=[{}], X=features, y=target, groups=groups, fit_params=sample_weight_dict, refit=False, error_score=float('-inf'), ) cv_results = Delayed(keys[0], dsk) scores = [cv_results['split{}_test_score'.format(i)] for i in range(n_splits)] CV_score = dask.delayed(np.array)(scores)[:, 0] return dask.delayed(np.nanmean)(CV_score) else: try: with warnings.catch_warnings(): warnings.simplefilter('ignore') scores = [_fit_and_score(estimator=clone(sklearn_pipeline), X=features, y=target, scorer=scorer, train=train, test=test, verbose=0, parameters=None, fit_params=sample_weight_dict) for train, test in cv_iter] CV_score = np.array(scores)[:, 0] return np.nanmean(CV_score) except TimeoutException: return "Timeout" except Exception as e: return -float('inf')
def train_test_split(*arrays, **options): """Extend sklearn.model_selection.train_test_slit to have group split. Parameters ---------- *arrays : sequence of indexables with same length / shape[0] Allowed inputs are lists, numpy arrays, scipy-sparse matrices or pandas dataframes. test_size : float, int or None, optional (default=None) If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples. If None, the value is set to the complement of the train size. If ``train_size`` is also None, it will be set to 0.25. train_size : float, int, or None, (default=None) If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the train split. If int, represents the absolute number of train samples. If None, the value is automatically set to the complement of the test size. random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. shuffle : None or str (default='simple') How to shuffle the data before splitting. None, no shuffle. For str, one of 'simple', 'stratified' and 'group', corresponding to `ShuffleSplit`, `StratifiedShuffleSplit` and `GroupShuffleSplit`, respectively. labels : array-like or None (default=None) Ignored if shuffle is None or 'simple'. When shuffle='stratified', this array is used as class labels. When shuffle='group', this array is used as groups. Returns ------- splitting : list, length=2 * len(arrays) List containing train-test split of inputs. """ n_arrays = len(arrays) if n_arrays == 0: raise ValueError("At least one array required as input") test_size = options.pop('test_size', None) train_size = options.pop('train_size', None) random_state = options.pop('random_state', None) shuffle = options.pop('shuffle', 'simple') labels = options.pop('labels', None) if options: raise TypeError("Invalid parameters passed: %s" % str(options)) arrays = indexable(*arrays) n_samples = _num_samples(arrays[0]) if shuffle == 'group': if labels is None: raise ValueError("When shuffle='group', " "labels should not be None!") labels = check_array(labels, ensure_2d=False, dtype=None) uniques = np.unique(labels) n_samples = uniques.size n_train, n_test = _validate_shuffle_split(n_samples, test_size, train_size, default_test_size=0.25) shuffle_options = dict(test_size=n_test, train_size=n_train, random_state=random_state) if shuffle is None: if labels is not None: warnings.warn("The `labels` is ignored for " "shuffle being None!") train = np.arange(n_train) test = np.arange(n_train, n_train + n_test) elif shuffle == 'simple': if labels is not None: warnings.warn("The `labels` is not needed and therefore " "ignored for ShuffleSplit, as shuffle='simple'!") cv = ShuffleSplit(**shuffle_options) train, test = next(cv.split(X=arrays[0], y=None)) elif shuffle == 'stratified': cv = StratifiedShuffleSplit(**shuffle_options) train, test = next(cv.split(X=arrays[0], y=labels)) elif shuffle == 'group': cv = GroupShuffleSplit(**shuffle_options) train, test = next(cv.split(X=arrays[0], y=None, groups=labels)) else: raise ValueError("The argument `shuffle` only supports None, " "'simple', 'stratified' and 'group', but got `%s`!" 
% shuffle) return list(chain.from_iterable((safe_indexing(a, train), safe_indexing(a, test)) for a in arrays))
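# Hedged usage sketch for the extended train_test_split above: with
# shuffle='group', whole groups (given via `labels`) end up on one side of the
# split only. Assumes the function is importable from this module.
import numpy as np

X = np.arange(40).reshape(20, 2)
y = np.r_[np.zeros(10), np.ones(10)]
groups = np.repeat(np.arange(5), 4)          # 5 groups of 4 samples

X_tr, X_te, y_tr, y_te, g_tr, g_te = train_test_split(
    X, y, groups, shuffle='group', labels=groups,
    test_size=0.2, random_state=0)

print(set(g_tr) & set(g_te))                 # no group straddles the split -> set()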