Example 1
def cross_validate(estimator, X, mixed_y=None, groups=None, scoring=None, cv=None,
                   n_jobs=1, verbose=0, fit_params=None,
                   pre_dispatch='2*n_jobs', return_train_score="warn"):
    """Evaluate metric(s) by cross-validation and also record fit/score times."""

    # TODO: wrapper patch; the 'classifier' key below is hard-coded
    _y = mixed_y['classifier'] if isinstance(mixed_y, dict) else mixed_y

    X, y, groups = indexable(X, _y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))

    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_fit_and_score)(
            clone(estimator), X, mixed_y, scorers, train, test, verbose, None,
            fit_params, return_train_score=return_train_score,
            return_times=True)
        for train, test in cv.split(X, y, groups))

    if return_train_score:
        train_scores, test_scores, fit_times, score_times = zip(*scores)
        train_scores = _aggregate_score_dicts(train_scores)
    else:
        test_scores, fit_times, score_times = zip(*scores)
    test_scores = _aggregate_score_dicts(test_scores)

    # TODO: replace by a dict in 0.21
    ret = DeprecationDict() if return_train_score == 'warn' else {}
    ret['fit_time'] = np.array(fit_times)
    ret['score_time'] = np.array(score_times)

    for name in scorers:
        ret['test_%s' % name] = np.array(test_scores[name])
        if return_train_score:
            key = 'train_%s' % name
            ret[key] = np.array(train_scores[name])
            if return_train_score == 'warn':
                message = (
                    'You are accessing a training score ({!r}), '
                    'which will not be available by default '
                    'any more in 0.21. If you need training scores, '
                    'please set return_train_score=True').format(key)
                # warn on key access
                ret.add_warning(key, message, FutureWarning)

    return ret
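
A minimal usage sketch for this patched cross_validate, assuming it lives in a module that also imports sklearn's 0.19/0.20-era private helpers (_fit_and_score, _check_multimetric_scoring, _aggregate_score_dicts, DeprecationDict). With a plain array target it behaves like the stock function; a dict target such as {'classifier': y} only makes sense for an estimator whose fit() accepts that structure:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=100, random_state=0)

# Plain array target: equivalent to sklearn's own cross_validate.
results = cross_validate(LogisticRegression(), X, mixed_y=y, cv=3,
                         return_train_score=False)
print(results['fit_time'])    # one fit time per fold
print(results['test_score'])  # one test score per fold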
Example 2
    def _format_results(candidate_params, scorers, out):
        n_candidates = len(candidate_params)
        (test_score_dicts, fit_time, score_time) = zip(*out)
        test_scores = _aggregate_score_dicts(test_score_dicts)

        results = {}

        def _store(key_name, array, rank=False, greater_is_better=False):
            """A small helper to store the scores/times to the cv_results_"""
            # When iterated first by splits, then by parameters
            # We want `array` to have `n_candidates` rows and `n_splits` cols.
            array = np.array(array, dtype=np.float64)

            results["mean_%s" % key_name] = array

            if rank:
                array = -array if greater_is_better else array
                results["rank_%s" % key_name] = np.asarray(rankdata(
                    array, method="min"),
                                                           dtype=np.int32)

        _store("fit_time", fit_time)
        _store("score_time", score_time)
        # Use one MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
        # not contain all the params
        param_results = defaultdict(
            partial(
                np.ma.MaskedArray,
                np.empty(n_candidates, ),
                mask=True,
                dtype=object,
            ))
        for cand_i, params in enumerate(candidate_params):
            for name, value in params.items():
                # An all masked empty array gets created for the key
                # `"param_%s" % name` at the first occurrence of `name`.
                # Setting the value at an index also unmasks that index
                param_results["param_%s" % name][cand_i] = value

        results.update(param_results)
        # Store a list of param dicts at the key "params"
        results["params"] = candidate_params

        for scorer_name, scorer in scorers.items():
            # Store the test scores for each scorer and rank the candidates
            _store(
                "test_%s" % scorer_name,
                test_scores[scorer_name],
                rank=True,
                greater_is_better=scorer.greater_is_better,
            )

        return results
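
The `_aggregate_score_dicts` helper used above groups per-split score dicts by metric name; a rough sketch of that behavior for reference:

# Roughly what _aggregate_score_dicts does: turn a list of score dicts
# into one mapping from metric name to the list of its values.
per_split = [{'accuracy': 0.9, 'f1': 0.8},
             {'accuracy': 0.7, 'f1': 0.6}]
aggregated = {key: [split[key] for split in per_split] for key in per_split[0]}
# aggregated == {'accuracy': [0.9, 0.7], 'f1': [0.8, 0.6]}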
Example 3
def cross_val_score_weighted(estimator,
                             x_data,
                             y_data=None,
                             groups=None,
                             scoring=None,
                             cv=None,
                             n_jobs=None,
                             verbose=0,
                             fit_params=None,
                             pre_dispatch='2*n_jobs',
                             error_score=np.nan,
                             sample_weights=None):
    """Expand :func:`sklearn.model_selection.cross_val_score`."""
    scorer = check_scoring(estimator, scoring=scoring)
    scorer_name = 'score'
    scoring = {scorer_name: scorer}
    x_data, y_data, groups = indexable(x_data, y_data, groups)

    cv = check_cv(cv, y_data, classifier=is_classifier(estimator))
    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs,
                        verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_fit_and_score_weighted)(clone(estimator),
                                         x_data,
                                         y_data,
                                         scorers,
                                         train,
                                         test,
                                         verbose,
                                         None,
                                         fit_params,
                                         error_score=error_score,
                                         sample_weights=sample_weights)
        for train, test in cv.split(x_data, y_data, groups))

    test_scores = list(zip(*scores))[0]
    test_scores = _aggregate_score_dicts(test_scores)

    return np.array(test_scores[scorer_name])
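
A hypothetical call sketch, assuming the custom `_fit_and_score_weighted` helper forwards the given `sample_weights` to the scorer for each test fold:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge

x_data, y_data = make_regression(n_samples=120, random_state=0)
weights = np.ones(len(y_data))  # illustrative: uniform weights

scores = cross_val_score_weighted(Ridge(), x_data, y_data, cv=5,
                                  sample_weights=weights)
print(scores.shape)  # (5,): one weighted score per fold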
Example 4
    def fit(self, X, y=None, groups=None, **fit_params):
        """Run fit with all sets of parameters.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator

        """
        if self.fit_params is not None:
            warnings.warn(
                '"fit_params" as a constructor argument was '
                'deprecated in version 0.19 and will be removed '
                'in version 0.21. Pass fit parameters to the '
                '"fit" method instead.', DeprecationWarning)
            if fit_params:
                warnings.warn(
                    'Ignoring fit_params passed as a constructor '
                    'argument in favor of keyword arguments to '
                    'the "fit" method.', RuntimeWarning)
            else:
                fit_params = self.fit_params
        estimator = self.estimator
        cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

        scorers, self.multimetric_ = _check_multimetric_scoring(
            self.estimator, scoring=self.scoring)

        if self.multimetric_:
            if self.refit is not False and (
                    not isinstance(self.refit, six.string_types) or
                    # This will work for both dict / list (tuple)
                    self.refit not in scorers):
                raise ValueError("For multi-metric scoring, the parameter "
                                 "refit must be set to a scorer key "
                                 "to refit an estimator with the best "
                                 "parameter setting on the whole data and "
                                 "make the best_* attributes "
                                 "available for that metric. If this is not "
                                 "needed, refit should be set to False "
                                 "explicitly. %r was passed." % self.refit)
            else:
                refit_metric = self.refit
        else:
            refit_metric = 'score'

        # X, y, groups = indexable(X, y, groups)
        if groups is not None:
            raise NotImplementedError("groups are not supported")

        # n_splits = cv.get_n_splits(X, y, groups)
        n_splits = min(
            cv.get_n_splits(X_.transpose(1, 2, 0), y_, None)
            for X_, y_ in zip(X, y))

        def generate_index(X_list, y_list):
            split = [
                cv.split(X.transpose(1, 2, 0), y)
                for X, y in zip(X_list, y_list)
            ]
            for i in range(n_splits):
                yield zip(*[next(s) for s in split])

        generate_index_iter = generate_index(X, y)

        # Regenerate parameter iterable for each fit
        candidate_params = list(self._get_param_iterator())
        n_candidates = len(candidate_params)
        if self.verbose > 0:
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(n_splits, n_candidates,
                                     n_candidates * n_splits))

        base_estimator = clone(self.estimator)
        pre_dispatch = self.pre_dispatch

        out = Parallel(
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            pre_dispatch=pre_dispatch)(delayed(_fit_and_score)(
                clone(base_estimator),
                X,
                y,
                scorers,
                train,
                test,
                self.verbose,
                parameters,
                fit_params=fit_params,
                return_train_score=self.return_train_score,
                return_n_test_samples=True,
                return_times=True,
                return_parameters=False,
                error_score=self.error_score,
                return_estimator=True,
                return_idx=True) for parameters, (
                    train,
                    test) in product(candidate_params, generate_index_iter))

        # if one chooses to see train scores, "out" will contain train score info
        if self.return_train_score:
            (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
             score_time, estimators, train_idxs, test_idxs) = zip(*out)
        else:
            (test_score_dicts, test_sample_counts, fit_time, score_time,
             estimators, train_idxs, test_idxs) = zip(*out)

        # test_score_dicts and train_score_dicts are lists of dictionaries;
        # turn them into dicts of lists
        test_scores = _aggregate_score_dicts(test_score_dicts)
        if self.return_train_score:
            train_scores = _aggregate_score_dicts(train_score_dicts)

        # TODO: replace by a dict in 0.21
        results = (DeprecationDict()
                   if self.return_train_score == 'warn' else {})

        def _store(key_name, array, weights=None, splits=False, rank=False):
            """Store the scores/times to the cv_results_."""
            # When iterated first by splits, then by parameters
            # We want `array` to have `n_candidates` rows and `n_splits` cols.
            array = np.array(array,
                             dtype=np.float64).reshape(n_candidates, n_splits)
            if splits:
                for split_i in range(n_splits):
                    # Uses closure to alter the results
                    results["split%d_%s" %
                            (split_i, key_name)] = array[:, split_i]

            array_means = np.average(array, axis=1, weights=weights)
            results['mean_%s' % key_name] = array_means
            # Weighted std is not directly available in numpy
            array_stds = np.sqrt(
                np.average((array - array_means[:, np.newaxis])**2,
                           axis=1,
                           weights=weights))
            results['std_%s' % key_name] = array_stds

            if rank:
                results["rank_%s" % key_name] = np.asarray(rankdata(
                    -array_means, method='min'),
                                                           dtype=np.int32)

        _store('fit_time', fit_time)
        _store('score_time', score_time)
        results['estimators'] = estimators
        results['train_index'] = train_idxs
        results['test_index'] = test_idxs

        # Use one MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
        # not contain all the params
        param_results = defaultdict(
            partial(MaskedArray,
                    np.empty(n_candidates, ),
                    mask=True,
                    dtype=object))
        for cand_i, params in enumerate(candidate_params):
            for name, value in params.items():
                # An all masked empty array gets created for the key
                # `"param_%s" % name` at the first occurence of `name`.
                # Setting the value at an index also unmasks that index
                param_results["param_%s" % name][cand_i] = value

        results.update(param_results)
        # Store a list of param dicts at the key 'params'
        results['params'] = candidate_params

        # NOTE: test_sample_counts (weights) remain the same for all candidates
        test_sample_counts = np.array(test_sample_counts[:n_splits],
                                      dtype=int)
        for scorer_name in scorers.keys():
            # Compute the (weighted) mean and std for test scores alone
            _store('test_%s' % scorer_name,
                   test_scores[scorer_name],
                   splits=True,
                   rank=True,
                   weights=test_sample_counts if self.iid else None)
            if self.return_train_score:
                prev_keys = set(results.keys())
                _store('train_%s' % scorer_name,
                       train_scores[scorer_name],
                       splits=True)

                if self.return_train_score == 'warn':
                    for key in set(results.keys()) - prev_keys:
                        message = (
                            'You are accessing a training score ({!r}), '
                            'which will not be available by default '
                            'any more in 0.21. If you need training scores, '
                            'please set return_train_score=True').format(key)
                        # warn on key access
                        results.add_warning(key, message, FutureWarning)

        # For multi-metric evaluation, store the best_index_, best_params_ and
        # best_score_ iff refit is one of the scorer names
        # In single metric evaluation, refit_metric is "score"
        if self.refit or not self.multimetric_:
            self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
            self.best_params_ = candidate_params[self.best_index_]
            self.best_score_ = results["mean_test_%s" %
                                       refit_metric][self.best_index_]

        if self.refit:
            self.best_estimator_ = clone(base_estimator).set_params(
                **self.best_params_)
            if y is not None:
                self.best_estimator_.fit(X, y, **fit_params)
            else:
                self.best_estimator_.fit(X, **fit_params)

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers if self.multimetric_ else scorers['score']

        self.cv_results_ = results
        self.n_splits_ = n_splits

        return self
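
This fit() expects X and y to be lists of per-dataset arrays, with samples on the second axis of each X_ so that transpose(1, 2, 0) puts them first for the CV splitter. A small sketch of that layout and of the per-dataset index generation (the shapes here are illustrative assumptions):

import numpy as np
from sklearn.model_selection import KFold

# Two hypothetical datasets with shapes (n_channels, n_samples, n_features);
# transpose(1, 2, 0) yields (n_samples, n_features, n_channels) for the splitter.
X = [np.random.rand(4, 30, 6), np.random.rand(4, 40, 6)]
y = [np.random.randint(0, 2, 30), np.random.randint(0, 2, 40)]

cv = KFold(n_splits=3)
n_splits = min(cv.get_n_splits(X_.transpose(1, 2, 0), y_, None)
               for X_, y_ in zip(X, y))

splits = [cv.split(X_.transpose(1, 2, 0), y_) for X_, y_ in zip(X, y)]
for _ in range(n_splits):
    # One (train, test) pair of index tuples, with one index array per dataset.
    train_idx, test_idx = zip(*[next(s) for s in splits])
    print(len(train_idx), len(test_idx))  # 2 and 2: one per dataset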
Example 5
    def fit(self, X, y=None, groups=None, **fit_params):
        """Run fit with all sets of parameters.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator

        """
        if self.fit_params is not None:
            warnings.warn('"fit_params" as a constructor argument was '
                          'deprecated in version 0.19 and will be removed '
                          'in version 0.21. Pass fit parameters to the '
                          '"fit" method instead.', DeprecationWarning)
            if fit_params:
                warnings.warn('Ignoring fit_params passed as a constructor '
                              'argument in favor of keyword arguments to '
                              'the "fit" method.', RuntimeWarning)
            else:
                fit_params = self.fit_params
        estimator = self.estimator
        cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

        scorers, self.multimetric_ = _check_multimetric_scoring(
            self.estimator, scoring=self.scoring)

        if self.multimetric_:
            if self.refit is not False and (
                    not isinstance(self.refit, six.string_types) or
                    # This will work for both dict / list (tuple)
                    self.refit not in scorers):
                raise ValueError("For multi-metric scoring, the parameter "
                                 "refit must be set to a scorer key "
                                 "to refit an estimator with the best "
                                 "parameter setting on the whole data and "
                                 "make the best_* attributes "
                                 "available for that metric. If this is not "
                                 "needed, refit should be set to False "
                                 "explicitly. %r was passed." % self.refit)
            else:
                refit_metric = self.refit
        else:
            refit_metric = 'score'

        # X, y, groups = indexable(X, y, groups)
        if groups is not None:
            raise NotImplementedError("groups are not supported")

        # n_splits = cv.get_n_splits(X, y, groups)
        n_splits = min(cv.get_n_splits(X_.transpose(1, 2, 0), y_, None)
                       for X_, y_ in zip(X, y))

        def generate_index(X_list, y_list):
            split = [cv.split(X.transpose(1, 2, 0), y)
                     for X, y in zip(X_list, y_list)]
            for i in range(n_splits):
                yield zip(*[next(s) for s in split])

        generate_index_iter = generate_index(X, y)

        # Regenerate parameter iterable for each fit
        candidate_params = list(self._get_param_iterator())
        n_candidates = len(candidate_params)
        if self.verbose > 0:
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(n_splits, n_candidates,
                                     n_candidates * n_splits))

        base_estimator = clone(self.estimator)
        pre_dispatch = self.pre_dispatch

        out = Parallel(
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=pre_dispatch
        )(delayed(_fit_and_score)(clone(base_estimator), X, y, scorers, train,
                                  test, self.verbose, parameters,
                                  fit_params=fit_params,
                                  return_train_score=self.return_train_score,
                                  return_n_test_samples=True,
                                  return_times=True, return_parameters=False,
                                  error_score=self.error_score,
                                  return_estimator=True, return_idx=True)
          for parameters, (train, test) in product(
            candidate_params, generate_index_iter))

        # if one chooses to see train scores, "out" will contain train score info
        if self.return_train_score:
            (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
             score_time, estimators, train_idxs, test_idxs) = zip(*out)
        else:
            (test_score_dicts, test_sample_counts, fit_time,
             score_time, estimators, train_idxs, test_idxs) = zip(*out)

        # test_score_dicts and train_score_dicts are lists of dictionaries;
        # turn them into dicts of lists
        test_scores = _aggregate_score_dicts(test_score_dicts)
        if self.return_train_score:
            train_scores = _aggregate_score_dicts(train_score_dicts)

        # TODO: replace by a dict in 0.21
        results = (DeprecationDict() if self.return_train_score == 'warn'
                   else {})

        def _store(key_name, array, weights=None, splits=False, rank=False):
            """Store the scores/times to the cv_results_."""
            # When iterated first by splits, then by parameters
            # We want `array` to have `n_candidates` rows and `n_splits` cols.
            array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                              n_splits)
            if splits:
                for split_i in range(n_splits):
                    # Uses closure to alter the results
                    results["split%d_%s"
                            % (split_i, key_name)] = array[:, split_i]

            array_means = np.average(array, axis=1, weights=weights)
            results['mean_%s' % key_name] = array_means
            # Weighted std is not directly available in numpy
            array_stds = np.sqrt(np.average((array -
                                             array_means[:, np.newaxis]) ** 2,
                                            axis=1, weights=weights))
            results['std_%s' % key_name] = array_stds

            if rank:
                results["rank_%s" % key_name] = np.asarray(
                    rankdata(-array_means, method='min'), dtype=np.int32)

        _store('fit_time', fit_time)
        _store('score_time', score_time)
        results['estimators'] = estimators
        results['train_index'] = train_idxs
        results['test_index'] = test_idxs

        # Use one MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
        # not contain all the params
        param_results = defaultdict(partial(MaskedArray,
                                            np.empty(n_candidates,),
                                            mask=True,
                                            dtype=object))
        for cand_i, params in enumerate(candidate_params):
            for name, value in params.items():
                # An all masked empty array gets created for the key
                # `"param_%s" % name` at the first occurence of `name`.
                # Setting the value at an index also unmasks that index
                param_results["param_%s" % name][cand_i] = value

        results.update(param_results)
        # Store a list of param dicts at the key 'params'
        results['params'] = candidate_params

        # NOTE: test_sample_counts (weights) remain the same for all candidates
        test_sample_counts = np.array(test_sample_counts[:n_splits],
                                      dtype=int)
        for scorer_name in scorers.keys():
            # Compute the (weighted) mean and std for test scores alone
            _store('test_%s' % scorer_name, test_scores[scorer_name],
                   splits=True, rank=True,
                   weights=test_sample_counts if self.iid else None)
            if self.return_train_score:
                prev_keys = set(results.keys())
                _store('train_%s' % scorer_name, train_scores[scorer_name],
                       splits=True)

                if self.return_train_score == 'warn':
                    for key in set(results.keys()) - prev_keys:
                        message = (
                            'You are accessing a training score ({!r}), '
                            'which will not be available by default '
                            'any more in 0.21. If you need training scores, '
                            'please set return_train_score=True').format(key)
                        # warn on key access
                        results.add_warning(key, message, FutureWarning)

        # For multi-metric evaluation, store the best_index_, best_params_ and
        # best_score_ iff refit is one of the scorer names
        # In single metric evaluation, refit_metric is "score"
        if self.refit or not self.multimetric_:
            self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
            self.best_params_ = candidate_params[self.best_index_]
            self.best_score_ = results["mean_test_%s" % refit_metric][
                self.best_index_]

        if self.refit:
            self.best_estimator_ = clone(base_estimator).set_params(
                **self.best_params_)
            if y is not None:
                self.best_estimator_.fit(X, y, **fit_params)
            else:
                self.best_estimator_.fit(X, **fit_params)

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers if self.multimetric_ else scorers['score']

        self.cv_results_ = results
        self.n_splits_ = n_splits

        return self
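
The statistics inside `_store` can be checked in isolation; a small numpy sketch of the weighted mean, weighted std, and ranking it performs (values are illustrative):

import numpy as np
from scipy.stats import rankdata

# Scores arranged as (n_candidates, n_splits); weights are per-split test sizes.
array = np.array([[0.80, 0.90],
                  [0.70, 0.95]])
weights = np.array([40, 60])

array_means = np.average(array, axis=1, weights=weights)
array_stds = np.sqrt(np.average((array - array_means[:, np.newaxis]) ** 2,
                                axis=1, weights=weights))
ranks = np.asarray(rankdata(-array_means, method='min'), dtype=np.int32)
# ranks[i] == 1 for the candidate with the highest weighted mean score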
Example 6
    def fit(self, X, y=None, groups=None, **fit_params):
        """Run fit with all sets of parameters.
        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.
        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.
        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator
        """

        estimator = self.estimator
        cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

        scorers, self.multimetric_ = _check_multimetric_scoring(
            self.estimator, scoring=self.scoring)

        if self.multimetric_:
            if self.refit is not False and (
                not isinstance(self.refit, six.string_types)
                # This will work for both dict / list (tuple)
                or self.refit not in scorers):
                raise ValueError("For multi-metric scoring, the parameter "
                "refit must be set to a scorer key to refit an estimator with "
                "the best parameter setting on the whole data and make the "
                "best_* attributes available for that metric. If this is not "
                "needed, refit should be set to False explicitly. %r was "
                "passed." % self.refit)
            else:
                refit_metric = self.refit
        else:
            refit_metric = 'score'

        X, y, groups = indexable(X, y, groups)
        n_splits = cv.get_n_splits(X, y, groups)

        # Regenerate parameter iterable for each fit
        candidate_params = list(self._get_param_iterator())
        n_candidates = len(candidate_params)

        if self.verbose > 0:
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(n_splits, n_candidates,
                                     n_candidates * n_splits))

        base_estimator = clone(self.estimator)

        param_grid = [(parameters, train, test)
            for parameters in candidate_params
            for train, test in list(cv.split(X, y, groups))]

        # Because the original python code expects a certain order for the
        # elements, we need to respect it.
        indexed_param_grid = list(zip(range(len(param_grid)), param_grid))
        par_param_grid = self.sc.parallelize(indexed_param_grid,
                                             len(indexed_param_grid))
        X_bc = self.sc.broadcast(X)
        y_bc = self.sc.broadcast(y)

        verbose = self.verbose
        error_score = self.error_score
        return_train_score = self.return_train_score

        def fun(tup):
            # DO NOT REFERENCE `self` ANYWHERE IN THIS FUNCTION.
            # IT WILL CAUSE A SPARK-5063 ERROR.
            (index, (parameters, train, test)) = tup
            local_estimator = clone(base_estimator)
            local_X = X_bc.value
            local_y = y_bc.value
            res = _fit_and_score(local_estimator, local_X, local_y, scorers,
                train, test, verbose, parameters, fit_params=fit_params,
                return_train_score=return_train_score,
                return_n_test_samples=True, return_times=True,
                error_score=error_score)
            return (index, res)

        indexed_out0 = dict(par_param_grid.map(fun).collect())
        out = [indexed_out0[idx] for idx in range(len(param_grid))]
        X_bc.unpersist()
        y_bc.unpersist()

        # if one chooses to see train scores, "out" will contain train score info
        if self.return_train_score:
            (train_score_dicts, test_score_dicts, test_sample_counts,
             fit_time, score_time) = zip(*out)

        else:
            (test_score_dicts, test_sample_counts,
             fit_time, score_time) = zip(*out)

        if self.verbose > 2:
            print('test_sample_counts: {}'.format(test_sample_counts))
            print('fit_time: {}'.format(fit_time))
            print('score_time: {}'.format(score_time))

        # test_score_dicts and train_score_dicts are lists of dictionaries;
        # turn them into dicts of lists
        test_scores = _aggregate_score_dicts(test_score_dicts)
        if self.verbose > 1:
            print('TEST')
            print(test_scores)

        if self.return_train_score:
            train_scores = _aggregate_score_dicts(train_score_dicts)
            if self.verbose > 1:
                print('TRAIN')
                print(train_scores)

        results = dict()

        def _store(key_name, array, weights=None, splits=False, rank=False):
            """A small helper to store the scores/times to the cv_results_"""
            # When iterated first by splits, then by parameters
            # We want `array` to have `n_candidates` rows and `n_splits` cols.
            array = (np.array(array, dtype=np.float64)
                       .reshape(n_candidates, n_splits))

            if splits:
                for split_i in range(n_splits):
                    # Uses closure to alter the results
                    results["split%d_%s"
                            % (split_i, key_name)] = array[:, split_i]

            array_means = np.average(array, axis=1, weights=weights)
            results['mean_%s' % key_name] = array_means
            # Weighted std is not directly available in numpy
            array_stds = np.sqrt(np.average((array -
                                             array_means[:, np.newaxis]) ** 2,
                                            axis=1, weights=weights))
            results['std_%s' % key_name] = array_stds

            if rank:
                results["rank_%s" % key_name] = np.asarray(
                    rankdata(-array_means, method='min'), dtype=np.int32)

        _store('fit_time', fit_time)
        _store('score_time', score_time)

        # Use one MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
        # not contain all the params
        param_results = defaultdict(partial(MaskedArray,
                                            np.empty(n_candidates,),
                                            mask=True,
                                            dtype=object))
        for cand_i, params in enumerate(candidate_params):
            for name, value in params.items():
                # An all masked empty array gets created for the key
                # `"param_%s" % name` at the first occurence of `name`.
                # Setting the value at an index also unmasks that index
                param_results["param_%s" % name][cand_i] = value

        results.update(param_results)

        # Store a list of param dicts at the key 'params'
        results['params'] = candidate_params

        # NOTE: test_sample_counts (weights) remain the same for all candidates
        test_sample_counts = np.array(test_sample_counts[:n_splits],
                                      dtype=int)
        for scorer_name in scorers.keys():
            # Compute the (weighted) mean and std for test scores alone
            _store('test_%s' % scorer_name, test_scores[scorer_name],
                   splits=True, rank=True,
                   weights=test_sample_counts if self.iid else None)
            if self.return_train_score:
                prev_keys = set(results.keys())
                _store('train_%s' % scorer_name, train_scores[scorer_name],
                       splits=True)

        # For multi-metric evaluation, store the best_index_, best_params_ and
        # best_score_ iff refit is one of the scorer names
        # In single metric evaluation, refit_metric is "score"
        if self.refit or not self.multimetric_:
            self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
            self.best_params_ = candidate_params[self.best_index_]
            self.best_score_ = results["mean_test_%s" % refit_metric][
                self.best_index_]

        if self.refit:
            self.best_estimator_ = clone(base_estimator).set_params(
                **self.best_params_)
            if y is not None:
                self.best_estimator_.fit(X, y, **fit_params)
            else:
                self.best_estimator_.fit(X, **fit_params)

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers if self.multimetric_ else scorers['score']
        self.cv_results_ = results
        self.n_splits_ = n_splits

        if self.verbose > 1:
            print(self.scorer_)
            print(self.cv_results_)
            print(self.n_splits_)
        return self
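
A rough instantiation sketch for this Spark-backed search. The class name used here (SparkGridSearchCV) is hypothetical; the only assumptions taken from the code above are that the object exposes a SparkContext as `sc` and otherwise mirrors GridSearchCV's constructor:

from pyspark import SparkContext
from sklearn.datasets import load_iris
from sklearn.svm import SVC

sc = SparkContext.getOrCreate()
X, y = load_iris(return_X_y=True)

# Hypothetical class name; candidate fits are shipped to Spark executors,
# while X and y are broadcast once as shown in fit() above.
search = SparkGridSearchCV(sc=sc, estimator=SVC(),
                           param_grid={'C': [0.1, 1.0, 10.0]}, cv=3)
search.fit(X, y)
print(search.best_params_, search.best_score_)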
Example 7
def cross_validate(estimator,
                   X,
                   y=None,
                   groups=None,
                   scoring=None,
                   cv=None,
                   n_jobs=1,
                   verbose=0,
                   fit_params=None,
                   pre_dispatch='2*n_jobs',
                   return_train_score="warn",
                   return_estimator=True,
                   return_idx=True):
    """Evaluate metric(s) by cross-validation and also record fit/score times.

    Read more in the :ref:`User Guide <multimetric_cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like
        The data to fit. Can be for example a list, or an array.

    y : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset into
        train/test set.

    scoring : string, callable, list/tuple, dict or None, default: None
        A single string (see :ref:`scoring_parameter`) or a callable
        (see :ref:`scoring`) to evaluate the predictions on the test set.

        For evaluating multiple metrics, either give a list of (unique) strings
        or a dict with names as keys and callables as values.

        NOTE that when using custom scorers, each scorer should return a single
        value. Metric functions returning a list/array of values can be wrapped
        into multiple scorers that return one value each.

        See :ref:`multimetric_grid_search` for an example.

        If None, the estimator's default scorer (if available) is used.

    cv : int, cross-validation generator or an iterable, optional
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
          - None, to use the default 3-fold cross validation,
          - integer, to specify the number of folds in a `(Stratified)KFold`,
          - An object to be used as a cross-validation generator.
          - An iterable yielding train, test splits.

        For integer/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

    n_jobs : integer, optional
        The number of CPUs to use to do the computation. -1 means
        'all CPUs'.

    verbose : integer, optional
        The verbosity level.

    fit_params : dict, optional
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int, or string, optional
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - None, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A string, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    return_train_score : boolean, optional
        Whether to include train scores.

        Current default is ``'warn'``, which behaves as ``True`` in addition
        to raising a warning when a training score is looked up.
        That default will be changed to ``False`` in 0.21.
        Computing training scores is used to get insights on how different
        parameter settings impact the overfitting/underfitting trade-off.
        However computing the scores on the training set can be computationally
        expensive and is not strictly required to select the parameters that
        yield the best generalization performance.

    return_estimator : boolean, optional
        Whether to include the estimator

    Returns
    -------
    scores : dict of float arrays of shape=(n_splits,)
        Array of scores of the estimator for each run of the cross validation.

        A dict of arrays containing the score/time arrays for each scorer is
        returned. The possible keys for this ``dict`` are:

            ``test_score``
                The score array for test scores on each cv split.
            ``train_score``
                The score array for train scores on each cv split.
                This is available only if ``return_train_score`` parameter
                is ``True``.
            ``fit_time``
                The time for fitting the estimator on the train
                set for each cv split.
            ``score_time``
                The time for scoring the estimator on the test set for each
                cv split. (Note that the time for scoring on the train set is
                not included even if ``return_train_score`` is set to
                ``True``.)
            ``estimator``
                The fitted estimator for each cv split. Available only if
                ``return_estimator`` is ``True``.
            ``train_index`` / ``test_index``
                The train/test indices used for each cv split. Available only
                if ``return_idx`` is ``True``.


    Examples
    --------
    >>> from sklearn import datasets, linear_model
    >>> from sklearn.model_selection import cross_validate
    >>> from sklearn.metrics.scorer import make_scorer
    >>> from sklearn.metrics import confusion_matrix
    >>> from sklearn.svm import LinearSVC
    >>> diabetes = datasets.load_diabetes()
    >>> X = diabetes.data[:150]
    >>> y = diabetes.target[:150]
    >>> lasso = linear_model.Lasso()

    Single metric evaluation using ``cross_validate``

    >>> cv_results = cross_validate(lasso, X, y, return_train_score=False)
    >>> sorted(cv_results.keys())                         # doctest: +ELLIPSIS
    ['fit_time', 'score_time', 'test_score']
    >>> cv_results['test_score']    # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    array([ 0.33...,  0.08...,  0.03...])

    Multiple metric evaluation using ``cross_validate``
    (please refer the ``scoring`` parameter doc for more information)

    >>> scores = cross_validate(lasso, X, y,
    ...                         scoring=('r2', 'neg_mean_squared_error'))
    >>> print(scores['test_neg_mean_squared_error'])      # doctest: +ELLIPSIS
    [-3635.5... -3573.3... -6114.7...]
    >>> print(scores['train_r2'])                         # doctest: +ELLIPSIS
    [ 0.28...  0.39...  0.22...]

    See Also
    ---------
    :func:`sklearn.model_selection.cross_val_score`:
        Run cross-validation for single metric evaluation.

    :func:`sklearn.metrics.make_scorer`:
        Make a scorer from a performance metric or loss function.

    """
    # X, y, groups = indexable(X, y, groups)
    if groups is not None:
        raise NotImplementedError("groups are not supported")

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    n_splits = min(
        cv.get_n_splits(X_.transpose(1, 2, 0), y_, None)
        for X_, y_ in zip(X, y))

    def generate_index(X_list, y_list):
        split = [
            cv.split(X.transpose(1, 2, 0), y) for X, y in zip(X_list, y_list)
        ]
        for i in range(n_splits):
            yield zip(*[next(s) for s in split])

    generate_index_iter = generate_index(X, y)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs,
                        verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_fit_and_score)(clone(estimator),
                                X,
                                y,
                                scorers,
                                train,
                                test,
                                verbose,
                                None,
                                fit_params,
                                return_train_score=return_train_score,
                                return_times=True,
                                return_estimator=return_estimator)
        for train, test in generate_index_iter)

    if return_train_score and return_estimator and return_idx:
        (train_scores, test_scores, fit_times, score_times,
         estima, train_idx, test_idx) = zip(*scores)
        train_scores = _aggregate_score_dicts(train_scores)
    else:
        # This branch assumes _fit_and_score returned only the three
        # score/time entries, i.e. return_estimator and return_idx were False.
        test_scores, fit_times, score_times = zip(*scores)
    test_scores = _aggregate_score_dicts(test_scores)

    # TODO: replace by a dict in 0.21
    ret = DeprecationDict() if return_train_score == 'warn' else {}
    ret['fit_time'] = np.array(fit_times)
    ret['score_time'] = np.array(score_times)

    for name in scorers:
        ret['test_%s' % name] = np.array(test_scores[name])
        if return_train_score:
            key = 'train_%s' % name
            ret[key] = np.array(train_scores[name])
            if return_train_score == 'warn':
                message = ('You are accessing a training score ({!r}), '
                           'which will not be available by default '
                           'any more in 0.21. If you need training scores, '
                           'please set return_train_score=True').format(key)
                # warn on key access
                ret.add_warning(key, message, FutureWarning)
        if return_estimator:
            key = "estimator"
            ret[key] = estima
        if return_idx:
            key1 = "train_index"
            ret[key1] = train_idx
            key2 = "test_index"
            ret[key2] = test_idx

    return ret
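
Since this variant returns the fitted estimators and the split indices alongside the scores, consuming the result looks roughly like the sketch below; `my_estimator`, `X_list`, and `y_list` are placeholders for the domain-specific estimator and the list-of-3D-arrays layout expected by generate_index above:

# Placeholder inputs: the estimator and data layout are domain-specific.
results = cross_validate(my_estimator, X_list, y_list, cv=3)
for fold, estimator in enumerate(results['estimator']):
    print(fold,
          results['test_score'][fold],   # score for this split
          results['train_index'][fold],  # per-dataset train indices
          results['test_index'][fold])   # per-dataset test indices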
Example 8
    def fit(self, estimator, x, y=None, sample_weight=None):
        x = check_array(x, allow_multivariate=False)
        y = check_array(y, ensure_2d=False)
        random_state = check_random_state(self.random_state)
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                "expected the same number of samples (%d) and labels (%d)"
                % (x.shape[0], y.shape[0])
            )

        if self.n_interval == "sqrt":
            n_interval = math.ceil(math.sqrt(x.shape[-1]))
        elif self.n_interval == "log":
            n_interval = math.ceil(math.log2(x.shape[-1]))
        elif isinstance(self.n_interval, numbers.Integral):
            n_interval = self.n_interval
        elif isinstance(self.n_interval, numbers.Real):
            if not 0 < self.n_interval <= 1:
                raise ValueError(
                    "n_interval (%r) not in range [0, 1[" % self.n_interval
                )
            n_interval = math.floor(x.shape[-1] * self.n_interval)
        else:
            raise ValueError("unsupported n_interval, got %r" % self.n_interval)

        if callable(self.scoring):
            scoring = self.scoring
        elif self.scoring is None or isinstance(self.scoring, str):
            scoring = check_scoring(estimator, self.scoring)
        else:
            scoring_dict = _check_multimetric_scoring(estimator, self.scoring)
            scoring = _MultimetricScorer(**scoring_dict)

        if isinstance(self.domain, str):
            # Look up the domain class first so an unknown key raises a clear
            # ValueError instead of a TypeError from calling None().
            domain_cls = _PERMUTATION_DOMAIN.get(self.domain, None)
            if domain_cls is None:
                raise ValueError("domain (%s) is not supported" % self.domain)
            self.domain_ = domain_cls()
        else:
            self.domain_ = self.domain

        x_transform = self.domain_.transform(x=x)
        self.intervals_ = list(
            self.domain_.intervals(x_transform.shape[-1], n_interval)
        )
        scores = []
        for iter, (start, end) in enumerate(self.intervals_):
            if self.verbose:
                print(
                    f"Running iteration {iter + 1} of "
                    f"{len(self.intervals_)}. {start}:{end}"
                )
            x_perm_transform = x_transform.copy()
            rep_scores = []
            for rep in range(self.n_repeat):
                self.domain_.randomize(
                    x_perm_transform, start, end, random_state=random_state
                )
                x_perm_inverse = self.domain_.inverse_transform(x_perm_transform)
                if sample_weight is not None:
                    score = scoring(
                        estimator, x_perm_inverse, y, sample_weight=sample_weight
                    )
                else:
                    score = scoring(estimator, x_perm_inverse, y)
                rep_scores.append(score)

            if isinstance(rep_scores[0], dict):
                scores.append(_aggregate_score_dicts(rep_scores))
            else:
                scores.append(rep_scores)

        if sample_weight is not None:
            self.baseline_score_ = scoring(estimator, x, y, sample_weight=sample_weight)
        else:
            self.baseline_score_ = scoring(estimator, x, y)

        if self.verbose:
            print(f"Baseline score is: {self.baseline_score_}")

        if isinstance(self.baseline_score_, dict):
            self.importances_ = {
                name: _unpack_scores(
                    self.baseline_score_[name],
                    np.array([scores[i][name] for i in range(n_interval)]),
                )
                for name in self.baseline_score_
            }
        else:
            self.importances_ = _unpack_scores(self.baseline_score_, np.array(scores))
        return self
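
A hypothetical usage sketch. `IntervalPermutationImportance` is an assumed name for the class owning this fit(), and `domain="time"` is an assumed key of _PERMUTATION_DOMAIN; the estimator just needs to be already fitted and compatible with the chosen scoring:

import numpy as np
from sklearn.linear_model import LogisticRegression

x = np.random.rand(60, 32)            # (n_samples, n_timesteps)
y = np.random.randint(0, 2, 60)
clf = LogisticRegression().fit(x, y)  # any already-fitted estimator

imp = IntervalPermutationImportance(  # hypothetical class name
    n_interval="sqrt", n_repeat=5, domain="time",
    scoring="accuracy", random_state=0, verbose=0)
imp.fit(clf, x, y)
print(imp.baseline_score_)            # score on the unpermuted data
print(imp.importances_)               # per-interval importance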
Example 9
def _wrapped_cross_val_score(sklearn_pipeline,
                             features,
                             target,
                             cv,
                             scoring_function,
                             sample_weight=None,
                             groups=None,
                             use_dask=False):
    """Fit estimator and compute scores for a given dataset split.

    Parameters
    ----------
    sklearn_pipeline : pipeline object implementing 'fit'
        The object to use to fit the data.
    features : array-like of shape at least 2D
        The data to fit.
    target : array-like, optional, default: None
        The target variable to try to predict in the case of
        supervised learning.
    cv: cross-validation generator
        Object to be used as a cross-validation generator.
    scoring_function : callable
        A scorer callable object / function with signature
        ``scorer(estimator, X, y)``.
    sample_weight : array-like, optional
        List of sample weights to balance (or un-balance) the dataset target as needed
    groups: array-like {n_samples, }, optional
        Group labels for the samples used while splitting the dataset into train/test set
    use_dask : bool, default False
        Whether to use dask
    """
    sample_weight_dict = set_sample_weight(sklearn_pipeline.steps,
                                           sample_weight)

    features, target, groups = indexable(features, target, groups)

    cv_iter = list(cv.split(features, target, groups))
    scorer = check_scoring(sklearn_pipeline, scoring=scoring_function)

    if use_dask:
        try:
            import dask_ml.model_selection  # noqa
            import dask  # noqa
            from dask.delayed import Delayed
        except Exception as e:
            msg = "'use_dask' requires the optional dask and dask-ml depedencies.\n{}".format(
                e)
            raise ImportError(msg)

        dsk, keys, n_splits = dask_ml.model_selection._search.build_graph(
            estimator=sklearn_pipeline,
            cv=cv,
            scorer=scorer,
            candidate_params=[{}],
            X=features,
            y=target,
            groups=groups,
            fit_params=sample_weight_dict,
            refit=False,
            error_score=float('-inf'),
        )

        cv_results = Delayed(keys[0], dsk)
        scores = [
            cv_results['split{}_test_score'.format(i)] for i in range(n_splits)
        ]
        CV_score = dask.delayed(np.array)(scores)[:, 0]
        return dask.delayed(np.nanmean)(CV_score)
    else:
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                scores = [
                    _fit_and_score(estimator=clone(sklearn_pipeline),
                                   X=features,
                                   y=target,
                                   scorer=scorer,
                                   train=train,
                                   test=test,
                                   verbose=0,
                                   parameters=None,
                                   error_score='raise',
                                   return_estimator=True,
                                   fit_params=sample_weight_dict)
                    for train, test in cv_iter
                ]
                if isinstance(scores[0], list):  #scikit-learn <= 0.23.2
                    CV_score = np.array(scores)[:, 0]
                elif isinstance(scores[0], dict):  # scikit-learn >= 0.24
                    from sklearn.model_selection._validation import _aggregate_score_dicts
                    CV_score = _aggregate_score_dicts(scores)["test_scores"]
                    CV_fitted_pipeline = _aggregate_score_dicts(
                        scores)["estimator"]
                else:
                    raise ValueError(
                        "Incorrect output format from _fit_and_score!")
                fit_and_score_details = dict()
                fit_and_score_details["CV_score_mean"] = np.nanmean(CV_score)
                fit_and_score_details[
                    "CV_fitted_best_pipeline"] = CV_fitted_pipeline[0]
            return fit_and_score_details
        except TimeoutException:
            fit_and_score_details = dict()
            fit_and_score_details["CV_score_mean"] = "Timeout"
            fit_and_score_details["CV_fitted_best_pipeline"] = None
            return fit_and_score_details
        except Exception as e:
            fit_and_score_details = dict()
            fit_and_score_details["CV_score_mean"] = -float('inf')
            fit_and_score_details["CV_fitted_best_pipeline"] = None
            return fit_and_score_details
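
A hypothetical call sketch with a plain sklearn pipeline and a single scorer, assuming the surrounding module provides the set_sample_weight and TimeoutException internals used above:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_classification(n_samples=200, random_state=0)
pipeline = make_pipeline(StandardScaler(), LogisticRegression())

details = _wrapped_cross_val_score(pipeline, X, y,
                                   cv=StratifiedKFold(n_splits=5),
                                   scoring_function='accuracy')
print(details['CV_score_mean'])            # mean test score across folds
print(details['CV_fitted_best_pipeline'])  # fitted pipeline from the first split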
Example 10
def cross_validate_checkpoint(
    estimator,
    X,
    y=None,
    *,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=None,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
    return_train_score=False,
    return_estimator=False,
    error_score=np.nan,
    workdir=None,
    checkpoint=True,
    force_refresh=False,
    serialize_cv=False,
):
    """Evaluate metric(s) by cross-validation and also record fit/score times.

    This is a copy of :func:`sklearn:sklearn.model_selection.cross_validate`
    that uses :func:`_fit_and_score_ckpt` to checkpoint scores and estimators
    for each CV split.
    Read more in the :ref:`sklearn user guide <sklearn:multimetric_cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like of shape (n_samples, n_features)
        The data to fit. Can be for example a list, or an array.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like of shape (n_samples,), default=None
        Group labels for the samples used while splitting the dataset into
        train/test set. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`sklearn:GroupKFold`).

    scoring : str, callable, list/tuple, or dict, default=None
        A single str (see :ref:`sklearn:scoring_parameter`) or a callable
        (see :ref:`sklearn:scoring`) to evaluate the predictions on the test set.

        For evaluating multiple metrics, either give a list of (unique) strings
        or a dict with names as keys and callables as values.

        NOTE that when using custom scorers, each scorer should return a single
        value. Metric functions returning a list/array of values can be wrapped
        into multiple scorers that return one value each.

        See :ref:`sklearn:multimetric_grid_search` for an example.

        If None, the estimator's score method is used.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - an sklearn `CV splitter <https://scikit-learn.org/stable/glossary.html#term-cv-splitter>`_,
        - An iterable yielding (train, test) splits as arrays of indices.

        For int/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass,
        :class:`sklearn.model_selection.StratifiedKFold` is used. In all
        other cases, :class:`sklearn.model_selection.KFold` is used.
        Refer to the :ref:`sklearn user guide <sklearn:cross_validation>` for the
        various cross-validation strategies that can be used here.

    n_jobs : int, default=None
        The number of CPUs to use to do the computation.
        ``None`` means 1 unless in a :obj:`joblib:joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`sklearn Glossary <sklearn:n_jobs>`
        for more details.

    verbose : int, default=0
        The verbosity level.

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int or str, default='2*n_jobs'
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - None, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A str, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    return_train_score : bool, default=False
        Whether to include train scores.
        Computing training scores is used to get insights on how different
        parameter settings impact the overfitting/underfitting trade-off.
        However computing the scores on the training set can be computationally
        expensive and is not strictly required to select the parameters that
        yield the best generalization performance.

    return_estimator : bool, default=False
        Whether to return the estimators fitted on each split.

    error_score : 'raise' or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised. This parameter
        does not affect the refit step, which will always raise the error.

    workdir : path-like object, default=None
        A string or :term:`python:path-like-object` indicating the directory
        in which to store checkpoint files

    checkpoint : bool, default=True
        If True, checkpoint the parameters, estimators, and scores.

    force_refresh : bool, default=False
        If True, recompute scores even if the checkpoint file already exists.
        Otherwise, load scores from checkpoint files and return.

    serialize_cv : bool, default=False
        If True, do not use joblib.Parallel to evaluate each CV split.

    Returns
    -------
    scores : dict of float arrays of shape (n_splits,)
        Array of scores of the estimator for each run of the cross validation.

        A dict of arrays containing the score/time arrays for each scorer is
        returned. The possible keys for this ``dict`` are:

            ``test_score``
                The score array for test scores on each cv split.
                Suffix ``_score`` in ``test_score`` changes to a specific
                metric like ``test_r2`` or ``test_auc`` if there are
                multiple scoring metrics in the scoring parameter.
            ``train_score``
                The score array for train scores on each cv split.
                Suffix ``_score`` in ``train_score`` changes to a specific
                metric like ``train_r2`` or ``train_auc`` if there are
                multiple scoring metrics in the scoring parameter.
                This is available only if ``return_train_score`` parameter
                is ``True``.
            ``fit_time``
                The time for fitting the estimator on the train
                set for each cv split.
            ``score_time``
                The time for scoring the estimator on the test set for each
                cv split. (Note: time for scoring on the train set is not
                included even if ``return_train_score`` is set to ``True``.)
            ``estimator``
                The estimator objects for each cv split.
                This is available only if ``return_estimator`` parameter
                is set to ``True``.

    Examples
    --------
    >>> import shutil
    >>> import tempfile
    >>> from sklearn import datasets, linear_model
    >>> from afqinsight import cross_validate_checkpoint
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> diabetes = datasets.load_diabetes()
    >>> X = diabetes.data[:150]
    >>> y = diabetes.target[:150]
    >>> lasso = linear_model.Lasso()

    Single metric evaluation using ``cross_validate_checkpoint``

    >>> cv_results = cross_validate_checkpoint(lasso, X, y, cv=3, checkpoint=False)
    >>> sorted(cv_results.keys())
    ['fit_time', 'score_time', 'test_score']
    >>> cv_results['test_score']
    array([0.33150734, 0.08022311, 0.03531764])

    Multiple metric evaluation using ``cross_validate_checkpoint``, an
    estimator pipeline, and checkpointing (please refer to the ``scoring``
    parameter doc for more information)

    >>> tempdir = tempfile.mkdtemp()
    >>> scaler = StandardScaler()
    >>> pipeline = make_pipeline(scaler, lasso)
    >>> scores = cross_validate_checkpoint(pipeline, X, y, cv=3,
    ...                         scoring=('r2', 'neg_mean_squared_error'),
    ...                         return_train_score=True, checkpoint=True,
    ...                         workdir=tempdir, return_estimator=True)
    >>> shutil.rmtree(tempdir)
    >>> print(scores['test_neg_mean_squared_error'])
    [-2479.2... -3281.2... -3466.7...]
    >>> print(scores['train_r2'])
    [0.507... 0.602... 0.478...]

    See Also
    --------
    sklearn.model_selection.cross_val_score:
        Run cross-validation for single metric evaluation.
    sklearn.model_selection.cross_val_predict:
        Get predictions from each split of cross-validation for diagnostic
        purposes.
    sklearn.metrics.make_scorer:
        Make a scorer from a performance metric or loss function.
    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    if serialize_cv:
        scores = [
            _fit_and_score_ckpt(
                workdir=workdir,
                checkpoint=checkpoint,
                force_refresh=force_refresh,
                estimator=clone(estimator),
                X=X,
                y=y,
                scorer=scorers,
                train=train,
                test=test,
                verbose=verbose,
                parameters=None,
                fit_params=fit_params,
                return_train_score=return_train_score,
                return_times=True,
                return_estimator=return_estimator,
                error_score=error_score,
            ) for train, test in cv.split(X, y, groups)
        ]
    else:
        parallel = Parallel(n_jobs=n_jobs,
                            verbose=verbose,
                            pre_dispatch=pre_dispatch)
        scores = parallel(
            delayed(_fit_and_score_ckpt)(
                workdir=workdir,
                checkpoint=checkpoint,
                force_refresh=force_refresh,
                estimator=clone(estimator),
                X=X,
                y=y,
                scorer=scorers,
                train=train,
                test=test,
                verbose=verbose,
                parameters=None,
                fit_params=fit_params,
                return_train_score=return_train_score,
                return_times=True,
                return_estimator=return_estimator,
                error_score=error_score,
            ) for train, test in cv.split(X, y, groups))

    zipped_scores = list(zip(*scores))
    if return_train_score:
        train_scores = zipped_scores.pop(0)
        train_scores = _aggregate_score_dicts(train_scores)
    if return_estimator:
        fitted_estimators = zipped_scores.pop()
    test_scores, fit_times, score_times = zipped_scores
    test_scores = _aggregate_score_dicts(test_scores)

    ret = {}
    ret["fit_time"] = np.array(fit_times)
    ret["score_time"] = np.array(score_times)

    if return_estimator:
        ret["estimator"] = fitted_estimators

    for name in scorers:
        ret["test_%s" % name] = np.array(test_scores[name])
        if return_train_score:
            key = "train_%s" % name
            ret[key] = np.array(train_scores[name])

    return ret
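
Beyond the doctest in the docstring, the checkpointing parameters can be exercised directly. The following is a usage sketch based on the parameter descriptions above (same `afqinsight` import as in the docstring); whether the second call actually skips refitting depends on `_fit_and_score_ckpt`, which is not shown in this listing.

import shutil
import tempfile

from sklearn import datasets, linear_model
from afqinsight import cross_validate_checkpoint

X, y = datasets.load_diabetes(return_X_y=True)
lasso = linear_model.Lasso()
workdir = tempfile.mkdtemp()

# First call fits every split and writes one checkpoint file per split.
first = cross_validate_checkpoint(lasso, X, y, cv=3, checkpoint=True,
                                  workdir=workdir)

# Re-running with the same workdir loads the cached scores instead of refitting.
cached = cross_validate_checkpoint(lasso, X, y, cv=3, checkpoint=True,
                                   workdir=workdir)

# force_refresh=True ignores the existing checkpoint files and recomputes.
fresh = cross_validate_checkpoint(lasso, X, y, cv=3, checkpoint=True,
                                  workdir=workdir, force_refresh=True)

shutil.rmtree(workdir)
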
Ejemplo n.º 11
0
    def fit(self, X, y=None, groups=None, **fit_params):
        if self.fit_params is not None:
            warnings.warn(
                '"fit_params" as a constructor argument was '
                'deprecated in version 0.19 and will be removed '
                'in version 0.21. Pass fit parameters to the '
                '"fit" method instead.', DeprecationWarning)
            if fit_params:
                warnings.warn(
                    'Ignoring fit_params passed as a constructor '
                    'argument in favor of keyword arguments to '
                    'the "fit" method.', RuntimeWarning)
            else:
                fit_params = self.fit_params
        estimator = self.estimator
        cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

        scorers, self.multimetric_ = _check_multimetric_scoring(
            self.estimator, scoring=self.scoring)

        if self.multimetric_:
            if self.refit is not False and (
                    not isinstance(self.refit, six.string_types) or
                    # This will work for both dict / list (tuple)
                    self.refit not in scorers):
                raise ValueError("For multi-metric scoring, the parameter "
                                 "refit must be set to a scorer key "
                                 "to refit an estimator with the best "
                                 "parameter setting on the whole data and "
                                 "make the best_* attributes "
                                 "available for that metric. If this is not "
                                 "needed, refit should be set to False "
                                 "explicitly. %r was passed." % self.refit)
            else:
                refit_metric = self.refit
        else:
            refit_metric = 'score'

        X, y, groups = indexable(X, y, groups)
        n_splits = cv.get_n_splits(X, y, groups)
        # Regenerate parameter iterable for each fit
        candidate_params = list(self._get_param_iterator())
        n_candidates = len(candidate_params)
        if self.verbose > 0:
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(n_splits, n_candidates,
                                     n_candidates * n_splits))

        base_estimator = clone(self.estimator)
        pre_dispatch = self.pre_dispatch

        out = Parallel(
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            pre_dispatch=pre_dispatch)(delayed(_fit_and_score)(
                clone(base_estimator),
                X,
                y,
                scorers,
                train,
                test,
                self.verbose,
                parameters,
                fit_params=fit_params,
                return_train_score=self.return_train_score,
                return_n_test_samples=True,
                return_times=True,
                return_parameters=False,
                error_score=self.error_score,
                return_estimator=True) for parameters, (
                    train,
                    test) in product(candidate_params, cv.split(X, y, groups)))

        n_candidates = len(candidate_params)
        n_folds = cv.get_n_splits()
        self.cv_estimators = []
        for i in range(n_candidates):
            current_slice = out[(i * n_folds):((i + 1) * n_folds)]
            self.cv_estimators.append(
                ('model_%d' % (i + 1),
                 [info[-1]['estimator'] for info in current_slice]))
        out = [info[:-1] for info in out]
        self.folds = list(cv.split(X, y, groups))

        # if one chooses to see train scores, "out" will contain train score info
        if self.return_train_score:
            (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
             score_time) = zip(*out)
        else:
            (test_score_dicts, test_sample_counts, fit_time,
             score_time) = zip(*out)

        # test_score_dicts and train_score dicts are lists of dictionaries and
        # we make them into dict of lists
        test_scores = _aggregate_score_dicts(test_score_dicts)
        if self.return_train_score:
            train_scores = _aggregate_score_dicts(train_score_dicts)

        results = dict()

        def _store(key_name, array, weights=None, splits=False, rank=False):
            """A small helper to store the scores/times to the cv_results_"""
            # When iterated first by splits, then by parameters
            # We want `array` to have `n_candidates` rows and `n_splits` cols.
            array = np.array(array,
                             dtype=np.float64).reshape(n_candidates, n_splits)
            if splits:
                for split_i in range(n_splits):
                    # Uses closure to alter the results
                    results["split%d_%s" %
                            (split_i, key_name)] = array[:, split_i]

            array_means = np.average(array, axis=1, weights=weights)
            results['mean_%s' % key_name] = array_means
            # Weighted std is not directly available in numpy
            array_stds = np.sqrt(
                np.average((array - array_means[:, np.newaxis])**2,
                           axis=1,
                           weights=weights))
            results['std_%s' % key_name] = array_stds

            if rank:
                results["rank_%s" % key_name] = np.asarray(rankdata(
                    -array_means, method='min'),
                                                           dtype=np.int32)

        _store('fit_time', fit_time)
        _store('score_time', score_time)
        # Use one MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
        # not contain all the params
        param_results = defaultdict(
            partial(MaskedArray,
                    np.empty(n_candidates, ),
                    mask=True,
                    dtype=object))
        for cand_i, params in enumerate(candidate_params):
            for name, value in params.items():
                # An all masked empty array gets created for the key
                # `"param_%s" % name` at the first occurrence of `name`.
                # Setting the value at an index also unmasks that index
                param_results["param_%s" % name][cand_i] = value

        results.update(param_results)
        # Store a list of param dicts at the key 'params'
        results['params'] = candidate_params

        # NOTE test_sample counts (weights) remain the same for all candidates
        test_sample_counts = np.array(test_sample_counts[:n_splits],
                                      dtype=int)
        for scorer_name in scorers.keys():
            # Computed the (weighted) mean and std for test scores alone
            _store('test_%s' % scorer_name,
                   test_scores[scorer_name],
                   splits=True,
                   rank=True,
                   weights=test_sample_counts if self.iid else None)
            if self.return_train_score:
                _store('train_%s' % scorer_name,
                       train_scores[scorer_name],
                       splits=True)

        # For multi-metric evaluation, store the best_index_, best_params_ and
        # best_score_ iff refit is one of the scorer names
        # In single metric evaluation, refit_metric is "score"
        if self.refit or not self.multimetric_:
            self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
            self.best_params_ = candidate_params[self.best_index_]
            self.best_score_ = results["mean_test_%s" %
                                       refit_metric][self.best_index_]

        if self.refit:
            self.best_estimator_ = clone(base_estimator).set_params(
                **self.best_params_)
            if y is not None:
                self.best_estimator_.fit(X, y, **fit_params)
            else:
                self.best_estimator_.fit(X, **fit_params)

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers if self.multimetric_ else scorers['score']

        self.cv_results_ = results
        self.n_splits_ = n_splits

        return self
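
The `_store` helper above notes that a weighted standard deviation is not available directly in NumPy. Here is a small worked example of the same computation on a toy 2-candidate, 3-split array (the numbers are made up for illustration):

import numpy as np

# Two candidates, three CV splits, weighted by test-set size per split.
array = np.array([[0.80, 0.70, 0.90],
                  [0.60, 0.65, 0.55]])
weights = np.array([100, 120, 80])

array_means = np.average(array, axis=1, weights=weights)
# Weighted std: square root of the weighted average of squared deviations,
# exactly as in _store above.
array_stds = np.sqrt(
    np.average((array - array_means[:, np.newaxis]) ** 2,
               axis=1, weights=weights))
print(array_means)  # per-candidate weighted mean score
print(array_stds)   # per-candidate weighted standard deviation
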
Ejemplo n.º 12
0
    def _format_results(self, candidate_params, scorers, n_splits, out):
        # candidate_params, scorers, n_splits, out = all_candidate_params, scorers, n_splits, all_out
        n_candidates = len(candidate_params)

        values = dict()
        for d in out:
            for k, v in d.items():
                if k in values:
                    values[k].append(v)
                else:
                    values[k] = [v]

        # test_score_dicts, train_score dicts and confmat_dicts are lists of
        # dictionaries and we make them into dict of lists
        test_scores = _aggregate_score_dicts(values['test_scores'])
        if 'train_scores' in values:
            train_scores = _aggregate_score_dicts(values['train_scores'])
        if 'confusion_matrix' in values:
            confmats = _aggregate_score_dicts(values['confusion_matrix'])

        results = {}

        def _store(key_name, array, weights=None, splits=False, rank=False):
            """A small helper to store the scores/times to the cv_results_"""
            # When iterated first by splits, then by parameters
            # We want `array` to have `n_candidates` rows and `n_splits` cols.
            array = np.array(array,
                             dtype=np.float64).reshape(n_candidates, n_splits)
            if splits:
                for split_i in range(n_splits):
                    # Uses closure to alter the results
                    results["split%d_%s" %
                            (split_i, key_name)] = array[:, split_i]

            array_means = np.average(array, axis=1, weights=weights)
            results['mean_%s' % key_name] = array_means
            # Weighted std is not directly available in numpy
            array_stds = np.sqrt(
                np.average((array - array_means[:, np.newaxis])**2,
                           axis=1,
                           weights=weights))
            results['std_%s' % key_name] = array_stds

            if rank:
                results["rank_%s" % key_name] = np.asarray(rankdata(
                    -array_means, method='min'),
                                                           dtype=np.int32)

        for s in ['fit_time', 'score_time', 'n_iter']:
            # s = 'n_iter'
            if s in values:
                _store(s, values[s])

        # Use one MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
        # not contain all the params
        param_results = defaultdict(
            partial(MaskedArray,
                    np.empty(n_candidates, ),
                    mask=True,
                    dtype=object))
        for cand_i, params in enumerate(candidate_params):
            for name, value in params.items():
                # An all masked empty array gets created for the key
                # `"param_%s" % name` at the first occurrence of `name`.
                # Setting the value at an index also unmasks that index
                param_results["param_%s" % name][cand_i] = value

        results.update(param_results)
        # Store a list of param dicts at the key 'params'
        results['params'] = candidate_params

        # NOTE test_sample counts (weights) remain the same for all candidates
        if 'n_test_samples' in values:
            test_sample_counts = np.array(values['n_test_samples'][:n_splits],
                                          dtype=int)

        if self.iid != 'deprecated':
            warnings.warn(
                "The parameter 'iid' is deprecated in 0.22 and will be "
                "removed in 0.24.", FutureWarning)
            iid = self.iid
        else:
            iid = False

        for scorer_name in scorers.keys():
            # Computed the (weighted) mean and std for test scores alone
            _store('test_%s' % scorer_name,
                   test_scores[scorer_name],
                   splits=True,
                   rank=True,
                   weights=test_sample_counts if iid else None)
            if self.return_train_score:
                _store('train_%s' % scorer_name,
                       train_scores[scorer_name],
                       splits=True)
        if 'confusion_matrix' in values:
            for bin_name, bin_values in confmats.items():
                _store(bin_name, bin_values)

        # Store the plotting dicts
        for n in ['roc_values', 'prc_values', 'threshc_values']:
            if n in values:
                results[n] = np.array(values[n]).reshape(
                    n_candidates, n_splits)

        return results
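
The `param_results` construction used here (and in the other `_format_results` variants) keeps one masked array per parameter, where an index stays masked until some candidate actually sets that parameter. A stand-alone illustration of the pattern with made-up candidates:

from collections import defaultdict
from functools import partial

import numpy as np
from numpy.ma import MaskedArray

candidate_params = [{"C": 1.0}, {"C": 10.0, "gamma": 0.1}, {"gamma": 0.01}]
n_candidates = len(candidate_params)

param_results = defaultdict(
    partial(MaskedArray, np.empty(n_candidates), mask=True, dtype=object))

for cand_i, params in enumerate(candidate_params):
    for name, value in params.items():
        # Assigning to an index also unmasks that index.
        param_results["param_%s" % name][cand_i] = value

print(param_results["param_gamma"])  # [-- 0.1 0.01]: masked where gamma is unset
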
Ejemplo n.º 13
0
def _skl_format_cv_results(
    out,
    return_train_score,
    candidate_params,
    n_candidates,
    n_splits,
    scorers,
    iid,
):

    out = _aggregate_score_dicts(out)

    results = dict()

    def _store(key_name, array, weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        # When iterated first by splits, then by parameters
        # We want `array` to have `n_candidates` rows and `n_splits` cols.
        array = np.array(array,
                         dtype=np.float64).reshape(n_candidates, n_splits)
        if splits:
            for split_i in range(n_splits):
                # Uses closure to alter the results
                results["split%d_%s" % (split_i, key_name)] = array[:, split_i]

        array_means = np.average(array, axis=1, weights=weights)
        results["mean_%s" % key_name] = array_means

        if key_name.startswith(
            ("train_", "test_")) and np.any(~np.isfinite(array_means)):
            warnings.warn(
                f"One or more of the {key_name.split('_')[0]} scores "
                f"are non-finite: {array_means}",
                category=UserWarning,
            )

        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(
            np.average(
                (array - array_means[:, np.newaxis])**2,
                axis=1,
                weights=weights,
            ))
        results["std_%s" % key_name] = array_stds

        if rank:
            results["rank_%s" % key_name] = np.asarray(get_ranks(-array_means),
                                                       dtype=np.int32)

    _store("fit_time", out["fit_time"])
    _store("score_time", out["score_time"])
    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(
        partial(MaskedArray, np.empty(n_candidates), mask=True, dtype=object))

    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            # An all masked empty array gets created for the key
            # `"param_%s" % name` at the first occurrence of `name`.
            # Setting the value at an index also unmasks that index
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    # Store a list of param dicts at the key 'params'
    results["params"] = candidate_params

    test_scores_dict = _normalize_score_results(out["test_scores"])
    if return_train_score:
        train_scores_dict = _normalize_score_results(out["train_scores"])

    for scorer_name in test_scores_dict:
        # Computed the (weighted) mean and std for test scores alone
        _store(
            "test_%s" % scorer_name,
            test_scores_dict[scorer_name],
            splits=True,
            rank=True,
            weights=None,
        )
        if return_train_score:
            _store(
                "train_%s" % scorer_name,
                train_scores_dict[scorer_name],
                splits=True,
            )
    return results
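
`get_ranks(-array_means)` above is not defined in this listing; assuming it behaves like `scipy.stats.rankdata(..., method='min')` (the call used by the other snippets), negating the means makes the highest score receive rank 1 while ties share the lowest rank:

import numpy as np
from scipy.stats import rankdata

mean_test_scores = np.array([0.71, 0.85, 0.85, 0.60])
ranks = np.asarray(rankdata(-mean_test_scores, method="min"), dtype=np.int32)
print(ranks)  # [3 1 1 4]
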
Ejemplo n.º 14
0
def _skl_format_cv_results(
    out,
    return_train_score,
    candidate_params,
    n_candidates,
    n_splits,
    scorers,
    iid,
):

    # if one chooses to see train scores, "out" will contain train score info
    if return_train_score:
        (
            train_score_dicts,
            test_score_dicts,
            test_sample_counts,
            fit_time,
            score_time,
        ) = zip(*out)
    else:
        (test_score_dicts, test_sample_counts, fit_time, score_time) = zip(*out)

    # test_score_dicts and train_score dicts are lists of dictionaries and
    # we make them into dict of lists
    test_scores = _aggregate_score_dicts(test_score_dicts)
    if return_train_score:
        train_scores = _aggregate_score_dicts(train_score_dicts)

    results = dict()

    def _store(key_name, array, weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        # When iterated first by splits, then by parameters
        # We want `array` to have `n_candidates` rows and `n_splits` cols.
        array = np.array(array, dtype=np.float64).reshape(
            n_candidates, n_splits
        )
        if splits:
            for split_i in range(n_splits):
                # Uses closure to alter the results
                results["split%d_%s" % (split_i, key_name)] = array[:, split_i]

        array_means = np.average(array, axis=1, weights=weights)
        results["mean_%s" % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(
            np.average(
                (array - array_means[:, np.newaxis]) ** 2,
                axis=1,
                weights=weights,
            )
        )
        results["std_%s" % key_name] = array_stds

        if rank:
            results["rank_%s" % key_name] = np.asarray(
                get_ranks(-array_means), dtype=np.int32
            )

    _store("fit_time", fit_time)
    _store("score_time", score_time)
    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(
        partial(MaskedArray, np.empty(n_candidates), mask=True, dtype=object)
    )
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            # An all masked empty array gets created for the key
            # `"param_%s" % name` at the first occurrence of `name`.
            # Setting the value at an index also unmasks that index
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    # Store a list of param dicts at the key 'params'
    results["params"] = candidate_params

    # NOTE test_sample counts (weights) remain the same for all candidates
    test_sample_counts = np.array(test_sample_counts[:n_splits], dtype=int)
    for scorer_name in scorers.keys():
        # Computed the (weighted) mean and std for test scores alone
        _store(
            "test_%s" % scorer_name,
            test_scores[scorer_name],
            splits=True,
            rank=True,
            weights=test_sample_counts if iid else None,
        )
        if return_train_score:
            _store(
                "train_%s" % scorer_name, train_scores[scorer_name], splits=True
            )

    return results
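
The `reshape(n_candidates, n_splits)` calls in this and the surrounding snippets assume that `out` lists all splits of the first candidate, then all splits of the second, and so on, which is the order in which `product(candidate_params, cv.split(...))` schedules the fits. A tiny check of that layout with placeholder values:

from itertools import product

import numpy as np

n_candidates, n_splits = 2, 3

# Fake per-fit results, produced in the same (candidate, split) order as
# product(candidate_params, cv.split(...)) in the fit methods above.
out = ["cand%d_split%d" % (c, s)
       for c, s in product(range(n_candidates), range(n_splits))]

per_candidate = np.array(out).reshape(n_candidates, n_splits)
print(per_candidate[1])  # all three splits of the second candidate
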
Ejemplo n.º 15
0
def my_cross_validate(estimator,
                      X,
                      y,
                      groups=None,
                      scoring=None,
                      cv=None,
                      n_jobs=1,
                      verbose=0,
                      fit_params=None,
                      pre_dispatch='2*n_jobs',
                      return_train_score="warn"):
    """
    In this project, data is pre-split,
    and estimator is always a classifier so:
    cv: None (do not use)
    groups: None (do not use)
    X: ((X_train1, X_test1), (X_train2, X_test2), ...)
    y: ((y_train1, y_test1), (y_train2, y_test2), ...)
    """

    # X, y, groups = indexable(X, y, groups)

    # cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorers, _ = _check_multimetric_scoring(estimator, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs,
                        verbose=verbose,
                        pre_dispatch=pre_dispatch)
    scores = parallel(
        delayed(_my_fit_and_score)(clone(estimator),
                                   Xi,
                                   yi,
                                   scorers,
                                   verbose,
                                   None,
                                   fit_params,
                                   return_train_score=return_train_score,
                                   return_times=True) for Xi, yi in zip(X, y))

    if return_train_score:
        train_scores, test_scores, fit_times, score_times = zip(*scores)
        train_scores = _aggregate_score_dicts(train_scores)
    else:
        test_scores, fit_times, score_times = zip(*scores)
    test_scores = _aggregate_score_dicts(test_scores)

    # TODO: replace by a dict in 0.21
    ret = DeprecationDict() if return_train_score == 'warn' else {}
    ret['fit_time'] = np.array(fit_times)
    ret['score_time'] = np.array(score_times)

    for name in scorers:
        ret['test_%s' % name] = np.array(test_scores[name])
        if return_train_score:
            key = 'train_%s' % name
            ret[key] = np.array(train_scores[name])
            if return_train_score == 'warn':
                message = ('You are accessing a training score ({!r}), '
                           'which will not be available by default '
                           'any more in 0.21. If you need training scores, '
                           'please set return_train_score=True').format(key)
                # warn on key access
                ret.add_warning(key, message, FutureWarning)

    return ret
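
Because `my_cross_validate` takes pre-split folds instead of a `cv` object, its `X` and `y` arguments are tuples of `(train, test)` pairs. A sketch of how such inputs could be prepared; `_my_fit_and_score` is not shown in this listing, and `some_classifier` is a placeholder:

from sklearn.datasets import make_classification
from sklearn.model_selection import KFold

X_all, y_all = make_classification(n_samples=120, random_state=0)

# Build ((X_train1, X_test1), ...) and ((y_train1, y_test1), ...) tuples
# in the pre-split format my_cross_validate expects.
folds = list(KFold(n_splits=3).split(X_all))
X = tuple((X_all[train], X_all[test]) for train, test in folds)
y = tuple((y_all[train], y_all[test]) for train, test in folds)

# scores = my_cross_validate(some_classifier, X, y, scoring='accuracy')
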
Ejemplo n.º 16
0
    def fit(self, X, y=None, groups=None, type="Classifier", **fit_params):
        """Run fit with all sets of parameters.

        Parameters
        ----------

        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples] or [n_samples, n_output], optional
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set.

        type : str, optional (default="Classifier")
            Selects the Keras wrapper built around ``self.estimator``:
            a ``KerasClassifier`` for classification, otherwise a
            ``KerasRegressor``.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator
        """
        if self.fit_params is not None:
            warnings.warn(
                '"fit_params" as a constructor argument was '
                'deprecated in version 0.19 and will be removed '
                'in version 0.21. Pass fit parameters to the '
                '"fit" method instead.', DeprecationWarning)
            if fit_params:
                warnings.warn(
                    'Ignoring fit_params passed as a constructor '
                    'argument in favor of keyword arguments to '
                    'the "fit" method.', RuntimeWarning)
            else:
                fit_params = self.fit_params
        #estimator = self.estimator

        # Accept both the default "Classifier" and the spelled-out "Classification".
        if type in ("Classifier", "Classification"):
            from keras.wrappers.scikit_learn import KerasClassifier
            estimator = KerasClassifier(build_fn=self.estimator, verbose=0)
        else:
            from keras.wrappers.scikit_learn import KerasRegressor
            estimator = KerasRegressor(build_fn=self.estimator, verbose=0)
        cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

        scorers, self.multimetric_ = _check_multimetric_scoring(
            clone(estimator), scoring=self.scoring)

        if self.multimetric_:
            if self.refit is not False and (
                    not isinstance(self.refit, six.string_types) or
                    # This will work for both dict / list (tuple)
                    self.refit not in scorers):
                raise ValueError("For multi-metric scoring, the parameter "
                                 "refit must be set to a scorer key "
                                 "to refit an estimator with the best "
                                 "parameter setting on the whole data and "
                                 "make the best_* attributes "
                                 "available for that metric. If this is not "
                                 "needed, refit should be set to False "
                                 "explicitly. %r was passed." % self.refit)
            else:
                refit_metric = self.refit
        else:
            refit_metric = 'score'

        X, y, groups = indexable(X, y, groups)
        n_splits = cv.get_n_splits(X, y, groups)
        # Regenerate parameter iterable for each fit
        candidate_params = list(self._get_param_iterator())
        n_candidates = len(candidate_params)
        if self.verbose > 0:
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(n_splits, n_candidates,
                                     n_candidates * n_splits))

        base_estimator = self.estimator
        pre_dispatch = self.pre_dispatch
        # One of the main changes is that instead of using _fit_and_score from
        # sklearn.model_selection._validation, we use a modified helper
        # (_fit_and_score_keras2) that clears the Keras session after each fit
        out = Parallel(
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            pre_dispatch=pre_dispatch)(
                delayed(_fit_and_score_keras2)(
                    base_estimator,
                    X,
                    y,
                    scorers,
                    train,
                    test,
                    self.verbose,
                    parameters,
                    fit_params=fit_params,
                    return_train_score=self.return_train_score,
                    return_n_test_samples=True,
                    return_times=True,
                    return_parameters=False,
                    error_score=self.error_score,
                    type=type)  # Pass the model type so the worker builds the matching Keras wrapper
                for parameters, (
                    train,
                    test) in product(candidate_params, cv.split(X, y, groups)))

        # if one chooses to see train scores, "out" will contain train score info
        if self.return_train_score:
            (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
             score_time) = zip(*out)
        else:
            (test_score_dicts, test_sample_counts, fit_time,
             score_time) = zip(*out)

        # test_score_dicts and train_score dicts are lists of dictionaries and
        # we make them into dict of lists
        test_scores = _aggregate_score_dicts(test_score_dicts)
        if self.return_train_score:
            train_scores = _aggregate_score_dicts(train_score_dicts)

        results = dict()

        def _store(key_name, array, weights=None, splits=False, rank=False):
            """A small helper to store the scores/times to the cv_results_"""
            # When iterated first by splits, then by parameters
            # We want `array` to have `n_candidates` rows and `n_splits` cols.
            array = np.array(array,
                             dtype=np.float64).reshape(n_candidates, n_splits)
            if splits:
                for split_i in range(n_splits):
                    # Uses closure to alter the results
                    results["split%d_%s" %
                            (split_i, key_name)] = array[:, split_i]

            array_means = np.average(array, axis=1, weights=weights)
            results['mean_%s' % key_name] = array_means
            # Weighted std is not directly available in numpy
            array_stds = np.sqrt(
                np.average((array - array_means[:, np.newaxis])**2,
                           axis=1,
                           weights=weights))
            results['std_%s' % key_name] = array_stds

            if rank:
                results["rank_%s" % key_name] = np.asarray(rankdata(
                    -array_means, method='min'),
                                                           dtype=np.int32)

        _store('fit_time', fit_time)
        _store('score_time', score_time)
        # Use one MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
        # not contain all the params
        param_results = defaultdict(
            partial(MaskedArray,
                    np.empty(n_candidates, ),
                    mask=True,
                    dtype=object))
        for cand_i, params in enumerate(candidate_params):
            for name, value in params.items():
                # An all masked empty array gets created for the key
                # `"param_%s" % name` at the first occurrence of `name`.
                # Setting the value at an index also unmasks that index
                param_results["param_%s" % name][cand_i] = value

        results.update(param_results)
        # Store a list of param dicts at the key 'params'
        results['params'] = candidate_params

        # NOTE test_sample counts (weights) remain the same for all candidates
        test_sample_counts = np.array(test_sample_counts[:n_splits],
                                      dtype=int)
        for scorer_name in scorers.keys():
            # Computed the (weighted) mean and std for test scores alone
            _store('test_%s' % scorer_name,
                   test_scores[scorer_name],
                   splits=True,
                   rank=True,
                   weights=test_sample_counts if self.iid else None)
            if self.return_train_score:
                _store('train_%s' % scorer_name,
                       train_scores[scorer_name],
                       splits=True)

        # For multi-metric evaluation, store the best_index_, best_params_ and
        # best_score_ iff refit is one of the scorer names
        # In single metric evaluation, refit_metric is "score"
        if self.refit or not self.multimetric_:
            self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
            self.best_params_ = candidate_params[self.best_index_]
            self.best_score_ = results["mean_test_%s" %
                                       refit_metric][self.best_index_]

        if self.refit:
            from keras import backend as K
            import tensorflow as tf
            tf.logging.set_verbosity(
                tf.logging.ERROR
            )  # This is useful to avoid the info log of tensorflow
            # The next 4 lines are for avoiding tensorflow to allocate all the GPU memory
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            sess = tf.Session(config=config)
            K.set_session(sess)

            self.best_estimator_ = clone(estimator).set_params(
                **self.best_params_)
            if y is not None:
                self.best_estimator_.fit(X, y, **fit_params)
            else:
                self.best_estimator_.fit(X, **fit_params)

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = scorers if self.multimetric_ else scorers['score']

        self.cv_results_ = results
        self.n_splits_ = n_splits

        return self
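
The comment in the fit method explains that `_fit_and_score_keras2` (not shown) clears the Keras session after each fit so that repeated CV fits do not keep growing the TensorFlow graph. A minimal sketch of that idea using the `tf.keras` backend API, with a hypothetical `fit_and_score_with_cleanup` helper rather than the actual implementation:

from tensorflow.keras import backend as K

def fit_and_score_with_cleanup(build_fn, X_train, y_train, X_test, y_test,
                               epochs=5):
    """Hypothetical helper: fit one split, score it, then free graph state."""
    model = build_fn()
    model.fit(X_train, y_train, epochs=epochs, verbose=0)
    score = model.evaluate(X_test, y_test, verbose=0)
    # Drop the per-split graph/session state so memory does not accumulate
    # across cross-validation iterations.
    K.clear_session()
    return score
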
Ejemplo n.º 17
0
    def _format_results(self, candidate_params, scorers, n_splits, out,
                        more_results={}):
        n_candidates = len(candidate_params)

        # if one chooses to see train scores, "out" will contain train score info
        if self.return_train_score:
            (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
             score_time) = zip(*out)
        else:
            (test_score_dicts, test_sample_counts, fit_time,
             score_time) = zip(*out)

        # test_score_dicts and train_score dicts are lists of dictionaries and
        # we make them into dict of lists
        test_scores = _aggregate_score_dicts(test_score_dicts)
        if self.return_train_score:
            train_scores = _aggregate_score_dicts(train_score_dicts)

        results = dict(more_results)

        def _store(key_name, array, weights=None, splits=False, rank=False):
            """A small helper to store the scores/times to the cv_results_"""
            # When iterated first by splits, then by parameters
            # We want `array` to have `n_candidates` rows and `n_splits` cols.
            array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                              n_splits)
            if splits:
                for split_i in range(n_splits):
                    # Uses closure to alter the results
                    results["split%d_%s"
                            % (split_i, key_name)] = array[:, split_i]

            array_means = np.average(array, axis=1, weights=weights)
            results['mean_%s' % key_name] = array_means
            # Weighted std is not directly available in numpy
            array_stds = np.sqrt(np.average((array -
                                             array_means[:, np.newaxis]) ** 2,
                                            axis=1, weights=weights))
            results['std_%s' % key_name] = array_stds

            if rank:
                results["rank_%s" % key_name] = np.asarray(
                    rankdata(-array_means, method='min'), dtype=np.int32)

        _store('fit_time', fit_time)
        _store('score_time', score_time)
        # Use one MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
        # not contain all the params
        param_results = defaultdict(partial(MaskedArray,
                                            np.empty(n_candidates,),
                                            mask=True,
                                            dtype=object))
        for cand_i, params in enumerate(candidate_params):
            for name, value in params.items():
                # An all masked empty array gets created for the key
                # `"param_%s" % name` at the first occurrence of `name`.
                # Setting the value at an index also unmasks that index
                param_results["param_%s" % name][cand_i] = value

        results.update(param_results)
        # Store a list of param dicts at the key 'params'
        results['params'] = candidate_params

        # NOTE test_sample counts (weights) remain the same for all candidates
        test_sample_counts = np.array(test_sample_counts[:n_splits],
                                      dtype=int)
        iid = self.iid
        if self.iid == 'warn':
            warn = False
            for scorer_name in scorers.keys():
                scores = test_scores[scorer_name].reshape(n_candidates,
                                                          n_splits)
                means_weighted = np.average(scores, axis=1,
                                            weights=test_sample_counts)
                means_unweighted = np.average(scores, axis=1)
                if not np.allclose(means_weighted, means_unweighted,
                                   rtol=1e-4, atol=1e-4):
                    warn = True
                    break

            if warn:
                warnings.warn("The default of the `iid` parameter will change "
                              "from True to False in version 0.22 and will be"
                              " removed in 0.24. This will change numeric"
                              " results when test-set sizes are unequal.",
                              DeprecationWarning)
            iid = True

        for scorer_name in scorers.keys():
            # Computed the (weighted) mean and std for test scores alone
            _store('test_%s' % scorer_name, test_scores[scorer_name],
                   splits=True, rank=True,
                   weights=test_sample_counts if iid else None)
            if self.return_train_score:
                _store('train_%s' % scorer_name, train_scores[scorer_name],
                       splits=True)

        return results
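
The `iid='warn'` branch above only raises the deprecation warning when weighting the per-split scores by test-set size would actually change the candidate means. A compact reproduction of that check on made-up numbers:

import numpy as np

# Two candidates scored on three splits of unequal size.
scores = np.array([[0.9, 0.5, 0.7],
                   [0.6, 0.6, 0.6]])
test_sample_counts = np.array([200, 50, 50])

means_weighted = np.average(scores, axis=1, weights=test_sample_counts)
means_unweighted = np.average(scores, axis=1)

# The warning fires only when the two aggregations disagree beyond tolerance,
# i.e. when unequal fold sizes actually matter.
would_warn = not np.allclose(means_weighted, means_unweighted,
                             rtol=1e-4, atol=1e-4)
print(would_warn)  # True here: candidate 0's weighted mean differs
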
Ejemplo n.º 18
0
    def _format_results(self, candidate_params, scorers, n_splits, out):
        n_candidates = len(candidate_params)

        # if one chooses to see train scores, "out" will contain train score info
        if self.return_train_score:
            (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
             score_time, estimators) = zip(*out)
        else:
            (test_score_dicts, test_sample_counts, fit_time, score_time,
             estimators) = zip(*out)

        # test_score_dicts and train_score dicts are lists of dictionaries and
        # we make them into dict of lists
        test_scores = _aggregate_score_dicts(test_score_dicts)
        if self.return_train_score:
            train_scores = _aggregate_score_dicts(train_score_dicts)

        results = {}

        def _store(key_name, array, weights=None, splits=False, rank=False):
            """A small helper to store the scores/times to the cv_results_"""
            # When iterated first by splits, then by parameters
            # We want `array` to have `n_candidates` rows and `n_splits` cols.
            array = np.array(array,
                             dtype=np.float64).reshape(n_candidates, n_splits)
            if splits:
                for split_i in range(n_splits):
                    # Uses closure to alter the results
                    results["split%d_%s" %
                            (split_i, key_name)] = array[:, split_i]

            array_means = np.average(array, axis=1, weights=weights)
            results['mean_%s' % key_name] = array_means
            # Weighted std is not directly available in numpy
            array_stds = np.sqrt(
                np.average((array - array_means[:, np.newaxis])**2,
                           axis=1,
                           weights=weights))
            results['std_%s' % key_name] = array_stds

            if rank:
                results["rank_%s" % key_name] = np.asarray(rankdata(
                    -array_means, method='min'),
                                                           dtype=np.int32)

        _store('fit_time', fit_time)
        _store('score_time', score_time)
        # Use one MaskedArray and mask all the places where the param is not
        # applicable for that candidate. Use defaultdict as each candidate may
        # not contain all the params
        param_results = defaultdict(
            partial(MaskedArray,
                    np.empty(n_candidates, ),
                    mask=True,
                    dtype=object))
        for cand_i, params in enumerate(candidate_params):
            for name, value in params.items():
                # An all masked empty array gets created for the key
                # `"param_%s" % name` at the first occurrence of `name`.
                # Setting the value at an index also unmasks that index
                param_results["param_%s" % name][cand_i] = value

        results.update(param_results)
        # Store a list of param dicts at the key 'params'
        results['params'] = candidate_params

        # NOTE test_sample counts (weights) remain the same for all candidates
        test_sample_counts = np.array(test_sample_counts[:n_splits],
                                      dtype=int)

        if self.iid != 'deprecated':
            warnings.warn(
                "The parameter 'iid' is deprecated in 0.22 and will be "
                "removed in 0.24.", DeprecationWarning)
            iid = self.iid
        else:
            iid = False

        for scorer_name in scorers.keys():
            # Computed the (weighted) mean and std for test scores alone
            _store('test_%s' % scorer_name,
                   test_scores[scorer_name],
                   splits=True,
                   rank=True,
                   weights=test_sample_counts if iid else None)
            if self.return_train_score:
                _store('train_%s' % scorer_name,
                       train_scores[scorer_name],
                       splits=True)

        estimators = np.asarray(estimators).reshape(n_candidates, n_splits)
        array_means = np.array(
            [global_instability(e_split) for e_split in estimators])

        # monotonize instabilities - require ordered parameters,
        # from high sparsity to low

        monotonized_instabilities = [array_means[0]] + [
            np.max(array_means[:i]) for i in range(1, array_means.size)
        ]
        monotonized_instabilities = np.array(monotonized_instabilities)
        self.monotonized_instabilities = np.copy(monotonized_instabilities)

        if self.mode.lower() == 'gstars':
            graphlets_stability = np.array(
                [graphlet_instability(e_split) for e_split in estimators])
            self.graphlets_instabilities = np.copy(graphlets_stability)

            upper_bounds = np.array(
                [upper_bound(e_split) for e_split in estimators])
            upper_bounds = [upper_bounds[0]] + [
                np.max(upper_bounds[:i]) for i in range(1, upper_bounds.size)
            ]
            self.upper_bounds = np.array(upper_bounds)
            lb = np.where(np.array(monotonized_instabilities) <= 0.05)[0]
            ub = np.where(np.array(upper_bounds) <= 0.05)[0]
            lb = lb[-1] if lb.size != 0 else len(monotonized_instabilities)
            ub = ub[-1] if ub.size != 0 else 0
            self.lower_bound = lb
            self.upper_bound = ub
            graphlets_stability[0:ub] = np.inf
            graphlets_stability[lb + 1:] = np.inf

            key_name = 'test_instability'
            results['raw_%s' % key_name] = array_means
            results['mean_%s' % key_name] = monotonized_instabilities
            results["rank_%s" % key_name] = np.asarray(rankdata(
                graphlets_stability, method='min'),
                                                       dtype=np.int32)
        else:
            # discard high values
            monotonized_instabilities[monotonized_instabilities > 0.05] = \
                -np.inf
            key_name = 'test_instability'
            results['raw_%s' % key_name] = array_means
            results['mean_%s' % key_name] = monotonized_instabilities
            results["rank_%s" % key_name] = np.asarray(rankdata(
                -monotonized_instabilities, method='min'),
                                                       dtype=np.int32)
        self.results = results
        return results
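
The monotonization step keeps the first instability value and replaces every later value with the maximum of the values seen before it, yielding a non-decreasing sequence. Running the same comprehension on toy values makes the effect visible:

import numpy as np

# Toy instabilities, ordered from high sparsity to low as required above.
array_means = np.array([0.01, 0.03, 0.02, 0.06])

monotonized_instabilities = [array_means[0]] + [
    np.max(array_means[:i]) for i in range(1, array_means.size)
]
print(np.array(monotonized_instabilities))  # [0.01 0.01 0.03 0.03]
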
Ejemplo n.º 19
0
def _cross_validate_with_warm_start(
    estimators,
    X,
    y=None,
    *,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=None,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
    return_train_score=False,
    return_estimator=False,
    error_score=np.nan,
):
    """Evaluate metric(s) by cross-validation and also record fit/score times.

    Read more in the :ref:`User Guide <multimetric_cross_validation>`.

    Parameters
    ----------
    estimators : estimator objects implementing 'fit'
        The objects to use to fit the data.

    X : array-like of shape (n_samples, n_features)
        The data to fit. Can be for example a list, or an array.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
            default=None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like of shape (n_samples,), default=None
        Group labels for the samples used while splitting the dataset into
        train/test set. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`GroupKFold`).

    scoring : str, callable, list/tuple, or dict, default=None
        A single str (see :ref:`scoring_parameter`) or a callable
        (see :ref:`scoring`) to evaluate the predictions on the test set.

        For evaluating multiple metrics, either give a list of (unique) strings
        or a dict with names as keys and callables as values.

        NOTE that when using custom scorers, each scorer should return a single
        value. Metric functions returning a list/array of values can be wrapped
        into multiple scorers that return one value each.

        See :ref:`multimetric_grid_search` for an example.

        If None, the estimator's score method is used.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For int/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.

    n_jobs : int, default=None
        The number of CPUs to use to do the computation.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    verbose : int, default=0
        The verbosity level.

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int or str, default='2*n_jobs'
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - None, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A str, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    return_train_score : bool, default=False
        Whether to include train scores.
        Computing training scores is used to get insights on how different
        parameter settings impact the overfitting/underfitting trade-off.
        However computing the scores on the training set can be computationally
        expensive and is not strictly required to select the parameters that
        yield the best generalization performance.

        .. versionadded:: 0.19

        .. versionchanged:: 0.21
            Default value was changed from ``True`` to ``False``

    return_estimator : bool, default=False
        Whether to return the estimators fitted on each split.

        .. versionadded:: 0.20

    error_score : 'raise' or numeric
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised. This parameter
        does not affect the refit step, which will always raise the error.

        .. versionadded:: 0.20

    Returns
    -------
    scores : dict of float arrays of shape (n_splits,)
        Array of scores of the estimator for each run of the cross validation.

        A dict of arrays containing the score/time arrays for each scorer is
        returned. The possible keys for this ``dict`` are:

            ``test_score``
                The score array for test scores on each cv split.
                Suffix ``_score`` in ``test_score`` changes to a specific
                metric like ``test_r2`` or ``test_auc`` if there are
                multiple scoring metrics in the scoring parameter.
            ``train_score``
                The score array for train scores on each cv split.
                Suffix ``_score`` in ``train_score`` changes to a specific
                metric like ``train_r2`` or ``train_auc`` if there are
                multiple scoring metrics in the scoring parameter.
                This is available only if ``return_train_score`` parameter
                is ``True``.
            ``fit_time``
                The time for fitting the estimator on the train
                set for each cv split.
            ``score_time``
                The time for scoring the estimator on the test set for each
                cv split. (Note that the time for scoring on the train set is
                not included even if ``return_train_score`` is set to
                ``True``.)
            ``estimator``
                The estimator objects for each cv split.
                This is available only if ``return_estimator`` parameter
                is set to ``True``.

        Note that this particular implementation returns the raw per-split
        results from ``_fit_and_score`` alongside this dict, i.e. a
        ``(scores, raw_results)`` tuple.

    Examples
    --------
    >>> from sklearn import datasets, linear_model
    >>> from sklearn.model_selection import cross_validate
    >>> from sklearn.metrics import make_scorer
    >>> from sklearn.metrics import confusion_matrix
    >>> from sklearn.svm import LinearSVC
    >>> diabetes = datasets.load_diabetes()
    >>> X = diabetes.data[:150]
    >>> y = diabetes.target[:150]
    >>> lasso = linear_model.Lasso()

    Single metric evaluation using ``cross_validate``

    >>> cv_results = cross_validate(lasso, X, y, cv=3)
    >>> sorted(cv_results.keys())
    ['fit_time', 'score_time', 'test_score']
    >>> cv_results['test_score']
    array([0.33150734, 0.08022311, 0.03531764])

    Multiple metric evaluation using ``cross_validate``
    (please refer to the ``scoring`` parameter doc for more information)

    >>> scores = cross_validate(lasso, X, y, cv=3,
    ...                         scoring=('r2', 'neg_mean_squared_error'),
    ...                         return_train_score=True)
    >>> print(scores['test_neg_mean_squared_error'])
    [-3635.5... -3573.3... -6114.7...]
    >>> print(scores['train_r2'])
    [0.28010158 0.39088426 0.22784852]
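
    If a metric returns several values (e.g. a confusion matrix), it can be
    wrapped into several single-value scorers and passed as a dict. A minimal
    sketch (the helper names ``tn``/``fp`` are illustrative only):

    >>> def tn(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 0]
    >>> def fp(y_true, y_pred): return confusion_matrix(y_true, y_pred)[0, 1]
    >>> svm_scores = cross_validate(LinearSVC(dual=False, random_state=0),
    ...                             X, y > 150,
    ...                             scoring={'tn': make_scorer(tn),
    ...                                      'fp': make_scorer(fp)},
    ...                             cv=3)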

    See Also
    --------
    :func:`sklearn.model_selection.cross_val_score`:
        Run cross-validation for single metric evaluation.

    :func:`sklearn.model_selection.cross_val_predict`:
        Get predictions from each split of cross-validation for diagnostic
        purposes.

    :func:`sklearn.metrics.make_scorer`:
        Make a scorer from a performance metric or loss function.

    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimators[0]))
    if callable(scoring):
        scorers = {"score": scoring}
    elif scoring is None or isinstance(scoring, str):
        scorers = {"score": check_scoring(estimators[0], scoring=scoring)}
    else:
        scorers = _check_multimetric_scoring(estimators[0], scoring=scoring)
        # scikit-learn < 0.24.0 returned a (scorers, is_multimetric) tuple
        if isinstance(scorers, tuple):
            scorers = scorers[0]

    # Unlike sklearn's cross_validate, the estimators are NOT cloned here:
    # one pre-constructed estimator is consumed per CV split, so each entry
    # must already be an independent (pickle-able) instance.

    parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
    results_org = parallel(
        delayed(_fit_and_score)(
            estimators[i],
            X,
            y,
            scorers,
            train_test_tuple[0],
            train_test_tuple[1],
            verbose,
            None,
            fit_params[i] if isinstance(fit_params, list) else fit_params,
            return_train_score=return_train_score,
            return_times=True,
            return_n_test_samples=True,
            return_estimator=return_estimator,
            error_score=error_score,
        )
        for i, train_test_tuple in enumerate(cv.split(X, y, groups))
    )
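    # Note: one pre-constructed estimator (and, if `fit_params` is a list, one
    # fit-params dict) is consumed per CV split, so the `estimators` sequence
    # must provide at least as many entries as `cv` yields splits.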

    results = _aggregate_score_dicts(results_org)
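    # _aggregate_score_dicts turns the per-split list of dicts returned by
    # _fit_and_score into a single dict of sequences, e.g.
    #   [{'fit_time': 0.1, ...}, {'fit_time': 0.2, ...}]
    #     -> {'fit_time': array([0.1, 0.2]), ...}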

    ret = {}
    ret["fit_time"] = results["fit_time"]
    ret["score_time"] = results["score_time"]

    if return_estimator:
        ret["estimator"] = results["estimator"]

    test_scores_dict = _normalize_score_results(results["test_scores"])
    if return_train_score:
        train_scores_dict = _normalize_score_results(results["train_scores"])

    for name in test_scores_dict:
        ret["test_%s" % name] = test_scores_dict[name]
        if return_train_score:
            key = "train_%s" % name
            ret[key] = train_scores_dict[name]

    return (ret, results_org)
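
# A hedged sketch of the inputs this per-fold variant expects: one pre-built
# estimator per CV split (they are not cloned) and, optionally, one fit_params
# dict per split. The wrapper itself is defined above this excerpt, so it is
# not called here.
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold

cv_example = KFold(n_splits=3)
estimators_example = [Ridge(alpha=a) for a in (0.1, 1.0, 10.0)]  # one per split
fit_params_example = [{} for _ in estimators_example]            # or one shared dict
# Calling the wrapper with these (plus X, y, scoring, ...) returns the
# (ret, results_org) tuple built above.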
Example No. 20
0
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
# Private scikit-learn helpers used below; their module paths and return
# values differ across versions (this snippet assumes a pre-0.24 release,
# where _check_multimetric_scoring returns a (scorers, is_multimetric) tuple).
from sklearn.metrics._scorer import _check_multimetric_scoring
from sklearn.model_selection._validation import _aggregate_score_dicts, _score

# NOTE: Make sure that the outcome column is labeled 'target' in the data file.
tpot_data = pd.read_csv('tpot_data_train.csv', sep=',')
tpot_data.columns = [c.lower() for c in tpot_data.columns.values]
# `features` and `labels` are lists of column names assumed to be defined
# earlier in this script; `_DEFAULT_METRICS` below likewise.
tpot_data = tpot_data[features + labels]
tpot_data = tpot_data.rename(columns={'micro_confirmed': 'target'})

features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'], random_state=None)

# Average CV score on the training set was: 0.739462953567469
exported_pipeline = make_pipeline(
    SelectPercentile(score_func=f_classif, percentile=69),
    ExtraTreesClassifier(bootstrap=True,
                         criterion="gini",
                         max_features=0.6000000000000001,
                         min_samples_leaf=12,
                         min_samples_split=14,
                         n_estimators=100))

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)

print(_DEFAULT_METRICS)

scorers, _ = _check_multimetric_scoring(exported_pipeline,
                                        scoring=_DEFAULT_METRICS)
scores = _score(exported_pipeline, testing_features, testing_target, scorers)
print(scores)
# _aggregate_score_dicts expects a list of per-split score dicts, so wrap the
# single result in a list before aggregating.
scores = _aggregate_score_dicts([scores])
Example No. 21
0
def monkeypatch_fit(self, X, y=None, groups=None, **fit_params):
    if self.fit_params is not None:
        warnings.warn('"fit_params" as a constructor argument was '
                      'deprecated in version 0.19 and will be removed '
                      'in version 0.21. Pass fit parameters to the '
                      '"fit" method instead.', DeprecationWarning)
        if fit_params:
            warnings.warn('Ignoring fit_params passed as a constructor '
                          'argument in favor of keyword arguments to '
                          'the "fit" method.', RuntimeWarning)
        else:
            fit_params = self.fit_params
    estimator = self.estimator
    cv = check_cv(self.cv, y, classifier=is_classifier(estimator))

    scorers, self.multimetric_ = _check_multimetric_scoring(
        self.estimator, scoring=self.scoring)

    if self.multimetric_:
        if self.refit is not False and (
                not isinstance(self.refit, six.string_types) or
                # This will work for both dict / list (tuple)
                self.refit not in scorers):
            raise ValueError("For multi-metric scoring, the parameter "
                             "refit must be set to a scorer key "
                             "to refit an estimator with the best "
                             "parameter setting on the whole data and "
                             "make the best_* attributes "
                             "available for that metric. If this is not "
                             "needed, refit should be set to False "
                             "explicitly. %r was passed." % self.refit)
        else:
            refit_metric = self.refit
    else:
        refit_metric = 'score'

    X, y, groups = indexable(X, y, groups)
    n_splits = cv.get_n_splits(X, y, groups)
    # Regenerate parameter iterable for each fit
    candidate_params = list(self._get_param_iterator())
    n_candidates = len(candidate_params)
    if self.verbose > 0:
        print("Fitting {0} folds for each of {1} candidates, totalling"
              " {2} fits".format(n_splits, n_candidates,
                                 n_candidates * n_splits))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    # ===================================================================
    # BEGIN MONKEYPATCH MODIFICATION
    # ===================================================================

    parallel_cv = cv.split(X, y, groups)

    if isinstance(self.pipeline_split_idx, int) and isinstance(base_estimator,
                                                               Pipeline):
        split_idx = self.pipeline_split_idx
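        # The steps before `pipeline_split_idx` are fit once on the full X
        # outside the CV loop below; only the remaining steps take part in the
        # per-candidate fits. This speeds up expensive preprocessing, but note
        # that those early steps see the test folds while being fitted.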

        pre_pipe_steps = base_estimator.steps[:split_idx]
        new_pipe_steps = base_estimator.steps[split_idx:]
        memory = base_estimator.memory

        pre_pipe = Pipeline(pre_pipe_steps, memory)

        if len(new_pipe_steps) == 1:
            est_name, base_estimator = new_pipe_steps[0]
        else:
            est_name = None
            base_estimator = Pipeline(new_pipe_steps, memory)

        fit_params_pre_pipe = {}
        steps_pre_pipe = [tup[0] for tup in pre_pipe_steps]
        fit_param_keys = list(fit_params)  # copy: keys are popped in the loop below

        for pname in fit_param_keys:
            step, param = pname.split('__', 1)

            if step in steps_pre_pipe:
                fit_params_pre_pipe[pname] = fit_params.pop(pname)
            elif step == est_name:
                fit_params[param] = fit_params.pop(pname)

        if est_name is not None:
            for dic in candidate_params:
                for k in list(dic):  # copy: keys may be popped below
                    step, param = k.split('__', 1)

                    if step == est_name:
                        dic.update({param: dic.pop(k)})

        try:
            X = pre_pipe.fit_transform(X, **fit_params_pre_pipe)
        except TypeError:
            raise RuntimeError('Pipeline before pipeline_split_idx requires '
                               'fitting to y. Please initialize with an '
                               'earlier index.')

    if self.transform_before_grid and isinstance(base_estimator, Pipeline):
        pipe = base_estimator
        est_name, base_estimator = pipe.steps.pop()
        X_cv, y_cv, parallel_cv = [], [], []
        sample_count = 0
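        # Strategy of this branch: fit the pipeline (minus its final estimator)
        # once per fold, transform the FULL X with each fitted pipeline, stack
        # the transformed copies, and shift the train/test indices so every
        # candidate's final estimator reuses these precomputed transforms.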

        fit_params_est = {}
        fit_param_keys = list(fit_params)  # copy: keys are popped in the loop below

        for pname in fit_param_keys:
            step, param = pname.split('__', 1)
            if step == est_name:
                fit_params_est[param] = fit_params.pop(pname)

        for dic in candidate_params:
            for k in list(dic):  # copy: keys may be popped below
                step, param = k.split('__', 1)

                if step == est_name:
                    dic.update({param: dic.pop(k)})

        for (train, test) in cv.split(X, y, groups):
            if y is not None:
                if isinstance(X, pd.DataFrame):
                    pipe.fit(X.iloc[train], y.iloc[train], **fit_params)
                else:
                    pipe.fit(X[train], y[train], **fit_params)
                y_cv.append(y)
            else:
                if isinstance(X, pd.DataFrame):
                    pipe.fit(X.iloc[train], **fit_params)
                else:
                    pipe.fit(X[train], **fit_params)

            X_cv.append(pipe.transform(X))

            train = train + sample_count
            test = test + sample_count
            sample_count += len(train)
            sample_count += len(test)

            parallel_cv.append((train, test))

        if isinstance(X, pd.DataFrame):
            X = pd.concat(tuple(X_cv))
        else:
            X = np.vstack(tuple(X_cv))

        if y is not None:
            if isinstance(y, pd.Series):
                y = pd.concat(tuple(y_cv))
            else:
                y = np.hstack(tuple(y_cv))

            if 'sample_weight' in fit_params_est:
                samp_weight = fit_params_est['sample_weight']
                fit_params_est['sample_weight'] = np.tile(samp_weight,
                                                          len(y_cv))
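                # y was stacked once per fold above, so any sample_weight must
                # be tiled the same number of times to stay aligned with y.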

        fit_params = fit_params_est

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(delayed(monkeypatch_fit_and_score)
      (clone(base_estimator), X, y, scorers, train,
                              test, self.verbose, parameters,
                              fit_params=fit_params,
                              return_train_score=self.return_train_score,
                              return_n_test_samples=True,
                              return_times=True, return_parameters=False,
                              error_score=self.error_score)
      for parameters, (train, test) in product(candidate_params,
                                               parallel_cv))

    # ===================================================================
    # END MONKEYPATCH MODIFICATION
    # ===================================================================

    # if one chooses to see train scores, "out" will contain train score info
    if self.return_train_score:
        (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
         score_time) = zip(*out)
    else:
        (test_score_dicts, test_sample_counts, fit_time,
         score_time) = zip(*out)

    # test_score_dicts and train_score_dicts are lists of dictionaries and
    # we make them into a dict of lists
    test_scores = _aggregate_score_dicts(test_score_dicts)
    if self.return_train_score:
        train_scores = _aggregate_score_dicts(train_score_dicts)

    # TODO: replace by a dict in 0.21
    results = (DeprecationDict() if self.return_train_score == 'warn'
               else {})

    def _store(key_name, array, weights=None, splits=False, rank=False):
        """A small helper to store the scores/times to the cv_results_"""
        # When iterated first by splits, then by parameters
        # We want `array` to have `n_candidates` rows and `n_splits` cols.
        array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                          n_splits)
        if splits:
            for split_i in range(n_splits):
                # Uses closure to alter the results
                results["split%d_%s"
                        % (split_i, key_name)] = array[:, split_i]

        array_means = np.average(array, axis=1, weights=weights)
        results['mean_%s' % key_name] = array_means
        # Weighted std is not directly available in numpy
        array_stds = np.sqrt(np.average((array -
                                         array_means[:, np.newaxis]) ** 2,
                                        axis=1, weights=weights))
        results['std_%s' % key_name] = array_stds

        if rank:
            results["rank_%s" % key_name] = np.asarray(
                rankdata(-array_means, method='min'), dtype=np.int32)

    _store('fit_time', fit_time)
    _store('score_time', score_time)
    # Use one MaskedArray and mask all the places where the param is not
    # applicable for that candidate. Use defaultdict as each candidate may
    # not contain all the params
    param_results = defaultdict(partial(MaskedArray,
                                        np.empty(n_candidates,),
                                        mask=True,
                                        dtype=object))
    for cand_i, params in enumerate(candidate_params):
        for name, value in params.items():
            # An all-masked empty array gets created for the key
            # `"param_%s" % name` at the first occurrence of `name`.
            # Setting the value at an index also unmasks that index
            param_results["param_%s" % name][cand_i] = value

    results.update(param_results)
    # Store a list of param dicts at the key 'params'
    results['params'] = candidate_params

    # NOTE test_sample_counts (weights) remain the same for all candidates
    test_sample_counts = np.array(test_sample_counts[:n_splits],
                                  dtype=int)
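    # When self.iid is True, the mean test score for each candidate is a
    # weighted average over splits, weighted by these test-fold sizes;
    # otherwise a plain (unweighted) mean is used.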
    for scorer_name in scorers.keys():
        # Compute the (weighted) mean and std for test scores alone
        _store('test_%s' % scorer_name, test_scores[scorer_name],
               splits=True, rank=True,
               weights=test_sample_counts if self.iid else None)
        if self.return_train_score:
            prev_keys = set(results.keys())
            _store('train_%s' % scorer_name, train_scores[scorer_name],
                   splits=True)

            if self.return_train_score == 'warn':
                for key in set(results.keys()) - prev_keys:
                    message = (
                        'You are accessing a training score ({!r}), '
                        'which will not be available by default '
                        'any more in 0.21. If you need training scores, '
                        'please set return_train_score=True').format(key)
                    # warn on key access
                    results.add_warning(key, message, FutureWarning)

    # For multi-metric evaluation, store the best_index_, best_params_ and
    # best_score_ iff refit is one of the scorer names
    # In single metric evaluation, refit_metric is "score"
    if self.refit or not self.multimetric_:
        self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
        self.best_params_ = candidate_params[self.best_index_]
        self.best_score_ = results["mean_test_%s" % refit_metric][
            self.best_index_]

    if self.refit:
        self.best_estimator_ = clone(base_estimator).set_params(
            **self.best_params_)
        if y is not None:
            self.best_estimator_.fit(X, y, **fit_params)
        else:
            self.best_estimator_.fit(X, **fit_params)

    # Store the only scorer not as a dict for single metric evaluation
    self.scorer_ = scorers if self.multimetric_ else scorers['score']

    self.cv_results_ = results
    self.n_splits_ = n_splits

    return self
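
# A hedged sketch of wiring this patched fit into a 0.19-era GridSearchCV,
# which is the API this function assumes (self.iid, self.fit_params,
# return_train_score='warn', six, DeprecationDict). The attributes
# `pipeline_split_idx` and `transform_before_grid`, and the helper
# `monkeypatch_fit_and_score`, are assumptions of this patch, not part of
# scikit-learn.
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X_iris, y_iris = load_iris(return_X_y=True)
pipe = Pipeline([('scale', StandardScaler()), ('svc', SVC())])

GridSearchCV.fit = monkeypatch_fit                     # swap in the patched fit
search = GridSearchCV(pipe, {'svc__C': [0.1, 1.0, 10.0]}, cv=3)
search.pipeline_split_idx = 1    # fit 'scale' once up front, cross-validate 'svc'
search.transform_before_grid = False
search.fit(X_iris, y_iris)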
Example No. 22
0
def fp_cross_val_score(estimator,
                       X_original,
                       y_original,
                       X_fingerprint,
                       y_fingerprint,
                       cv=5,
                       scoring=None,
                       n_jobs=None,
                       verbose=0,
                       pre_dispatch='2*n_jobs',
                       groups=None,
                       fit_params=None,
                       return_train_score=False,
                       return_estimator=False,
                       error_score=np.nan):
    '''
    Perform a custom cross-validation on fingerprinted data such that the
    model is trained on fingerprinted data but evaluated on original data.
    Beware that X_original, y_original, X_fingerprint and y_fingerprint are
    expected to match on index; no index matching is done within this method.
    '''
    X_original, y_original = indexable(X_original, y_original)

    cv = check_cv(cv, y_original, classifier=is_classifier(estimator))

    if callable(scoring):
        scorers = scoring
    elif scoring is None or isinstance(scoring, str):
        scorers = check_scoring(estimator, scoring)
    else:
        scorers = _check_multimetric_scoring(estimator, scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs,
                        verbose=verbose,
                        pre_dispatch=pre_dispatch)
    results = parallel(
        delayed(fp_fit_and_score)(clone(estimator),
                                  X_original,
                                  y_original,
                                  X_fingerprint,
                                  y_fingerprint,
                                  scorers,
                                  train_original,
                                  test_original,
                                  train_fingerprint,
                                  test_fingerprint,
                                  verbose,
                                  None,
                                  fit_params,
                                  return_train_score=return_train_score,
                                  return_times=True,
                                  return_estimator=return_estimator,
                                  error_score=error_score)
        for (train_original,
             test_original), (train_fingerprint, test_fingerprint) in zip(
                 cv.split(X_original, y_original, groups),
                 cv.split(X_fingerprint, y_fingerprint, groups)))
    # TODO: verify that the paired cv.split(...) iteration above keeps the
    # original and fingerprint folds aligned.

    # For callable scoring, the return type is only known after calling. If
    # the return type is a dictionary, the error scores can now be inserted
    # with the correct key.
    if callable(scoring):
        _insert_error_scores(results, error_score)

    results = _aggregate_score_dicts(results)

    ret = {}
    ret['fit_time'] = results["fit_time"]
    ret['score_time'] = results["score_time"]

    if return_estimator:
        ret['estimator'] = results["estimator"]

    test_scores_dict = _normalize_score_results(results["test_scores"])
    if return_train_score:
        train_scores_dict = _normalize_score_results(results["train_scores"])

    for name in test_scores_dict:
        ret['test_%s' % name] = test_scores_dict[name]
        if return_train_score:
            key = 'train_%s' % name
            ret[key] = train_scores_dict[name]

    return ret
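
# A minimal usage sketch (hedged): the fingerprinted copy is normally produced
# by a separate fingerprinting step; here it is faked by adding small noise so
# that X_original and X_fingerprint match on index, as the docstring requires.
# `fp_fit_and_score` and the private sklearn helpers used above must be
# importable for this to run.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.RandomState(0)
X_orig = rng.rand(200, 5)
y = (X_orig[:, 0] > 0.5).astype(int)
X_fp = X_orig + rng.normal(scale=0.01, size=X_orig.shape)  # stand-in fingerprint

res = fp_cross_val_score(RandomForestClassifier(random_state=0),
                         X_orig, y, X_fp, y, cv=5, scoring='accuracy')
print(res['test_score'])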