Code example #1
    def _mean_fn(self, X, fn, acc, slice=None):
        # Helper method that accumulates an arbitrary function in parallel on the accumulator acc
        # and returns the mean output over the trees. The function fn should take as input a tree
        # e and return another function g_e, which takes as input X and check_input.
        # If slice is not None, but rather a tuple (start, end), then only the subset of trees from
        # index start to index end will be used. The returned result is essentially:
        # (mean over e in slice)(g_e(X)).
        check_is_fitted(self, 'estimators_')
        # Check data
        X = self._validate_X_predict(X)

        if slice is None:
            estimator_slice = self.estimators_
        else:
            estimator_slice = self.estimators_[slice[0]:slice[1]]

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(len(estimator_slice), self.n_jobs)
        lock = threading.Lock()
        Parallel(n_jobs=n_jobs,
                 verbose=self.verbose,
                 **_joblib_parallel_args(require="sharedmem"))(
                     delayed(_accumulate_prediction)(fn(e), X, [acc], lock)
                     for e in estimator_slice)
        acc /= len(estimator_slice)
        return acc
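Both this example and several of those below pass an `_accumulate_prediction` helper to joblib's `Parallel`; the helper itself is not shown in the snippets. As a reference, here is a minimal sketch of what such a helper typically looks like, modelled on scikit-learn's forest module (it assumes the estimator's `predict` accepts a `check_input` keyword):

def _accumulate_prediction(predict, X, out, lock):
    # Compute one estimator's prediction and add it, in place, to the shared
    # accumulator(s) in `out`. The lock guards the in-place addition so that
    # concurrent threads using require="sharedmem" do not race.
    prediction = predict(X, check_input=False)
    with lock:
        if len(out) == 1:
            out[0] += prediction
        else:
            for i in range(len(out)):
                out[i] += prediction[i]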
Code example #2
    def _predict(self, predict_fn, X):
        check_is_fitted(self, 'estimators_')
        # Check data
        X = self._validate_X_predict(X)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # avoid storing the output of every estimator by summing them here
        if predict_fn == "predict":
            y_hat = np.zeros((X.shape[0]), dtype=np.float64)
        else:
            y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=np.float64)

        def _get_fn(est, name):
            fn = getattr(est, name)
            if name in ("predict_cumulative_hazard_function",
                        "predict_survival_function"):
                fn = partial(fn, return_array=True)
            return fn

        # Parallel loop
        lock = threading.Lock()
        Parallel(n_jobs=n_jobs,
                 verbose=self.verbose,
                 **_joblib_parallel_args(require="sharedmem"))(
                     delayed(_accumulate_prediction)(_get_fn(e, predict_fn), X,
                                                     [y_hat], lock)
                     for e in self.estimators_)

        y_hat /= len(self.estimators_)

        return y_hat
Code example #3
    def apply(self, X):
        """
        Apply trees in the forest to X, return leaf indices.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.

        Returns
        -------
        X_leaves : ndarray of shape (n_samples, n_estimators)
            For each datapoint x in X and for each tree in the forest,
            return the index of the leaf x ends up in.
        """
        X = self._validate_X_predict(X)
        results = Parallel(
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            **_joblib_parallel_args(prefer="threads"),
        )(delayed(tree.apply)(X, check_input=False)
          for tree in self.estimators_)

        return np.array(results).T
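As a brief usage sketch, this `apply` behaves like scikit-learn's standard forest `apply`; the estimator and dataset helpers below (`RandomForestClassifier`, `make_classification`) are standard scikit-learn APIs used purely for illustration, not taken from the snippet above:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

# Fit a small forest and look up the leaf each sample falls into for each tree.
X, y = make_classification(n_samples=100, n_features=4, random_state=0)
forest = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)
leaves = forest.apply(X)
print(leaves.shape)  # (100, 10): one leaf index per (sample, tree) pair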
Code example #4
File: forest.py Project: zeta1999/Deep-Forest
    def predict_proba(self, X):
        check_is_fitted(self)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # Avoid storing the output of every estimator by summing them here
        all_proba = [
            np.zeros((X.shape[0], j), dtype=np.float64)
            for j in np.atleast_1d(self.n_classes_)
        ]
        lock = threading.Lock()
        Parallel(n_jobs=n_jobs,
                 verbose=self.verbose,
                 **_joblib_parallel_args(require="sharedmem"))(
                     delayed(_accumulate_prediction)
                     (self.features[i], self.thresholds[i], self.childrens[i],
                      self.values[i], X, all_proba, lock)
                     for i in range(self.n_estimators))

        for proba in all_proba:
            proba /= len(self.features)

        if len(all_proba) == 1:
            return all_proba[0]
        else:
            return all_proba
    def predict_proba(self, X):

        check_is_fitted(self)

        X = self._validate_X_predict(X)

        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        all_proba = [
            np.zeros((X.shape[0], j), dtype=np.float64)
            for j in np.atleast_1d(self.n_classes_)
        ]

        lock = threading.Lock()
        Parallel(n_jobs=-1,
                 verbose=self.verbose,
                 **_joblib_parallel_args(require="sharedmem"))(
                     delayed(accumalate_prediction)(e.predict_proba, X,
                                                    all_proba, lock)
                     for e in self.estimators_)

        for proba in all_proba:
            proba /= len(self.estimators_)

        if len(all_proba) == 1:
            return all_proba[0]
        else:
            return all_proba
Code example #6
File: iforest.py Project: Pandinosaurus/pyod
    def feature_importances_(self):
        """The impurity-based feature importance. The higher, the more
        important the feature. The importance of a feature is computed as the
        (normalized) total reduction of the criterion brought by that feature.
        It is also known as the Gini importance.

        .. warning::
            Impurity-based feature importance can be misleading for
            high cardinality features (many unique values). See
            https://scikit-learn.org/stable/modules/generated/sklearn.inspection.permutation_importance.html
            as an alternative.

        Returns
        -------
        feature_importances_ : ndarray of shape (n_features,)
            The values of this array sum to 1, unless all trees are single node
            trees consisting of only the root node, in which case it will be an
            array of zeros.
        """
        check_is_fitted(self)
        all_importances = Parallel(
            n_jobs=self.n_jobs, **_joblib_parallel_args(prefer="threads"))(
                delayed(getattr)(tree, "feature_importances_")
                for tree in self.detector_.estimators_
                if tree.tree_.node_count > 1)

        if not all_importances:
            return np.zeros(self.n_features_in_, dtype=np.float64)

        all_importances = np.mean(all_importances, axis=0, dtype=np.float64)
        return all_importances / np.sum(all_importances)
Code example #7
File: essai.py Project: yiyang-yu/wildwood
def fit(random_state):
    np.random.seed(random_state)
    random_states = randint(np.iinfo(np.intp).max, size=n_jobs)
    trees = Parallel(
        n_jobs=16,
        **_joblib_parallel_args(prefer="threads"),
    )(delayed(f)(out, n, random_state, idx)
      for idx, random_state in zip(range(n_jobs), random_states))
Code example #8
    def fit(self, X, y, sample_weight=None):
        """
        Trains the binner and an estimator on every
        bucket.

        Parameters
        ----------
        X: features, *X* is converted into an array if *X* is a dataframe

        y: target

        sample_weight: sample weights

        Returns
        -------
        self: returns an instance of self.

        Attributes
        ----------

        binner_ : binner

        estimators_ : dictionary of estimators, each of them
            mapped to a leaf of the tree

        mean_estimator_ : estimator trained on the whole
            dataset in case the binner cannot find a bucket for
            a new observation

        dim_: dimension of the output
        mean_: average targets
        """
        self.estimators_ = []
        estimators = [clone(self.estimator) for i in range(self.n_estimators)]

        loop = tqdm(range(
            len(estimators))) if self.verbose == 'tqdm' else range(
                len(estimators))
        verbose = 1 if self.verbose == 'tqdm' else (1 if self.verbose else 0)

        def _fit_piecewise_estimator(i, est, X, y, sample_weight, alpha):
            new_size = int(X.shape[0] * alpha + 0.5)
            rnd = numpy.random.randint(0, X.shape[0] - 1, new_size)
            Xr = X[rnd]
            yr = y[rnd]
            sr = sample_weight[rnd] if sample_weight is not None else None
            return est.fit(Xr, yr, sr)

        self.estimators_ = \
            Parallel(n_jobs=self.n_jobs, verbose=verbose,
                     **_joblib_parallel_args(prefer='threads'))(
                delayed(_fit_piecewise_estimator)(
                    i, estimators[i], X, y, sample_weight, self.alpha)
                for i in loop)

        return self
Code example #9
File: forest.py Project: Kinteshi/RFEP
    def predict(self, X, mask):
        """Predict regression target for X.
        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the trees in the forest.
        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.

        mask : array storing whether or not each tree is present in the forest.

        fileCache : path where the trees to be used are located; note that each collection and each fold
            has its own unique set of trees
        Returns
        -------
        y : array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted values.
        """

        mask = [i == '1' or i == 1 for i in mask]

        self.n_outputs_ = 1

        check_is_fitted(self, 'estimators_')

        # Check data
        X = self._validate_X_predict(X)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # avoid storing the output of every estimator by summing them here
        if self.n_outputs_ > 1:
            y_hat = np.zeros((X.shape[0], self.n_outputs_), dtype=np.float64)
        else:
            y_hat = np.zeros((X.shape[0]), dtype=np.float64)

        # Parallel loop
        lock = threading.Lock()
        Parallel(n_jobs=n_jobs,
                 verbose=self.verbose,
                 **_joblib_parallel_args(require="sharedmem"))(
                     delayed(_accumulate_prediction_mod)(e.predict, X, gene,
                                                         [y_hat], lock)
                     for e, gene in zip(self.estimators_, mask))

        n_trees = 0
        for g in mask:
            if g:
                n_trees += 1

        y_hat /= n_trees

        return y_hat
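`_accumulate_prediction_mod` is not included in the snippet. Judging from the call site and from the final division by the number of selected trees, it presumably behaves like the usual accumulator but skips trees whose mask entry (`gene`) is falsy. A minimal sketch under that assumption:

def _accumulate_prediction_mod(predict, X, gene, out, lock):
    # Only trees selected by the mask contribute to the accumulated prediction.
    if not gene:
        return
    prediction = predict(X, check_input=False)
    with lock:
        out[0] += prediction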
Code example #10
File: forest.py Project: Kinteshi/RFEP
    def oob_predict(self, X, y, genes, parallel=True):
        """
        Compute out-of-bag prediction.
        """
        X = check_array(X, dtype=DTYPE, accept_sparse='csr')

        n_samples = y.shape[0]

        predictions = np.zeros((n_samples, self.n_outputs_))
        n_predictions = np.zeros((n_samples, self.n_outputs_))

        n_samples_bootstrap = _get_n_samples_bootstrap(n_samples, None)

        genes = [i == '1' or i == 1 for i in genes]

        if parallel:
            # Assign chunk of trees to jobs
            n_jobs, _, _ = _partition_estimators(self.n_estimators,
                                                 self.n_jobs)

            # Parallel loop
            lock = threading.Lock()
            Parallel(n_jobs=n_jobs,
                     verbose=self.verbose,
                     **_joblib_parallel_args(require="sharedmem"))(
                         delayed(_oob_accumulate_prediction)
                         (e.predict, X, gene, [predictions, n_predictions],
                          lock, n_samples, n_samples_bootstrap,
                          self.n_outputs_, e.random_state)
                         for e, gene in zip(self.estimators_, genes))
        else:
            for e, gene in zip(self.estimators_, genes):
                if gene:
                    unsampled_indices = _generate_unsampled_indices(
                        e.random_state, n_samples, n_samples_bootstrap)
                    p_estimator = e.predict(X[unsampled_indices, :],
                                            check_input=False)

                    if self.n_outputs_ == 1:
                        p_estimator = p_estimator[:, np.newaxis]

                    predictions[unsampled_indices, :] += p_estimator
                    n_predictions[unsampled_indices, :] += 1
                else:
                    pass

        if (n_predictions == 0).any():
            warn("Some inputs do not have OOB scores. "
                 "This probably means too few trees were used "
                 "to compute any reliable oob estimates.")
            n_predictions[n_predictions == 0] = 1

        predictions /= n_predictions
        return predictions
Code example #11
File: data.py Project: Xiangyan93/AIMS
 def from_df(cls, args: CommonArgs, df: pd.DataFrame):
     args.update_columns(df.keys().to_list())
     if args.group_reading:
         pure_columns = args.pure_columns or []
         mixture_columns = args.mixture_columns or []
         reaction_columns = args.reaction_columns or []
         n1 = len(pure_columns)
         n2 = len(mixture_columns)
         n3 = len(reaction_columns)
         groups = df.groupby(pure_columns + mixture_columns +
                             reaction_columns)
         data = Parallel(
             n_jobs=args.n_jobs,
             verbose=True,
             **_joblib_parallel_args(prefer='processes'))(
                 delayed(cls.get_subDataset)
                 ((lambda x: [x] if x.__class__ == str else tolist(x))(g[0])
                  [0:n1], (lambda x: tolist([x]) if x.__class__ == str else
                           tolist(x))(g[0])[n1:n1 + n2], args.mixture_type,
                  (lambda x: [x] if x.__class__ == str else tolist(x)
                   )(g[0])[n1 + n2:n1 + n2 + n3], args.reaction_type,
                  to_numpy(g[1][args.target_columns]),
                  to_numpy(g[1][
                      args.feature_columns]), args.features_generator)
                 for g in groups)
     else:
         data = Parallel(
             n_jobs=args.n_jobs,
             verbose=True,
             **_joblib_parallel_args(prefer='processes'))(
                 delayed(cls.get_subDataset)(
                     tolist(df.iloc[i].get(args.pure_columns)),
                     tolist(df.iloc[i].get(args.mixture_columns)),
                     args.mixture_type,
                     tolist(df.iloc[i].get(args.reaction_columns)),
                     args.reaction_type,
                     to_numpy(df.iloc[i:i + 1][args.target_columns]),
                     to_numpy(df.iloc[i:i + 1].get(args.feature_columns)),
                     args.features_generator,
                 ) for i in df.index)
     return cls(data)
Code example #12
def _decision_path(isolation_forest, X, n_jobs):
    # code from sklearn RandomForest.
    X = check_array(X, dtype=DTYPE, accept_sparse='csr')
    indicators = Parallel(n_jobs=n_jobs,
                          **_joblib_parallel_args(prefer='threads'))(
                              delayed(parallel_helper)(
                                  tree, 'decision_path', X, check_input=False)
                              for tree in isolation_forest.estimators_)
    n_nodes = [0]
    n_nodes.extend([i.shape[1] for i in indicators])
    n_nodes_ptr = np.array(n_nodes).cumsum()
    indicators = sparse_hstack(indicators).tocsr()
    return indicators, n_nodes_ptr
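As a short usage note, the returned `n_nodes_ptr` delimits which columns of the stacked indicator matrix belong to each tree. A sketch of how one might slice out a single tree's columns (assuming `indicators` and `n_nodes_ptr` come from the function above):

def tree_indicator(indicators, n_nodes_ptr, i):
    # Columns [n_nodes_ptr[i], n_nodes_ptr[i + 1]) of the stacked CSR matrix
    # are the node indicators of the i-th tree in the isolation forest.
    return indicators[:, n_nodes_ptr[i]:n_nodes_ptr[i + 1]]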
Code example #13
File: cbpm.py Project: juaml/julearn
    def fit(self, X, y):

        X, y = self._validate_data(X, y)

        self.X_y_correlations_ = np.array(
            Parallel(n_jobs=self.n_jobs,
                     verbose=self.verbose,
                     **_joblib_parallel_args())(
                         delayed(self.corr_method)(X[:, X_idx], y)
                         for X_idx in range(X.shape[1])))

        self.create_masks(y)

        return self
    def feature_importances_(self):

        check_is_fitted(self)

        all_importances = Parallel(
            n_jobs=-1, **_joblib_parallel_args(prefer='threads'))(
                delayed(getattr)(tree, 'feature_importances_')
                for tree in self.estimators_ if tree.tree_.node_count > 1)

        if not all_importances:
            return np.zeros(self.n_features_, dtype=np.float64)

        all_importances = np.mean(all_importances, axis=0, dtype=np.float64)

        return all_importances / np.sum(all_importances)
Code example #15
    def predict(self, X):
        """
        Predict regression target for X.

        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the trees in the forest.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.

        Returns
        -------
        y : ndarray of shape (n_samples,) or (n_samples, n_outputs)
            The predicted values.
        """
        check_is_fitted(self)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # avoid storing the output of every estimator by summing them here
        y_hat = np.zeros((X.shape[0], 1), dtype=np.float64)

        # Parallel loop
        lock = threading.Lock()
        Parallel(
            n_jobs=n_jobs,
            verbose=self.verbose,
            **_joblib_parallel_args(require="sharedmem")
        )(
            delayed(_accumulate_prediction)(
                self.features[i],
                self.thresholds[i],
                self.childrens[i],
                self.values[i],
                X,
                [y_hat],
                lock,
            )
            for i in range(self.n_estimators)
        )

        y_hat /= self.n_estimators
        return y_hat
Code example #16
    def predict_proba_trees(self, X):
        check_is_fitted(self)
        # Check data
        X = self._validate_X_predict(X)
        # TODO: we can also avoid data binning for predictions...
        X_binned = self._bin_data(X, is_training_data=False)
        n_samples, n_features = X.shape
        n_estimators = len(self.trees)
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)
        probas = np.empty((n_estimators, n_samples, n_features))

        lock = threading.Lock()
        Parallel(
            n_jobs=n_jobs,
            verbose=self.verbose,
            **_joblib_parallel_args(require="sharedmem"),
        )(delayed(_get_tree_prediction)(e.predict_proba, X_binned, probas,
                                        lock, tree_idx)
          for tree_idx, e in enumerate(self.trees))
        return probas
Code example #17
    def predict(self, X):
        """
        Predict confidence intervals for the regression target of X.
        The per-tree predictions are collected and the lower and upper
        percentiles across the trees are returned for each input sample.
        Parameters
        ----------
        X : array-like or sparse matrix of shape (n_samples, n_features)
            The input samples. Internally, its dtype will be converted to
            ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csr_matrix``.
        Returns
        -------
        y : ndarray
            The lower and upper confidence-interval bounds for each sample,
            stacked along the last axis.
        """
        check_is_fitted(self)
        # Check data
        X = self._validate_X_predict(X)

        # Assign chunk of trees to jobs
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        # Parallel loop
        # Store the output of every estimator in order to compute confidence intervals
        y_hat = Parallel(n_jobs=self.n_jobs,
                         verbose=self.verbose,
                         **_joblib_parallel_args(require="sharedmem"))(
                             delayed(_accumulate_prediction)(
                                 e.predict, X, self.minimum_value)
                             for e in self.forest.estimators_)

        y_hat_below = np.percentile(y_hat,
                                    self.confidence_interval_lower,
                                    axis=0)
        y_hat_above = np.percentile(y_hat,
                                    self.confidence_interval_upper,
                                    axis=0)

        return np.dstack((y_hat_below, y_hat_above))
Code example #18
    def _apply_predict_method(self, X, method, parallelized, dimout):
        """
        Generic *predict* method, works for *predict_proba* and
        *decision_function* as well.
        """
        check_is_fitted(self, 'estimators_')
        if len(self.estimators_) == 0:
            raise RuntimeError(
                "Estimator was apparently fitted but contains no estimator.")
        if not hasattr(self.estimators_[0], method):
            raise TypeError("Estimator {} does not have method '{}'.".format(
                type(self.estimators_[0]), method))
        if isinstance(X, pandas.DataFrame):
            X = X.values

        association = self.transform_bins(X)

        indpred = Parallel(n_jobs=self.n_jobs,
                           **_joblib_parallel_args(prefer='threads'))(
                               delayed(parallelized)(i, model, X, association)
                               for i, model in enumerate(self.estimators_))

        pred = numpy.zeros((X.shape[0],
                            dimout) if dimout > 1 else (X.shape[0], ))
        indall = numpy.empty((X.shape[0], ))
        indall[:] = False
        for ind, p in indpred:
            if ind is None:
                continue
            pred[ind] = p
            indall = numpy.logical_or(indall, ind)  # pylint: disable=E1111

        # not in a bucket
        indall = numpy.logical_not(indall)  # pylint: disable=E1111
        Xmissed = X[indall]
        if Xmissed.shape[0] > 0:
            meth = getattr(self.mean_estimator_, method)
            missed = meth(Xmissed)
            pred[indall] = missed
        return pred
Code example #19
File: forest.py Project: Kinteshi/RFEP
    def oob_predict_buffer(self, X, y, parallel=True):

        X = check_array(X, dtype=DTYPE, accept_sparse='csr')

        n_samples = X.shape[0]

        n_samples_bootstrap = _get_n_samples_bootstrap(n_samples, None)

        prediction_buffer = np.zeros(
            (n_samples_bootstrap, len(self.estimators_)), dtype='float32')

        prediction_buffer[:, :] = np.nan

        if parallel:
            # Assign chunk of trees to jobs
            n_jobs, _, _ = _partition_estimators(self.n_estimators,
                                                 self.n_jobs)

            # Parallel loop
            lock = threading.Lock()
            Parallel(
                n_jobs=n_jobs,
                verbose=self.verbose,
                **_joblib_parallel_args(require="sharedmem"))(
                    delayed(_oob_bufferize_prediction)
                    (e.predict, X, estimator, prediction_buffer, lock,
                     n_samples, n_samples_bootstrap, self.n_outputs_,
                     e.random_state) for e, estimator in zip(
                         self.estimators_, range(0, len(self.estimators_))))
        else:
            for e, estimator in zip(self.estimators_,
                                    range(0, len(self.estimators_))):
                unsampled_indices = _generate_unsampled_indices(
                    e.random_state, n_samples, n_samples_bootstrap)
                p_estimator = e.predict(X[unsampled_indices, :],
                                        check_input=False)
                prediction_buffer[unsampled_indices, estimator] = p_estimator

        self.__buffer = prediction_buffer
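The `_oob_bufferize_prediction` helper is not shown; the sequential branch above suggests it predicts on each tree's out-of-bag rows and writes the result into that tree's column of the buffer under the lock. A sketch under that assumption (it relies on `_generate_unsampled_indices`, which the sequential branch already uses and which comes from scikit-learn's forest module):

def _oob_bufferize_prediction(predict, X, estimator_idx, buffer, lock,
                              n_samples, n_samples_bootstrap, n_outputs,
                              random_state):
    # Mirror the sequential branch: predict on this tree's out-of-bag samples
    # and store the predictions in the tree's column of the shared buffer.
    unsampled_indices = _generate_unsampled_indices(
        random_state, n_samples, n_samples_bootstrap)
    prediction = predict(X[unsampled_indices, :], check_input=False)
    with lock:
        buffer[unsampled_indices, estimator_idx] = prediction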
Code example #20
File: data.py Project: Xiangyan93/AIMS
 def from_public(cls, args: CommonArgs):
     if args.data_public == 'qm7':
         qm_data = QM7(ase=True)
     elif args.data_public == 'qm9':
         qm_data = QM9(ase=True)
     else:
         raise RuntimeError(f'Unknown public data set {args.data_public}')
     data = Parallel(
         n_jobs=args.n_jobs,
         verbose=True,
         **_joblib_parallel_args(prefer='processes'))(
             delayed(cls.get_subDataset)(
                 [],
                 [],
                 args.mixture_type,
                 [],
                 args.reaction_type,
                 tolist(qm_data.iloc[i].get('atoms')),
                 to_numpy(qm_data.iloc[i:i + 1][args.target_columns]),
                 to_numpy(qm_data.iloc[i:i + 1].get(args.feature_columns)),
                 args.features_generator,
             ) for i in qm_data.index)
     return cls(data)
Code example #21
def _forest_predict_var(forest, X_test, n_jobs):
    """Helper function to accumulate predictions and their variances.

    Parameters
    ----------
    forest : RandomForestRegressor
        Regressor object.

    X_test : ndarray, shape (n_test_samples, n_features)
        The design matrix for testing data.

    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel. ``None`` means 1. ``-1`` means
        use all processors.
    """
    check_is_fitted(forest)
    X_test = forest._validate_X_predict(X_test)

    n_jobs, _, _ = _partition_estimators(forest.n_estimators, n_jobs)

    y_hat = np.zeros((X_test.shape[0]), dtype=np.float64)
    y_var = np.zeros((X_test.shape[0]), dtype=np.float64)

    # Parallel loop
    lock = threading.Lock()
    Parallel(n_jobs=n_jobs,
             verbose=forest.verbose,
             **_joblib_parallel_args(require='sharedmem'))(
                 delayed(_accumulate_predictions_and_var)(e.predict, X_test,
                                                          [y_hat, y_var], lock)
                 for e in forest.estimators_)

    y_hat /= len(forest.estimators_)
    y_var /= len(forest.estimators_)
    y_var -= y_hat**2

    return [y_hat, y_var]
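`_accumulate_predictions_and_var` is likewise not shown. Given that the accumulators are divided by the number of trees and then `y_var -= y_hat**2`, the helper presumably adds each tree's prediction to the first accumulator and its square to the second, so that the mean and variance can be recovered afterwards. A minimal sketch under that assumption:

def _accumulate_predictions_and_var(predict, X, out, lock):
    # out[0] collects the sum of predictions, out[1] the sum of squared
    # predictions; dividing by the number of trees and subtracting the squared
    # mean then yields the per-sample variance across trees.
    prediction = predict(X, check_input=False)
    with lock:
        out[0] += prediction
        out[1] += prediction ** 2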
Code example #22
    def fit(self, X, y=None, *, groups=None, **fit_kwargs):

        cv = check_cv(self.cv)

        jobs = (delayed(self._compute_score_path)(X, y, train, test,
                                                  **fit_kwargs)
                for train, test in cv.split(X, y, groups=groups))

        score_path = Parallel(n_jobs=self.n_jobs,
                              verbose=self.verbose_cv,
                              **_joblib_parallel_args(prefer='threads'))(jobs)

        self.mean_scores_ = np.mean(score_path, axis=0)

        self.best_index_ = np.argmin(self.mean_scores_)
        self.best_score_ = np.min(self.mean_scores_)
        self.best_hyperparams_ = self.hyperparams_grid_[self.best_index_]

        for name in self.hyperparam_names:
            setattr(self.model, name, self.best_hyperparams_[name])

        self.model.fit(X, y, **fit_kwargs)

        return self
Code example #23
File: forest.py Project: zeta1999/Deep-Forest
    def fit(self, X, y, sample_weight=None):
        """
        Build a forest of trees from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Internally, its dtype will be converted
            to ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csc_matrix``.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted. Splits
            that would create child nodes with net zero or negative weight are
            ignored while searching for a split in each node. In the case of
            classification, splits are also ignored if they would result in any
            single class carrying a negative weight in either child node.

        Returns
        -------
        self : object
        """
        # Validate or convert input data
        if issparse(y):
            raise ValueError(
                "sparse multilabel-indicator for y is not supported.")

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X)

        if issparse(X):
            # Pre-sort indices to avoid that each individual tree of the
            # ensemble sorts the indices.
            X.sort_indices()

        # Remap output
        n_samples, self.n_features_ = X.shape

        y = np.atleast_1d(y)
        if y.ndim == 2 and y.shape[1] == 1:
            warn(
                "A column-vector y was passed when a 1d array was"
                " expected. Please change the shape of y to "
                "(n_samples,), for example using ravel().",
                DataConversionWarning,
                stacklevel=2)

        if y.ndim == 1:
            # reshape is necessary to preserve the data contiguity, which
            # [:, np.newaxis] does not.
            y = np.reshape(y, (-1, 1))

        self.n_outputs_ = y.shape[1]
        y, expanded_class_weight = self._validate_y_class_weight(y)

        if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
            y = np.ascontiguousarray(y, dtype=DOUBLE)

        # Get bootstrap sample size
        n_samples_bootstrap = _get_n_samples_bootstrap(
            n_samples=X.shape[0], max_samples=self.max_samples)

        # Check parameters
        self._validate_estimator()
        random_state = check_random_state(self.random_state)
        n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)

        trees = [
            self._make_estimator(append=False, random_state=random_state)
            for i in range(self.n_estimators)
        ]

        # Pre-allocate OOB estimations
        oob_decision_function = np.zeros(
            (n_samples, self.classes_[0].shape[0]))

        lock = threading.Lock()
        rets = Parallel(
            n_jobs=n_jobs,
            verbose=self.verbose,
            **_joblib_parallel_args(prefer='threads', require="sharedmem"))(
                delayed(_parallel_build_trees)(
                    t, X, y, n_samples_bootstrap, sample_weight,
                    oob_decision_function, lock) for i, t in enumerate(trees))

        # Collect newly grown trees
        for feature, threshold, children, value in rets:

            # No check on feature and threshold since 1-D array is always
            # C-aligned and F-aligned.
            self.features.append(feature)
            self.thresholds.append(threshold)
            self.childrens.append(children)
            self.values.append(value)

        # Check the OOB predictions
        if (oob_decision_function.sum(axis=1) == 0).any():
            warn("Some inputs do not have OOB predictions. "
                 "This probably means too few trees were used "
                 "to compute any reliable oob predictions.")

        prediction = (oob_decision_function /
                      oob_decision_function.sum(axis=1)[:, np.newaxis])

        self.oob_decision_function_ = prediction

        # Decapsulate classes_ attributes
        if hasattr(self, "classes_") and self.n_outputs_ == 1:
            self.n_classes_ = self.n_classes_[0]
            self.classes_ = self.classes_[0]

        return self
Code example #24
    def fit(self, X, y, sample_weight=None):
        """Build a forest of survival trees from the training set (X, y).

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Data matrix

        y : structured array, shape = (n_samples,)
            A structured array containing the binary event indicator
            as first field, and time of event or time of censoring as
            second field.

        Returns
        -------
        self
        """
        X, event, time = check_arrays_survival(X, y)

        self.n_features_ = X.shape[1]
        time = time.astype(np.float64)
        self.event_times_ = np.unique(time[event])
        self.n_outputs_ = self.event_times_.shape[0]

        y_numeric = np.empty((X.shape[0], 2), dtype=np.float64)
        y_numeric[:, 0] = time
        y_numeric[:, 1] = event.astype(np.float64)

        # Get bootstrap sample size
        n_samples_bootstrap = _get_n_samples_bootstrap(
            n_samples=X.shape[0], max_samples=self.max_samples)

        # Check parameters
        self._validate_estimator()

        if not self.bootstrap and self.oob_score:
            raise ValueError("Out of bag estimation only available"
                             " if bootstrap=True")

        random_state = check_random_state(self.random_state)

        if not self.warm_start or not hasattr(self, "estimators_"):
            # Free allocated memory, if any
            self.estimators_ = []

        n_more_estimators = self.n_estimators - len(self.estimators_)

        if n_more_estimators < 0:
            raise ValueError("n_estimators=%d must be larger or equal to "
                             "len(estimators_)=%d when warm_start==True" %
                             (self.n_estimators, len(self.estimators_)))

        elif n_more_estimators == 0:
            warnings.warn("Warm-start fitting without increasing n_estimators "
                          "does not fit new trees.")
        else:
            if self.warm_start and len(self.estimators_) > 0:
                # We draw from the random state to get the random state we
                # would have got if we hadn't used a warm_start.
                random_state.randint(MAX_INT, size=len(self.estimators_))

            trees = [
                self._make_estimator(append=False, random_state=random_state)
                for i in range(n_more_estimators)
            ]

            # Parallel loop: we prefer the threading backend as the Cython code
            # for fitting the trees is internally releasing the Python GIL
            # making threading more efficient than multiprocessing in
            # that case. However, for joblib 0.12+ we respect any
            # parallel_backend contexts set at a higher level,
            # since correctness does not rely on using threads.
            trees = Parallel(n_jobs=self.n_jobs,
                             verbose=self.verbose,
                             **_joblib_parallel_args(prefer='threads'))(
                                 delayed(_parallel_build_trees)(
                                     t,
                                     self,
                                     X, (y_numeric, self.event_times_),
                                     sample_weight,
                                     i,
                                     len(trees),
                                     verbose=self.verbose,
                                     n_samples_bootstrap=n_samples_bootstrap)
                                 for i, t in enumerate(trees))

            # Collect newly grown trees
            self.estimators_.extend(trees)

        if self.oob_score:
            self._set_oob_score(X, (event, time))

        return self
Code example #25
def test_joblib_parallel_args(monkeypatch, joblib_version):
    import joblib

    monkeypatch.setattr(joblib, "__version__", joblib_version)

    if joblib_version == "0.12.0":
        # arguments are simply passed through
        assert _joblib_parallel_args(prefer="threads") == {"prefer": "threads"}
        assert _joblib_parallel_args(prefer="processes", require=None) == {
            "prefer": "processes",
            "require": None,
        }
        assert _joblib_parallel_args(non_existing=1) == {"non_existing": 1}
    elif joblib_version == "0.11":
        # arguments are mapped to the corresponding backend
        assert _joblib_parallel_args(prefer="threads") == {
            "backend": "threading"
        }
        assert _joblib_parallel_args(prefer="processes") == {
            "backend": "multiprocessing"
        }
        with pytest.raises(ValueError):
            _joblib_parallel_args(prefer="invalid")
        assert _joblib_parallel_args(prefer="processes",
                                     require="sharedmem") == {
                                         "backend": "threading"
                                     }
        with pytest.raises(ValueError):
            _joblib_parallel_args(require="invalid")
        with pytest.raises(NotImplementedError):
            _joblib_parallel_args(verbose=True)
    else:
        raise ValueError
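For context, the behaviour this test exercises can be implemented with a small version-dependent mapping. The following is a minimal sketch consistent with the assertions above, modelled on `sklearn.utils.fixes`; the exact error messages are illustrative only:

from distutils.version import LooseVersion

import joblib


def _joblib_parallel_args(**kwargs):
    # With joblib >= 0.12, prefer/require are understood natively: pass through.
    if LooseVersion(joblib.__version__) >= LooseVersion('0.12'):
        return kwargs

    extra_args = set(kwargs).difference({'prefer', 'require'})
    if extra_args:
        raise NotImplementedError(
            'unhandled arguments %s with joblib %s'
            % (sorted(extra_args), joblib.__version__))

    args = {}
    if 'prefer' in kwargs:
        prefer = kwargs['prefer']
        if prefer not in ('processes', 'threads', None):
            raise ValueError('prefer=%s is not supported' % prefer)
        args['backend'] = {'processes': 'multiprocessing',
                           'threads': 'threading',
                           None: None}[prefer]
    if 'require' in kwargs:
        require = kwargs['require']
        if require not in (None, 'sharedmem'):
            raise ValueError('require=%s is not supported' % require)
        if require == 'sharedmem':
            # Shared memory is only guaranteed by the threading backend.
            args['backend'] = 'threading'
    return args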
Code example #26
    def fit(self, X, y, sample_weight=None):
        """
        Trains the binner and an estimator on every
        bucket.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :param y: target
        :param sample_weight: sample weights
        :return: self: returns an instance of self.

        Fitted attributes:

        * `binner_`: binner
        * `estimators_`: dictionary of estimators, each of them
            mapped to a leaf of the tree
        * `mean_estimator_`: estimator trained on the whole
            dataset in case the binner cannot find a bucket for
            a new observation
        * `dim_`: dimension of the output
        * `mean_`: average targets
        """
        if isinstance(X, pandas.DataFrame):
            X = X.values
        if isinstance(X, list):
            raise TypeError(  # pragma: no cover
                "X cannot be a list.")
        binner = clone(self.binner)
        if sample_weight is None:
            self.binner_ = binner.fit(X, y)
        else:
            self.binner_ = binner.fit(X, y, sample_weight=sample_weight)

        association, self.mapping_, self.leaves_ = self._mapping_train(
            X, self.binner_)

        estimators = [clone(self.estimator) for i in self.mapping_]

        loop = (tqdm(range(len(estimators)))
                if self.verbose == 'tqdm' else range(len(estimators)))
        verbose = 1 if self.verbose == 'tqdm' else (1 if self.verbose else 0)

        self.mean_estimator_ = clone(self.estimator).fit(X, y, sample_weight)
        nb_classes = (None if not hasattr(self.mean_estimator_, 'classes_')
                      else len(set(self.mean_estimator_.classes_)))

        if hasattr(self, 'random_state') and self.random_state is not None:  # pylint: disable=E1101
            rnd = numpy.random.RandomState(  # pylint: disable=E1101
                self.random_state)  # pylint: disable=E1101
        else:
            rnd = None

        self.estimators_ = \
            Parallel(n_jobs=self.n_jobs, verbose=verbose,
                     **_joblib_parallel_args(prefer='threads'))(
                delayed(_fit_piecewise_estimator)(
                    i, estimators[i], X, y, sample_weight, association, nb_classes, rnd)
                for i in loop)

        self.dim_ = 1 if len(y.shape) == 1 else y.shape[1]
        if hasattr(self.estimators_[0], 'classes_'):
            self.classes_ = self.estimators_[0].classes_
        return self
Code example #27
File: test_fixes.py Project: allefpablo/scikit-learn
def test_joblib_parallel_args(monkeypatch, joblib_version):
    import sklearn.utils._joblib
    monkeypatch.setattr(sklearn.utils._joblib, '__version__', joblib_version)

    if joblib_version == '0.12.0':
        # arguments are simply passed through
        assert _joblib_parallel_args(prefer='threads') == {'prefer': 'threads'}
        assert _joblib_parallel_args(prefer='processes', require=None) == {
                    'prefer': 'processes', 'require': None}
        assert _joblib_parallel_args(non_existing=1) == {'non_existing': 1}
    elif joblib_version == '0.11':
        # arguments are mapped to the corresponding backend
        assert _joblib_parallel_args(prefer='threads') == {
                    'backend': 'threading'}
        assert _joblib_parallel_args(prefer='processes') == {
                    'backend': 'multiprocessing'}
        with pytest.raises(ValueError):
            _joblib_parallel_args(prefer='invalid')
        assert _joblib_parallel_args(
                prefer='processes', require='sharedmem') == {
                    'backend': 'threading'}
        with pytest.raises(ValueError):
            _joblib_parallel_args(require='invalid')
        with pytest.raises(NotImplementedError):
            _joblib_parallel_args(verbose=True)
    else:
        raise ValueError
Code example #28
    def fit(self, X, y, sample_weight=None):
        """Build a forest of trees from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Internally, its dtype will be converted
            to ``dtype=np.float32``. If a sparse matrix is provided, it will be
            converted into a sparse ``csc_matrix``.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).

        sample_weight : array-like of shape (n_samples,)
            Sample weights. If None, then samples are equally weighted. Splits
            that would create child nodes with net zero or negative weight are
            ignored while searching for a split in each node. In the case of
            classification, splits are also ignored if they would result in any
            single class carrying a negative weight in either child node.

        Returns
        -------
        self : object
            The fitted instance.
        """

        # Validate or convert input data
        if issparse(y):
            raise ValueError(
                "sparse multilabel-indicator for y is not supported.")
        X, y = self._validate_data(X,
                                   y,
                                   multi_output=True,
                                   accept_sparse="csc",
                                   dtype=DTYPE)
        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X)
        self._n_features = X.shape[1]

        if issparse(X):
            # Pre-sort indices to avoid that each individual tree of the
            # ensemble sorts the indices.
            X.sort_indices()

        y = np.atleast_1d(y)
        if y.ndim == 2 and y.shape[1] == 1:
            warn(
                "A column-vector y was passed when a 1d array was"
                " expected. Please change the shape of y to "
                "(n_samples,), for example using ravel().",
                DataConversionWarning,
                stacklevel=2,
            )

        if y.ndim == 1:
            # reshape is necessary to preserve the data contiguity, which
            # [:, np.newaxis] does not.
            y = np.reshape(y, (-1, 1))

        self.n_outputs_ = y.shape[1]

        y_encoded, expanded_class_weight = self._validate_y_class_weight(y)

        if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
            y_encoded = np.ascontiguousarray(y_encoded, dtype=DOUBLE)

        if isinstance(self.sampling_strategy, dict):
            self._sampling_strategy = {
                np.where(self.classes_[0] == key)[0][0]: value
                for key, value in check_sampling_strategy(
                    self.sampling_strategy,
                    y,
                    "under-sampling",
                ).items()
            }
        else:
            self._sampling_strategy = self.sampling_strategy

        if expanded_class_weight is not None:
            if sample_weight is not None:
                sample_weight = sample_weight * expanded_class_weight
            else:
                sample_weight = expanded_class_weight

        # Get bootstrap sample size
        n_samples_bootstrap = _get_n_samples_bootstrap(
            n_samples=X.shape[0], max_samples=self.max_samples)

        # Check parameters
        self._validate_estimator()

        if not self.bootstrap and self.oob_score:
            raise ValueError(
                "Out of bag estimation only available if bootstrap=True")

        random_state = check_random_state(self.random_state)

        if not self.warm_start or not hasattr(self, "estimators_"):
            # Free allocated memory, if any
            self.estimators_ = []
            self.samplers_ = []
            self.pipelines_ = []

        n_more_estimators = self.n_estimators - len(self.estimators_)

        if n_more_estimators < 0:
            raise ValueError("n_estimators=%d must be larger or equal to "
                             "len(estimators_)=%d when warm_start==True" %
                             (self.n_estimators, len(self.estimators_)))

        elif n_more_estimators == 0:
            warn("Warm-start fitting without increasing n_estimators does not "
                 "fit new trees.")
        else:
            if self.warm_start and len(self.estimators_) > 0:
                # We draw from the random state to get the random state we
                # would have got if we hadn't used a warm_start.
                random_state.randint(MAX_INT, size=len(self.estimators_))

            trees = []
            samplers = []
            for _ in range(n_more_estimators):
                tree, sampler = self._make_sampler_estimator(
                    random_state=random_state)
                trees.append(tree)
                samplers.append(sampler)

            # Parallel loop: we prefer the threading backend as the Cython code
            # for fitting the trees is internally releasing the Python GIL
            # making threading more efficient than multiprocessing in
            # that case. However, we respect any parallel_backend contexts set
            # at a higher level, since correctness does not rely on using
            # threads.
            samplers_trees = Parallel(
                n_jobs=self.n_jobs,
                verbose=self.verbose,
                **_joblib_parallel_args(prefer="threads"),
            )(delayed(_local_parallel_build_trees)(
                s,
                t,
                self,
                X,
                y_encoded,
                sample_weight,
                i,
                len(trees),
                verbose=self.verbose,
                class_weight=self.class_weight,
                n_samples_bootstrap=n_samples_bootstrap,
            ) for i, (s, t) in enumerate(zip(samplers, trees)))
            samplers, trees = zip(*samplers_trees)

            # Collect newly grown trees
            self.estimators_.extend(trees)
            self.samplers_.extend(samplers)

            # Create pipeline with the fitted samplers and trees
            self.pipelines_.extend([
                make_pipeline(deepcopy(s), deepcopy(t))
                for s, t in zip(samplers, trees)
            ])

        if self.oob_score:
            self._set_oob_score(X, y_encoded)

        # Decapsulate classes_ attributes
        if hasattr(self, "classes_") and self.n_outputs_ == 1:
            self.n_classes_ = self.n_classes_[0]
            self.classes_ = self.classes_[0]

        return self
Code example #29
 def compute_feature_importance(self,
                                x,
                                y,
                                partition_feature=None,
                                norm=True,
                                n_jobs=None):
     '''
     :param x: input X of the data; must be a pandas.core.frame.DataFrame or pandas.core.series.Series
     :param y: input Y of the data; the type does not need to be specified, but it must be supported by numpy
     :param partition_feature: used for partitioning the data into local data subspaces; must be
     a column of the data that can be hashed, but is optional. You can also partition the data in
     advance and input the feature subspaces one by one.
     For example, if you want to compute local variable importance for each
     day, you only need to let partition_feature = day of year (1-365),
     or input the feature subspace for each day one by one.
     :param norm: whether to normalise the output so that the sum of each row equals one.
     :param n_jobs: the number of jobs to run in parallel. Please refer to the Parallel class
     used by scikit-learn (joblib) for more detailed information.
     :return: local variable importance
     '''
     # to obtain the names of variables
     if not isinstance(x, Series) and not isinstance(x, DataFrame):
         raise TypeError(
             "{0} must be pandas.core.frame.DataFrame or pandas.core.series.Series not {1}"
             .format(x, type(x)))
     columns = x.columns
     # convert input X into numpy.array
     x = array(x, dtype=float64)
     # convert input Y to 1-D array
     y = array(y).ravel()
     # to obtain the number of variables
     self.FN = x.shape[1]
      # Produce the data_choose array. This array contains bool values that select the rows for each feature-subspace dataset
     if type(partition_feature) != type(None):
         partition_factor = list(partition_feature)
         # use set structure to extract factors
         partition_factor_set = set(partition_factor)
         partition_factor_list = list(partition_factor_set)
         # to obtain the number of group attribute
         self.FL = len(partition_factor_list)
         partition_factor_arr = np.array(partition_factor_list).reshape(
             self.FL, 1)
         # for each factor find out the rows of input group_by which is equal to it
         data_choose_bool = partition_factor_arr == partition_factor
     else:
         # if there is no group_by inputted, using all input rows
         self.FL = 1
         partition_factor_list = None
         data_choose_bool = np.ones((1, x.shape[0])) == 1
      # Parallelise over the trees. This is inherited from sklearn; refer to sklearn for a more detailed description.
     indicators = Parallel(
         n_jobs=n_jobs,
         verbose=self.verbose,
         max_nbytes='1M',
         **_joblib_parallel_args(prefer='threads'))(
             delayed(self.__traverse__)(tree, x, y, data_choose_bool)
             for tree in self.estimators_)  # traverse each tree in a forest
      feature_importance_trees = vstack(
          indicators
      )  # Vertically stack the arrays returned by __traverse__ into a 2-D array of per-tree importances
     feature_importance_forest = np.average(
         feature_importance_trees,
         axis=0)  # To compute averaged feature importance
     if not isinstance(norm, bool):
         raise TypeError('{0} must be True or False not {1}'.format(
             norm, type(norm)))
     if norm:  # whether standardise the output
         # sum up each row
         sum_of_feature_importance = feature_importance_forest.sum(
             axis=1).reshape(feature_importance_forest.shape[0], 1)
         # each one is divided by the sum of this row
         feature_importance_norm = feature_importance_forest / (
             sum_of_feature_importance + (sum_of_feature_importance == 0))
     else:
         # directly output without normalization
         feature_importance_norm = feature_importance_forest
     # return the result with the form of DataFrame
     return pd.DataFrame(feature_importance_norm,
                         columns=columns,
                         index=partition_factor_list)
Code example #30
def test_joblib_parallel_args(monkeypatch, joblib_version):
    import joblib
    monkeypatch.setattr(joblib, '__version__', joblib_version)

    if joblib_version == '0.12.0':
        # arguments are simply passed through
        assert _joblib_parallel_args(prefer='threads') == {'prefer': 'threads'}
        assert _joblib_parallel_args(prefer='processes', require=None) == {
            'prefer': 'processes',
            'require': None
        }
        assert _joblib_parallel_args(non_existing=1) == {'non_existing': 1}
    elif joblib_version == '0.11':
        # arguments are mapped to the corresponding backend
        assert _joblib_parallel_args(prefer='threads') == {
            'backend': 'threading'
        }
        assert _joblib_parallel_args(prefer='processes') == {
            'backend': 'multiprocessing'
        }
        with pytest.raises(ValueError):
            _joblib_parallel_args(prefer='invalid')
        assert _joblib_parallel_args(prefer='processes',
                                     require='sharedmem') == {
                                         'backend': 'threading'
                                     }
        with pytest.raises(ValueError):
            _joblib_parallel_args(require='invalid')
        with pytest.raises(NotImplementedError):
            _joblib_parallel_args(verbose=True)
    else:
        raise ValueError
Code example #31
    def fit(self, X, y, sample_weight=None):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and n_features is the number of features.

        y : array-like, shape (n_samples,)
            Target vector relative to X.

        sample_weight : ignored
            Ignored by diffprivlib.  Present for consistency with sklearn API.

        Returns
        -------
        self : class

        """
        self.accountant.check(self.epsilon, 0)

        if sample_weight is not None:
            warn_unused_args("sample_weight")

        if not isinstance(self.C, numbers.Real) or self.C < 0:
            raise ValueError("Penalty term must be positive; got (C=%r)" %
                             self.C)
        if not isinstance(self.max_iter,
                          numbers.Integral) or self.max_iter < 0:
            raise ValueError(
                "Maximum number of iteration must be positive; got (max_iter=%r)"
                % self.max_iter)
        if not isinstance(self.tol, numbers.Real) or self.tol < 0:
            raise ValueError(
                "Tolerance for stopping criteria must be positive; got (tol=%r)"
                % self.tol)

        solver = _check_solver(self.solver, self.penalty, self.dual)
        X, y = check_X_y(X,
                         y,
                         accept_sparse='csr',
                         dtype=np.float64,
                         order="C",
                         accept_large_sparse=solver != 'liblinear')
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        _, n_features = X.shape

        if self.data_norm is None:
            warnings.warn(
                "Data norm has not been specified and will be calculated on the data provided.  This will "
                "result in additional privacy leakage. To ensure differential privacy and no additional "
                "privacy leakage, specify `data_norm` at initialisation.",
                PrivacyLeakWarning)
            self.data_norm = np.linalg.norm(X, axis=1).max()

        X = clip_to_norm(X, self.data_norm)

        self.multi_class = _check_multi_class(self.multi_class, solver,
                                              len(self.classes_))

        n_classes = len(self.classes_)
        classes_ = self.classes_
        if n_classes < 2:
            raise ValueError(
                "This solver needs samples of at least 2 classes in the data, but the data contains only "
                "one class: %r" % classes_[0])

        if len(self.classes_) == 2:
            n_classes = 1
            classes_ = classes_[1:]

        if self.warm_start:
            warm_start_coef = getattr(self, 'coef_', None)
        else:
            warm_start_coef = None
        if warm_start_coef is not None and self.fit_intercept:
            warm_start_coef = np.append(warm_start_coef,
                                        self.intercept_[:, np.newaxis],
                                        axis=1)

        self.coef_ = list()
        self.intercept_ = np.zeros(n_classes)

        if warm_start_coef is None:
            warm_start_coef = [None] * n_classes

        path_func = delayed(_logistic_regression_path)

        fold_coefs_ = Parallel(
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            **_joblib_parallel_args(prefer='processes'))(
                path_func(X,
                          y,
                          epsilon=self.epsilon / n_classes,
                          data_norm=self.data_norm,
                          pos_class=class_,
                          Cs=[self.C],
                          fit_intercept=self.fit_intercept,
                          max_iter=self.max_iter,
                          tol=self.tol,
                          verbose=self.verbose,
                          coef=warm_start_coef_,
                          check_input=False)
                for class_, warm_start_coef_ in zip(classes_, warm_start_coef))

        fold_coefs_, _, n_iter_ = zip(*fold_coefs_)
        self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0]

        self.coef_ = np.asarray(fold_coefs_)
        self.coef_ = self.coef_.reshape(n_classes,
                                        n_features + int(self.fit_intercept))

        if self.fit_intercept:
            self.intercept_ = self.coef_[:, -1]
            self.coef_ = self.coef_[:, :-1]

        self.accountant.spend(self.epsilon, 0)

        return self