예제 #1
0
    def predict_proba(self, X):
        """
        Find probability estimates for each class for all cases in X.

        Every tree in ``self.trees`` contributes one class distribution per
        instance; the distributions are summed over the trees and the
        result is L1-normalised.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The input samples to predict.
            If a Pandas data frame is passed (sktime format), a check is
            performed that it only has one column.
            If not, an exception is thrown, since this classifier does not
            yet have multivariate capability.

        Returns
        -------
        output : array of shape = [n_instances, n_classes] of probabilities
        """
        X = check_X(X, enforce_univariate=True, coerce_to_pandas=True)
        # NOTE(review): helper defined elsewhere; presumably rewrites the
        # data frame index so these cases are distinguishable from the
        # training cases -- confirm against dataset_properties.
        X = dataset_properties.negative_dataframe_indices(X)
        # Fan out over workers only when more than one job is requested;
        # negative values are handed to Parallel unchanged.
        if self.n_jobs > 1 or self.n_jobs < 0:
            parallel = Parallel(self.n_jobs)
            per_tree = parallel(
                delayed(self._predict_proba_tree)(X, tree)
                for tree in self.trees)
        else:
            per_tree = [
                self._predict_proba_tree(X, tree) for tree in self.trees
            ]
        # Stack to (n_trees, ...) and collapse the tree axis.
        distributions = np.sum(np.array(per_tree), axis=0)
        # Use the returned array instead of relying on the in-place side
        # effect: with copy=False the normalisation is only *attempted* in
        # place and a copy may still be produced (e.g. for integer input),
        # which would otherwise leave the returned values un-normalised.
        return normalize(distributions, copy=False, norm="l1")
예제 #2
0
    def predict_proba(self, X):
        """
        Find probability estimates for each class for all cases in X.

        The distances returned by ``self.distance_to_exemplars`` are turned
        into affinities ``1 / (1 + distance)`` and the result is
        L1-normalised.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The input samples to predict.
            If a Pandas data frame is passed (sktime format), a check is
            performed that it only has one column.
            If not, an exception is thrown, since this classifier does not
            yet have multivariate capability.

        Returns
        -------
        output : array of shape = [n_instances, n_classes] of probabilities
        """
        X = check_X(X, enforce_univariate=True, coerce_to_pandas=True)
        # NOTE(review): helper defined elsewhere; presumably rewrites the
        # data frame index so these cases are distinguishable from the
        # training cases -- confirm against dataset_properties.
        X = dataset_properties.negative_dataframe_indices(X)
        distances = self.distance_to_exemplars(X)
        # Invert the distances so that closer exemplars score higher. The
        # +1 shift keeps a zero distance from dividing by zero (it maps to
        # an affinity of exactly 1).
        ones = np.ones(distances.shape)
        affinities = np.divide(ones, np.add(distances, ones))
        # Use the returned array instead of relying on the in-place side
        # effect: with copy=False the normalisation is only *attempted* in
        # place and a copy may still be produced, which would otherwise
        # leave the returned values un-normalised.
        return normalize(affinities, copy=False, norm="l1")
예제 #3
0
    def predict_proba(self, X):
        """
        Find probability estimates for each class for all cases in X.

        Each instance is routed to the branch of its closest exemplar, as
        reported by ``self.stump.find_closest_exemplar_indices``. A branch
        that is ``None`` assigns all mass to the class of its exemplar;
        any other branch is asked for its own ``predict_proba`` on the
        instances routed to it. The assembled matrix is L1-normalised.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The input samples to predict.
            If a Pandas data frame is passed (sktime format), a check is
            performed that it only has one column.
            If not, an exception is thrown, since this classifier does not
            yet have multivariate capability.

        Returns
        -------
        output : array of shape = [n_instances, n_classes] of probabilities
        """
        X = check_X(X, enforce_univariate=True, coerce_to_pandas=True)
        # NOTE(review): helper defined elsewhere; presumably rewrites the
        # data frame index so these cases are distinguishable from the
        # training cases -- confirm against dataset_properties.
        X = dataset_properties.negative_dataframe_indices(X)
        closest_exemplar_indices = self.stump.find_closest_exemplar_indices(X)
        n_classes = len(self.label_encoder.classes_)
        distribution = np.zeros((X.shape[0], n_classes))
        for index, sub_tree in enumerate(self.branches):
            indices = np.argwhere(closest_exemplar_indices == index)
            if indices.shape[0] == 0:
                # No instance was routed to this branch.
                continue
            indices = np.ravel(indices)
            if sub_tree is None:
                # No sub-tree on this branch: build a single one-hot row
                # for the exemplar's class; np.add.at below broadcasts it
                # over every routed instance.
                # NOTE(review): y_exemplar entries are used directly as
                # column positions, so they are assumed to be integer
                # encoded class labels -- confirm.
                sub_distribution = np.zeros((1, n_classes))
                class_label = self.stump.y_exemplar[index]
                sub_distribution[0][class_label] = 1
            else:
                sub_X = X.iloc[indices, :]
                sub_distribution = sub_tree.predict_proba(sub_X)
            # Internal invariant: every branch must report one column per
            # class, otherwise the rows cannot be merged.
            assert sub_distribution.shape[1] == n_classes
            np.add.at(distribution, indices, sub_distribution)
        # Use the returned array instead of relying on the in-place side
        # effect: with copy=False the normalisation is only *attempted* in
        # place and a copy may still be produced, which would otherwise
        # leave the returned values un-normalised.
        return normalize(distribution, copy=False, norm="l1")