def predict(self, X):
        """Predict targets by combining the fitted binary estimators.

        Parameters
        ----------
        X : (sparse) array-like, shape = [n_samples, n_features]
            Data.

        Returns
        -------
        y : (sparse) array-like, shape = [n_samples, ], [n_samples, n_classes].
            Predicted multi-class targets. In the multiclass case this is
            ``self.classes_`` indexed by the winning estimator per sample;
            otherwise it is whatever
            ``self.label_binarizer_.inverse_transform`` returns for the
            assembled indicator matrix.
        """
        check_is_fitted(self, 'estimators_')

        # The cut-off is chosen from the first estimator only: 0 when it is a
        # classifier exposing ``decision_function``, 0.5 otherwise.
        first_estimator = self.estimators_[0]
        scores_are_margins = (hasattr(first_estimator, "decision_function")
                              and is_classifier(first_estimator))
        thresh = 0 if scores_are_margins else .5

        n_samples = _num_samples(X)

        if self.label_binarizer_.y_type_ != "multiclass":
            # Build a CSC indicator matrix one column (= one estimator) at a
            # time: row indices of positive samples, then the column end.
            row_indices = array.array('i')
            column_ends = array.array('i', [0])
            for estimator in self.estimators_:
                positives = np.where(_predict_binary(estimator, X) > thresh)[0]
                row_indices.extend(positives)
                column_ends.append(len(row_indices))
            ones = np.ones(len(row_indices), dtype=int)
            indicator = sp.csc_matrix(
                (ones, row_indices, column_ends),
                shape=(n_samples, len(self.estimators_)))
            return self.label_binarizer_.inverse_transform(indicator)

        # Multiclass: keep a running maximum score per sample and remember
        # which estimator produced it.
        best_score = np.full(n_samples, -np.inf, dtype=float)
        best_index = np.zeros(n_samples, dtype=int)
        for position, estimator in enumerate(self.estimators_):
            score = _predict_binary(estimator, X)
            np.maximum(best_score, score, out=best_score)
            # Equal scores overwrite the index, so ties go to the later
            # estimator.
            best_index[best_score == score] = position
        return self.classes_[best_index]
    def predict(self, X):
        """Predict targets using only the estimators selected via neighbours.

        The neighbours of ``X`` are looked up with ``self.nbrs`` and reduced
        to ``neighbors_set`` by ``get_neighbors_above_threshold``. Only
        estimators whose position in ``self.estimators_`` is contained in
        that set are evaluated; the others are skipped.

        Parameters
        ----------
        X : (sparse) array-like, shape = [n_samples, n_features]
            Data.

        Returns
        -------
        y : array-like
            In the multiclass case, ``self.label_binarizer_.classes_``
            indexed by the best selected estimator per sample (index 0 if no
            estimator was selected). Otherwise the result of
            ``self.label_binarizer_.inverse_transform`` on the indicator
            matrix, in which skipped estimators contribute empty columns.
        """
        # Validate fitted state before touching any fitted attribute.
        check_is_fitted(self, 'estimators_')

        neighbors = self.nbrs.kneighbors(X, self.n_neighbors, return_distance=False)

        # NOTE(review): only the first row of `neighbors` is used, so the
        # selection is presumably meant for a single query sample -- confirm
        # against callers before passing several rows.
        neighbors_set = get_neighbors_above_threshold(self._fit_y, neighbors[0], self.threshold)

        # Cut-off is 0 for classifiers exposing decision_function, else 0.5.
        if (hasattr(self.estimators_[0], "decision_function") and
                is_classifier(self.estimators_[0])):
            thresh = 0
        else:
            thresh = .5

        n_samples = _num_samples(X)
        if self.label_binarizer_.y_type_ == "multiclass":
            maxima = np.empty(n_samples, dtype=float)
            maxima.fill(-np.inf)
            argmaxima = np.zeros(n_samples, dtype=int)
            for i, e in enumerate(self.estimators_):
                if i not in neighbors_set:
                    continue
                pred = _predict_binary(e, X)
                np.maximum(maxima, pred, out=maxima)
                # Equal scores overwrite the index: ties go to the later
                # selected estimator.
                argmaxima[maxima == pred] = i
            return self.label_binarizer_.classes_[np.array(argmaxima.T)]
        else:
            indices = array.array('i')
            indptr = array.array('i', [0])
            for i, e in enumerate(self.estimators_):
                if i in neighbors_set:
                    indices.extend(np.where(_predict_binary(e, X) > thresh)[0])
                # Always close the column, even for a skipped estimator.
                # Otherwise indptr ends up shorter than n_estimators + 1,
                # which no longer matches the declared matrix shape and
                # shifts every later column to the wrong label.
                indptr.append(len(indices))
            data = np.ones(len(indices), dtype=int)
            indicator = sp.csc_matrix((data, indices, indptr),
                                      shape=(n_samples, len(self.estimators_)))
            return self.label_binarizer_.inverse_transform(indicator)
def get_ovo_estimators_prediction(estimators, classes, X):
    """Predict with one-vs-one estimators by majority vote, flagging ties.

    Each estimator is asked for a hard prediction via ``predict``. For the
    estimator trained on the class pair ``(i, j)`` with ``i < j``, a
    prediction of 0 is counted as a vote for class ``i`` and a prediction of
    1 as a vote for class ``j``.

    In accordance with the hybrid algorithm, if any sample has more than one
    class sharing the highest vote count, the invalid value -1 is returned
    for the whole call. Otherwise the uniquely most-voted class of every
    sample is returned.

    Parameters
    ----------
    estimators : list of ``n_classes * (n_classes - 1) / 2`` estimators
        Pairwise estimators, ordered as the pairs ``(i, j)`` with ``i < j``
        are enumerated (``i`` outer, ``j`` inner).

    classes : numpy array of shape [n_classes]
        Array containing labels.

    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Data. Must expose ``shape``.

    Returns
    -------
    int or numpy array of shape [n_samples]
        -1 if at least one sample had a vote tie, otherwise the predicted
        class label of each sample.
    """
    n_samples = X.shape[0]
    n_classes = classes.shape[0]
    votes = np.zeros((n_samples, n_classes))

    # Walk the estimators in the same (i, j) order in which they are stored.
    k = 0
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            pred = estimators[k].predict(X)
            votes[pred == 0, i] += 1
            votes[pred == 1, j] += 1
            k += 1

    # A sample is tied when more than one class reaches its maximum count.
    top_votes = votes.max(axis=1, keepdims=True)
    tied = (votes == top_votes).sum(axis=1) > 1

    # Any tie makes the whole call return -1 so the caller can leave the
    # sample unclassified.
    if np.any(tied):
        return -1
    return classes[votes.argmax(axis=1)]
def get_ovo_estimators_prediction(estimators, classes, X):
    """Predict with one-vs-one estimators by majority vote, flagging ties.

    Each estimator is asked for a hard prediction via ``predict``. For the
    estimator trained on the class pair ``(i, j)`` with ``i < j``, a
    prediction of 0 is counted as a vote for class ``i`` and a prediction of
    1 as a vote for class ``j``.

    In accordance with the hybrid algorithm, if any sample has more than one
    class sharing the highest vote count, the invalid value -1 is returned
    for the whole call. Otherwise the uniquely most-voted class of every
    sample is returned.

    Parameters
    ----------
    estimators : list of ``n_classes * (n_classes - 1) / 2`` estimators
        Pairwise estimators, ordered as the pairs ``(i, j)`` with ``i < j``
        are enumerated (``i`` outer, ``j`` inner).

    classes : numpy array of shape [n_classes]
        Array containing labels.

    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Data. Must expose ``shape``.

    Returns
    -------
    int or numpy array of shape [n_samples]
        -1 if at least one sample had a vote tie, otherwise the predicted
        class label of each sample.
    """
    n_samples = X.shape[0]
    n_classes = classes.shape[0]
    votes = np.zeros((n_samples, n_classes))

    # Walk the estimators in the same (i, j) order in which they are stored.
    k = 0
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            pred = estimators[k].predict(X)
            votes[pred == 0, i] += 1
            votes[pred == 1, j] += 1
            k += 1

    # A sample is tied when more than one class reaches its maximum count.
    top_votes = votes.max(axis=1, keepdims=True)
    tied = (votes == top_votes).sum(axis=1) > 1

    # Any tie makes the whole call return -1 so the caller can leave the
    # sample unclassified.
    if np.any(tied):
        return -1
    return classes[votes.argmax(axis=1)]
    def decision_function(self, X):
        """Decision function for the CustomOneVsOneClassifier.

        The computation depends on ``self.strategy``:

        * ``'vote'``: the hard predictions and the confidences of all
          pairwise estimators are passed to ``_ovr_decision_function``.
        * ``'weighted_vote'``, ``'dynamic_vote'``, ``'relative_competence'``:
          a score matrix is built per sample from the confidences with
          ``_score_matrix``, optionally adjusted by ``self._dynamic_ovo`` or
          ``self._relative_competence``, and its columns are summed.

        Any other strategy value yields ``None``.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        Y : array-like, shape = [n_samples, n_classes]
        """
        check_is_fitted(self, 'estimators_')

        confidences = np.vstack([_predict_binary(est, X)
                                 for est in self.estimators_]).T

        n_classes = len(self.classes_)

        if self.strategy == 'vote':
            # Hard predictions are only needed by this strategy, so they are
            # computed here rather than for every call.
            predictions = np.vstack([est.predict(X)
                                     for est in self.estimators_]).T
            return _ovr_decision_function(predictions, confidences,
                                          n_classes)

        if self.strategy in ('weighted_vote', 'dynamic_vote',
                             'relative_competence'):
            # One score matrix per sample (row of `confidences`).
            scores = [_score_matrix(c, n_classes) for c in confidences]

            if self.strategy == 'dynamic_vote':
                scores = self._dynamic_ovo(scores, X, n_classes)

            elif self.strategy == 'relative_competence':
                scores = self._relative_competence(scores, X, n_classes)

            # Sum each column of every per-sample matrix.
            return np.vstack([np.sum(m, axis=0) for m in scores])

        # Unrecognised strategy: keep the historical behaviour of returning
        # None rather than raising.
        return None
    # Example #6
    # 0
    def decision_function(self, X):
        """Decision function for the CustomOneVsOneClassifier.

        The computation depends on ``self.strategy``:

        * ``'vote'``: the hard predictions and the confidences of all
          pairwise estimators are passed to ``_ovr_decision_function``.
        * ``'weighted_vote'``, ``'dynamic_vote'``, ``'relative_competence'``:
          a score matrix is built per sample from the confidences with
          ``_score_matrix``, optionally adjusted by ``self._dynamic_ovo`` or
          ``self._relative_competence``, and its columns are summed.

        Any other strategy value yields ``None``.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        Y : array-like, shape = [n_samples, n_classes]
        """
        check_is_fitted(self, 'estimators_')

        confidences = np.vstack(
            [_predict_binary(est, X) for est in self.estimators_]).T

        n_classes = len(self.classes_)

        if self.strategy == 'vote':
            # Hard predictions are only needed by this strategy, so they are
            # computed here rather than for every call.
            predictions = np.vstack(
                [est.predict(X) for est in self.estimators_]).T
            return _ovr_decision_function(predictions, confidences, n_classes)

        if self.strategy in ('weighted_vote', 'dynamic_vote',
                             'relative_competence'):
            # One score matrix per sample (row of `confidences`).
            scores = [_score_matrix(c, n_classes) for c in confidences]

            if self.strategy == 'dynamic_vote':
                scores = self._dynamic_ovo(scores, X, n_classes)

            elif self.strategy == 'relative_competence':
                scores = self._relative_competence(scores, X, n_classes)

            # Sum each column of every per-sample matrix.
            return np.vstack([np.sum(m, axis=0) for m in scores])

        # Unrecognised strategy: keep the historical behaviour of returning
        # None rather than raising.
        return None
    def decision_function(self, X):
        """Decision function restricted to class pairs near the query.

        The neighbours of ``X`` are looked up with ``self.nbrs`` and reduced
        to ``neighbors_set`` by ``get_neighbors_above_threshold``. A pairwise
        estimator for classes ``(i, j)`` is evaluated only when ``i`` or
        ``j`` is in that set; every other pair contributes NaN for all
        samples. The stacked results are handed to
        ``self._dynamic_ovr_decision_function``.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        The value returned by ``self._dynamic_ovr_decision_function`` for the
        stacked predictions, confidences and ``len(self.classes_)``.
        """
        neighbors = self.nbrs.kneighbors(X, self.n_neighbors, return_distance=False)
        predictions = []
        confidences = []

        # NOTE(review): only the first row of `neighbors` is used, so the
        # pair selection presumably reflects the first query sample only --
        # confirm whether multi-row X is an intended use.
        neighbors_set = get_neighbors_above_threshold(self._fit_y, neighbors[0], self.threshold)

        # Recover n from the number of pairwise estimators m = n*(n-1)/2.
        n_classes = int(((1 + sqrt(4 * 2 * len(self.estimators_) + 1)
                          ) / 2).real)  # n*(n-1)/2 binary classificators

        # Placeholder row for skipped pairs. It has one entry per query row
        # (assumes `neighbors` has one row per sample of X) so that it stacks
        # with real prediction rows; a scalar NaN only stacked when X held a
        # single sample.
        skipped = np.full(len(neighbors), np.nan)

        k = 0
        for i in range(n_classes):
            for j in range(i + 1, n_classes):
                if i in neighbors_set or j in neighbors_set:
                    predictions.append(self.estimators_[k].predict(X))
                    confidences.append(_predict_binary(self.estimators_[k], X))
                else:
                    predictions.append(skipped)
                    confidences.append(skipped)
                k += 1

        predictions = np.vstack(predictions).T
        confidences = np.vstack(confidences).T
        return self._dynamic_ovr_decision_function(predictions, confidences, len(self.classes_))