def predict(self, X):
    """Predict multi-class targets using the fitted per-class estimators.

    Parameters
    ----------
    X : (sparse) array-like, shape = [n_samples, n_features]
        Data.

    Returns
    -------
    y : (sparse) array-like, shape = [n_samples, ], [n_samples, n_classes].
        Predicted multi-class targets.

    Notes
    -----
    When the label binarizer saw a "multiclass" target, each sample gets the
    class of the estimator with the highest ``_predict_binary`` output; on
    exact ties the estimator that comes later in ``estimators_`` wins.
    Otherwise an indicator matrix is built (one column per estimator, an
    entry wherever the output exceeds the threshold) and mapped back through
    ``label_binarizer_.inverse_transform``.
    """
    check_is_fitted(self, 'estimators_')

    # Classifiers exposing decision_function are cut at 0, everything else
    # at 0.5.
    has_margin = (hasattr(self.estimators_[0], "decision_function")
                  and is_classifier(self.estimators_[0]))
    thresh = 0 if has_margin else .5

    n_samples = _num_samples(X)

    if self.label_binarizer_.y_type_ != "multiclass":
        # Assemble the CSC indicator column by column: `row_ids` holds the
        # sample indices flagged by each estimator, `col_ptr` the running
        # column boundaries.
        row_ids = array.array('i')
        col_ptr = array.array('i', [0])
        for est in self.estimators_:
            flagged = np.where(_predict_binary(est, X) > thresh)[0]
            row_ids.extend(flagged)
            col_ptr.append(len(row_ids))
        ones = np.ones(len(row_ids), dtype=int)
        n_columns = len(self.estimators_)
        indicator = sp.csc_matrix((ones, row_ids, col_ptr),
                                  shape=(n_samples, n_columns))
        return self.label_binarizer_.inverse_transform(indicator)

    # Multiclass: keep a running per-sample maximum and the index of the
    # estimator that produced it.
    best_score = np.full(n_samples, -np.inf, dtype=float)
    best_index = np.zeros(n_samples, dtype=int)
    for idx, est in enumerate(self.estimators_):
        scores = _predict_binary(est, X)
        np.maximum(best_score, scores, out=best_score)
        best_index[best_score == scores] = idx
    return self.classes_[best_index]
def predict(self, X):
    """Predict targets using only the estimators selected by the neighbourhood.

    The nearest neighbours of the query are looked up with ``self.nbrs`` and
    reduced by ``get_neighbors_above_threshold`` to a collection of estimator
    indices; estimators whose index is not in that collection are skipped.

    Parameters
    ----------
    X : (sparse) array-like, shape = [n_samples, n_features]
        Data.

    Returns
    -------
    y : array-like
        For a "multiclass" target, ``label_binarizer_.classes_`` indexed by
        the best-scoring selected estimator per sample. Otherwise the result
        of ``label_binarizer_.inverse_transform`` on the indicator matrix.
    """
    # Fail with the standard "not fitted" error before touching fitted state.
    check_is_fitted(self, 'estimators_')

    neighbors = self.nbrs.kneighbors(X, self.n_neighbors,
                                     return_distance=False)
    # NOTE(review): only the neighbours of the first row of X are consulted,
    # so the same estimator selection is applied to every sample in X.
    neighbors_set = get_neighbors_above_threshold(self._fit_y, neighbors[0],
                                                  self.threshold)

    # Classifiers exposing decision_function are cut at 0, everything else
    # at 0.5.
    if (hasattr(self.estimators_[0], "decision_function") and
            is_classifier(self.estimators_[0])):
        thresh = 0
    else:
        thresh = .5

    n_samples = _num_samples(X)
    if self.label_binarizer_.y_type_ == "multiclass":
        maxima = np.empty(n_samples, dtype=float)
        maxima.fill(-np.inf)
        # NOTE: if no estimator is selected, every sample keeps index 0.
        argmaxima = np.zeros(n_samples, dtype=int)
        for i, e in enumerate(self.estimators_):
            if i not in neighbors_set:
                continue
            pred = _predict_binary(e, X)
            np.maximum(maxima, pred, out=maxima)
            # On exact ties the later selected estimator wins.
            argmaxima[maxima == pred] = i
        return self.label_binarizer_.classes_[np.array(argmaxima.T)]
    else:
        indices = array.array('i')
        indptr = array.array('i', [0])
        for i, e in enumerate(self.estimators_):
            if i in neighbors_set:
                indices.extend(np.where(_predict_binary(e, X) > thresh)[0])
            # Every estimator owns one column of the CSC matrix, so a column
            # boundary is recorded even for skipped estimators (they become
            # empty columns). Without it `indptr` would be shorter than
            # n_columns + 1 and the remaining columns would no longer line
            # up with their class index.
            indptr.append(len(indices))
        data = np.ones(len(indices), dtype=int)
        indicator = sp.csc_matrix((data, indices, indptr),
                                  shape=(n_samples, len(self.estimators_)))
        return self.label_binarizer_.inverse_transform(indicator)
def get_ovo_estimators_prediction(estimators, classes, X):
    '''
    Combine the votes of one-vs-one estimators into a class prediction.

    ``predict`` is called on every pairwise estimator. For the estimator
    trained on the class pair {i, j} (with i < j), a prediction of 0 counts
    as a vote for class i and a prediction of 1 as a vote for class j.

    In accordance with the hybrid algorithm, if any sample has more than one
    class sharing the highest vote count, the whole call returns the invalid
    value -1. Otherwise the uniquely most-voted class is returned for each
    sample.

    Parameters
    ----------
    estimators : list of n_classes * (n_classes - 1) / 2 estimators
        Pairwise estimators, ordered by class pair (0, 1), (0, 2), ...,
        (1, 2), ... Each must implement ``predict`` and return 0/1 labels.

    classes : numpy array of shape [n_classes]
        Array containing labels.

    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Data.

    Returns
    -------
    The scalar -1 if at least one sample had a vote tie, otherwise an array
    with the predicted class of every sample.
    '''
    n_samples = X.shape[0]
    n_classes = classes.shape[0]
    votes = np.zeros((n_samples, n_classes))

    k = 0
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            # Only the hard 0/1 prediction is needed for voting; confidence
            # scores are not used, so they are not computed.
            pred = estimators[k].predict(X)
            votes[pred == 0, i] += 1
            votes[pred == 1, j] += 1
            k += 1

    # find all places with maximum votes per sample
    maxima = votes == np.max(votes, axis=1)[:, np.newaxis]

    # if there are ties, return -1 to signal that we should leave this
    # sample unclassified
    if np.any(maxima.sum(axis=1) > 1):
        return -1
    return classes[votes.argmax(axis=1)]
def decision_function(self, X):
    """Decision function for the CustomOneVsOneClassifier.

    The pairwise confidences of all estimators are combined according to
    ``self.strategy``:

    * ``'vote'`` delegates to ``_ovr_decision_function`` with the hard
      predictions and the confidences of every estimator.
    * ``'weighted_vote'`` builds one score matrix per row of the confidence
      array with ``_score_matrix`` and sums its columns.
    * ``'dynamic_vote'`` and ``'relative_competence'`` do the same but first
      pass the score matrices through ``self._dynamic_ovo`` or
      ``self._relative_competence`` respectively.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    Y : array-like, shape = [n_samples, n_classes]

    Raises
    ------
    ValueError
        If ``self.strategy`` is not one of the supported strategies.
    """
    check_is_fitted(self, 'estimators_')

    # One column per estimator after the transpose (assuming
    # `_predict_binary` returns one value per sample -- TODO confirm).
    confidences = np.vstack([_predict_binary(est, X)
                             for est in self.estimators_]).T
    n_classes = len(self.classes_)

    if self.strategy == 'vote':
        # Hard predictions are only needed here, so they are not computed
        # for the other strategies.
        predictions = np.vstack([est.predict(X)
                                 for est in self.estimators_]).T
        return _ovr_decision_function(predictions, confidences, n_classes)

    if self.strategy in ('weighted_vote', 'dynamic_vote',
                         'relative_competence'):
        # Compute matrix with classes probabilities
        scores = [_score_matrix(c, n_classes) for c in confidences]

        if self.strategy == 'dynamic_vote':
            scores = self._dynamic_ovo(scores, X, n_classes)
        elif self.strategy == 'relative_competence':
            scores = self._relative_competence(scores, X, n_classes)

        # Sum of each probability column representing each class
        return np.vstack([np.sum(m, axis=0) for m in scores])

    # Previously an unknown strategy fell through and returned None.
    raise ValueError("Unknown strategy %r; expected one of 'vote', "
                     "'weighted_vote', 'dynamic_vote', "
                     "'relative_competence'." % (self.strategy,))
def decision_function(self, X):
    """Decision function for the CustomOneVsOneClassifier.

    Pairwise confidences are gathered from every estimator and aggregated
    in the way selected by ``self.strategy``:

    * ``'vote'``: hard predictions and confidences are handed to
      ``_ovr_decision_function``.
    * ``'weighted_vote'``: each row of the confidence array is turned into
      a score matrix by ``_score_matrix`` and the columns of that matrix
      are summed.
    * ``'dynamic_vote'`` / ``'relative_competence'``: as ``'weighted_vote'``,
      with the score matrices first transformed by ``self._dynamic_ovo`` /
      ``self._relative_competence``.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    Y : array-like, shape = [n_samples, n_classes]

    Raises
    ------
    ValueError
        If ``self.strategy`` is not one of the supported strategies.
    """
    check_is_fitted(self, 'estimators_')

    confidences = np.vstack(
        [_predict_binary(est, X) for est in self.estimators_]).T
    n_classes = len(self.classes_)

    if self.strategy == 'vote':
        # Only this strategy consumes the hard predictions, so the extra
        # predict pass over all estimators is limited to this branch.
        predictions = np.vstack(
            [est.predict(X) for est in self.estimators_]).T
        return _ovr_decision_function(predictions, confidences, n_classes)

    if self.strategy in ('weighted_vote', 'dynamic_vote',
                         'relative_competence'):
        # Compute matrix with classes probabilities
        scores = [_score_matrix(c, n_classes) for c in confidences]

        if self.strategy == 'dynamic_vote':
            scores = self._dynamic_ovo(scores, X, n_classes)
        elif self.strategy == 'relative_competence':
            scores = self._relative_competence(scores, X, n_classes)

        # Sum of each probability column representing each class
        return np.vstack([np.sum(m, axis=0) for m in scores])

    # An unrecognised strategy used to return None silently; fail loudly.
    raise ValueError("Unknown strategy %r; expected one of 'vote', "
                     "'weighted_vote', 'dynamic_vote', "
                     "'relative_competence'." % (self.strategy,))
def decision_function(self, X):
    """Decision values from the pairwise estimators selected by the neighbourhood.

    The nearest neighbours of the query are looked up with ``self.nbrs`` and
    reduced by ``get_neighbors_above_threshold`` to a collection of class
    indices. The estimator for the class pair (i, j) is evaluated only when
    i or j is in that collection; every other pair contributes NaN
    predictions and confidences. The stacked results are passed to
    ``self._dynamic_ovr_decision_function``.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    Whatever ``self._dynamic_ovr_decision_function`` returns for the
    stacked predictions and confidences and ``len(self.classes_)``.
    """
    neighbors = self.nbrs.kneighbors(X, self.n_neighbors,
                                     return_distance=False)
    # NOTE(review): only the neighbours of the first row of X are consulted,
    # so the same pair selection is applied to every sample in X.
    neighbors_set = get_neighbors_above_threshold(self._fit_y, neighbors[0],
                                                  self.threshold)

    # Recover n from the number of estimators:
    # n*(n-1)/2 binary classificators
    n_classes = int(((1 + sqrt(4 * 2 * len(self.estimators_) + 1)) / 2).real)

    # Placeholder for skipped pairs: one NaN per query sample, so it stacks
    # with the per-sample outputs of evaluated estimators. A bare scalar NaN
    # made np.vstack fail whenever X had more than one row.
    # Assumes kneighbors returns one row of indices per query -- TODO confirm.
    n_samples = len(neighbors)
    skipped = np.full(n_samples, np.nan)

    predictions = []
    confidences = []
    k = 0
    for i in range(n_classes):
        for j in range(i + 1, n_classes):
            if i in neighbors_set or j in neighbors_set:
                predictions.append(self.estimators_[k].predict(X))
                confidences.append(_predict_binary(self.estimators_[k], X))
            else:
                predictions.append(skipped)
                confidences.append(skipped)
            k += 1

    # After the transpose: one column per class pair.
    predictions = np.vstack(predictions).T
    confidences = np.vstack(confidences).T
    return self._dynamic_ovr_decision_function(predictions, confidences,
                                               len(self.classes_))