def short_error(y_true, y_pred, normalize=True, sample_weight=None):
    """
    Error of short classification.

    Only the samples whose true label equals 0 are considered; a sample
    counts as an error when its predicted label differs from 0.

    Parameters
    ----------
    y_true : 1d array-like
        Ground truth (correct) labels.

    y_pred : 1d array-like
        Predicted labels, as returned by a classifier.

    normalize : bool, optional (default=True)
        Forwarded to ``_weighted_sum``. Per the original documentation:
        if False, return the number of misclassified samples, otherwise
        return the fraction of misclassified samples.

    sample_weight : array-like of shape = [n_samples], optional
        Sample weights, aligned with ``y_true``. Only the weights of the
        samples whose true label is 0 are used.

    Returns
    -------
    error : float or int
        Aggregated error as produced by ``_weighted_sum``.
        The best performance is 0.
    """
    # Local import: the numpy alias available at module level is not
    # visible from this block.
    import numpy as np

    y_true = np.asarray(y_true)
    short_mask = y_true == 0
    short_true = y_true[short_mask]
    short_pred = np.asarray(y_pred)[short_mask]
    score = short_pred != short_true

    # The weights must be restricted to the same subset as the scores,
    # otherwise their length no longer matches ``score``.
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)[short_mask]

    # NOTE(review): if no sample has true label 0, ``score`` is empty;
    # how that is handled depends on ``_weighted_sum`` -- confirm.
    return _weighted_sum(score, sample_weight, normalize)
def my_log_loss(y_true, y_pred, eps=1e-15, normalize=True, labels=None,
                encoder=None, weights_targets=None):
    """
    Class-weighted log-likelihood of predicted probabilities.

    Parameters
    ----------
    y_true : array-like
        True labels. They are passed to ``encoder.transform`` and, when
        ``weights_targets`` is given, also used to index it.

    y_pred : array-like of shape (n_samples, n_classes)
        Predicted probabilities. They are clipped to ``[eps, 1 - eps]``
        and re-normalised so that each row sums to 1.

    eps : float, optional (default=1e-15)
        Clipping bound that keeps ``log`` away from 0.

    normalize : bool, optional (default=True)
        Forwarded to ``_weighted_sum``.

    labels : optional
        Unused; kept for signature compatibility.

    encoder : object with a ``transform`` method
        Must be provided. ``encoder.transform(y_true)`` has to return an
        array that multiplies element-wise with ``y_pred``
        (presumably a one-hot label indicator -- confirm with caller).

    weights_targets : indexable, optional
        Per-class weights looked up as ``weights_targets[y_true]``.
        When None, the samples are aggregated without weights.

    Returns
    -------
    loss : float
        Aggregation, via ``_weighted_sum``, of the per-sample sums of
        ``transformed_labels * log(y_pred)``.

    Notes
    -----
    NOTE(review): the per-sample term is NOT negated, so the returned
    value is non-positive (a log-likelihood), unlike the conventional
    log loss. The sign is left unchanged here because callers may rely
    on it -- confirm the intended sign.
    """
    transformed_labels = encoder.transform(y_true)

    # numpy.clip returns a new array, so the in-place division below does
    # not touch the caller's y_pred.
    y_pred = numpy.clip(y_pred, eps, 1 - eps)
    y_pred /= y_pred.sum(axis=1)[:, numpy.newaxis]

    # The default weights_targets=None used to fail on indexing; fall back
    # to unweighted aggregation instead.
    if weights_targets is None:
        sample_weight = None
    else:
        sample_weight = weights_targets[y_true]

    loss = (transformed_labels * numpy.log(y_pred)).sum(axis=1)
    return _weighted_sum(loss, sample_weight, normalize)
def accuracy_rounding_score(y_true, y_pred, normalize=True, sample_weight=None):
    """
    Accuracy after snapping each prediction to the nearest allowed label.

    Every value of ``y_pred`` is replaced by the closest entry of a fixed
    label grid (500 ... 15000); the snapped predictions are then compared
    with ``y_true`` for exact equality.

    Parameters
    ----------
    y_true : array-like
        Ground truth labels.

    y_pred : array-like
        Raw numeric predictions. The input is not modified; snapping is
        done on a copy.

    normalize : bool, optional (default=True)
        Forwarded to ``_weighted_sum``.

    sample_weight : array-like, optional
        Sample weights, forwarded to ``_weighted_sum``.

    Returns
    -------
    score : float or int
        Aggregated per-sample match indicator as produced by
        ``_weighted_sum``.
    """
    # Local import: the numpy alias available at module level is not
    # visible from this block.
    import numpy as np

    labels = np.array([
        500., 1000., 1500., 2000., 2500., 3000., 3500., 4000., 4500.,
        5000., 6000., 7000., 8000., 9000., 10000., 12500., 15000.,
    ])

    # Work on a float copy so the caller's prediction array stays untouched.
    predictions = np.array(y_pred, dtype=float)

    # Distance of every prediction to every label. argmin returns the first
    # minimum, so an exact tie goes to the lower label, which matches
    # min(labels, key=...) over the ascending label list.
    nearest = np.abs(predictions[..., np.newaxis] - labels).argmin(axis=-1)
    y_pred = labels[nearest]

    # Compute accuracy for each possible representation
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    check_consistent_length(y_true, y_pred, sample_weight)
    if y_type.startswith('multilabel'):
        differing_labels = count_nonzero(y_true - y_pred, axis=1)
        score = differing_labels == 0
    else:
        score = y_true == y_pred

    return _weighted_sum(score, sample_weight, normalize)
def decision_function(self, X):
    """
    Compute the (weighted) sum of votes.

    Each feature votes according to how its absolute distance to row 0 of
    ``self.averages`` compares with its absolute distance to row 1.

    Parameters
    ----------
    X : np.ndarray, List
        Data in the form of rows x columns = samples x features

    Returns
    -------
    The (weighted) sum of votes, divided by the number of features.
    """
    dist_to_first = abs(X - self.averages[0, :])
    dist_to_second = abs(X - self.averages[1, :])

    if self.centroid_weighting:
        # Graded votes: difference of distances, scaled (not centred)
        # along axis 1.
        votes = robust_scale(dist_to_first - dist_to_second,
                             with_centering=False, axis=1)
    else:
        # Hard votes: +0.5 where the first distance is larger, else -0.5.
        votes = (dist_to_first > dist_to_second) - 0.5

    n_features = votes.shape[1]
    if self.stats_weighting is None:
        return np.sum(votes, 1) / n_features
    return _weighted_sum(votes, self.feature_importances_) / n_features
def degree_of_agreement(y_true, y_pred, normalize=True, sample_weight=None):
    """
    Degree-of-agreement (DOA) statistic between true and predicted labels.

    The statistic is computed as::

        |pred AND true| / |true|  -  |pred AND NOT true| / |NOT true|

    where ``|.|`` counts non-zero entries.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth labels.

    y_pred : array-like or label indicator matrix
        Predicted labels.

    normalize : bool, optional (default=True)
        Forwarded to ``_weighted_sum``.

    sample_weight : array-like, optional
        Sample weights, forwarded to ``_weighted_sum``.

    Returns
    -------
    score : float
        Aggregation of the DOA statistic as produced by ``_weighted_sum``.

    Notes
    -----
    NOTE(review): in both branches the denominators ``len_true`` and
    ``len_nottrue`` are totals over the whole input, while the multilabel
    numerators are per-row counts. This is preserved as-is -- confirm it
    is intended.
    """
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    check_consistent_length(y_true, y_pred, sample_weight)

    if y_type.startswith('multilabel'):
        # Oddly, we may get an "invalid" rather than a "divide" error here,
        # so both are silenced.
        with np.errstate(divide='ignore', invalid='ignore'):
            # Per-row count of positions set in both y_true and y_pred.
            pred_and_true = count_nonzero(y_true.multiply(y_pred), axis=1)
            len_true = count_nonzero(y_true)

            # Sparse matrices do not support ``1 - y_true`` (subtracting a
            # sparse matrix from a non-zero scalar is not implemented), so
            # the complement counts are derived arithmetically. This assumes
            # 0/1 indicator entries.
            pred_and_nottrue = count_nonzero(y_pred, axis=1) - pred_and_true
            len_nottrue = y_true.shape[0] * y_true.shape[1] - len_true

            pred_or_true = count_nonzero(y_true + y_pred, axis=1)

            # compute the doa statistic
            score = pred_and_true / len_true - pred_and_nottrue / len_nottrue
            # Rows where neither y_true nor y_pred has any entry count as
            # perfect agreement.
            score[pred_or_true == 0.0] = 1.0
    else:
        # NOTE(review): no errstate guard here, so a zero denominator is not
        # silenced in this branch.
        pred_and_true = np.count_nonzero(y_true * y_pred, axis=0)
        len_true = np.count_nonzero(y_true)
        not_ytrue = np.subtract(1, y_true)
        pred_and_nottrue = np.count_nonzero(not_ytrue * y_pred, axis=0)
        len_nottrue = np.count_nonzero(not_ytrue)

        # compute the doa statistic
        # NOTE(review): for 1-d inputs this is a single value rather than
        # one score per sample -- confirm that ``_weighted_sum`` is meant
        # to receive it together with ``sample_weight``.
        score = pred_and_true / len_true - pred_and_nottrue / len_nottrue

    return _weighted_sum(score, sample_weight, normalize)
def decision_function(self, X):
    """
    Compute the (weighted) sum of votes.

    Args:
        X (np.ndarray, List): Data in the form of rows x columns = samples x features.

    Returns:
        np.ndarray: The (weighted) sum of votes for each sample in the form 1 x samples.

    Raises:
        ValueError: If ``self.distance_type`` is neither ``'manhattan'`` nor
            ``'euclidean'``.
    """
    X = np.array(X)
    first_prototype = self.prototypes_[0, :]
    second_prototype = self.prototypes_[1, :]

    if self.distance_type == 'manhattan':
        dist_to_first = abs(X - first_prototype)
        dist_to_second = abs(X - second_prototype)
        if self.centroid_weighting:
            # Graded votes: per-feature difference of absolute distances.
            self.votes_ = dist_to_first - dist_to_second
        else:
            # Hard votes: +0.5 where the first distance is larger, else -0.5.
            self.votes_ = (dist_to_first > dist_to_second) - 0.5
        return _weighted_sum(self.votes_, self.feature_importances_) / self.votes_.shape[1]

    if self.distance_type == 'euclidean':
        # Difference of the importance-weighted squared distances to the
        # two prototypes.
        weights = self.feature_importances_
        return (np.sum((weights * (X - first_prototype)) ** 2, axis=1)
                - np.sum((weights * (X - second_prototype)) ** 2, axis=1))

    # Previously an unknown distance type fell through to ``return dec`` with
    # ``dec`` never assigned; fail with an explicit message instead.
    raise ValueError(
        f"Unknown distance_type {self.distance_type!r}; "
        "expected 'manhattan' or 'euclidean'."
    )