def generate_noise_matrix(s, Y, num_classes=2):
    """Estimate the noise matrix between the labels ``s`` and the labels ``Y``.

    ``Y`` is turned into a one-hot matrix (a 1.0 in the column of each
    example's label, 0.0 elsewhere) which is passed, in place of predicted
    probabilities, to ``estimate_py_and_noise_matrices_from_probabilities``
    together with ``s``.

    Parameters
    ----------
    s : np.array
        Labels forwarded unchanged as the first argument of the estimator.

    Y : np.array
        One label per example. Each value is truncated to an int and used as
        a column index, so it must be a valid index for ``num_classes``
        columns.

    num_classes : int
        Number of columns of the one-hot matrix. Defaults to 2, the value
        that used to be hard-coded.

    Returns
    -------
    The second element (the noise matrix) of the 4-tuple returned by
    ``estimate_py_and_noise_matrices_from_probabilities``. The matrix is also
    printed to stdout, as before.
    """
    labels = np.asarray(Y).astype(int).reshape(-1)
    n_examples = labels.shape[0]

    # Vectorized one-hot encoding: one fancy-indexed assignment instead of a
    # Python-level loop over the rows.
    psx = np.zeros((n_examples, num_classes))
    psx[np.arange(n_examples), labels] = 1.0

    _, noise_matrix, _, _ = estimate_py_and_noise_matrices_from_probabilities(
        s, psx)
    print(noise_matrix)
    return noise_matrix
def fit(
    self,
    X,
    s,
    psx=None,
    thresholds=None,
    noise_matrix=None,
    inverse_noise_matrix=None,
):
    """Estimate the label noise in (X, s) and flag the examples to prune.

    This is the estimation / pruning half of confident learning: it obtains
    the noise matrices (from the arguments, or by estimating them), obtains
    out-of-sample predicted probabilities ``psx`` (from the argument, or via
    cross-validation with ``self.clf``), and asks ``get_noise_indices`` which
    examples are likely label errors. This variant does not itself call
    ``self.clf.fit`` on the pruned data; ``self.clf`` is only handed to the
    cross-validation helpers.

    Parameters
    ----------
    X : np.array
      Input feature matrix (N, D), 2D numpy array

    s : np.array
      A binary vector of labels, s, which may contain mislabeling.

    psx : np.array (shape (N, K))
      P(s=k|x), a matrix with K (noisy) probabilities for each of the N
      examples x. psx should have been computed using 3 (or higher) fold
      cross-validation. If None (default), it is computed here using
      cross-validation.

    thresholds : iterable (list or np.array) of shape (K, 1) or (K,)
      P(s^=k|s=k). If an example has a predicted probability "greater" than
      this threshold, it is counted as having hidden label y = k. Not used
      for pruning; it is only forwarded to the noise estimators, which run
      when neither noise matrix is supplied. Values should be between 0
      and 1. Default is None.

    noise_matrix : np.array of shape (K, K), K = number of classes
      A conditional probability matrix of the form P(s=k_s|y=k_y) containing
      the fraction of examples in every class, labeled as every other class.
      Assumes columns of noise_matrix sum to 1.

    inverse_noise_matrix : np.array of shape (K, K), K = number of classes
      A conditional probability matrix of the form P(y=k_y|s=k_s)
      representing the estimated fraction of observed examples in each class
      k_s that are mislabeled examples from every other class k_y. If None,
      it is derived from noise_matrix, or estimated from psx and s.
      Assumes columns of inverse_noise_matrix sum to 1.

    Returns
    -------
    tuple
      ``(noise_mask, noise_matrix, inverse_noise_matrix, confident_joint,
      psx)``. The first four are also stored on ``self``; confident_joint is
      None when a noise matrix was supplied by the caller.

    Raises
    ------
    ValueError
      If a supplied noise_matrix or inverse_noise_matrix has trace <= 1.
    """

    assert_inputs_are_valid(X, s, psx)

    # A usable (inverse) noise matrix must have a trace above 1.
    if noise_matrix is not None:
        nm_trace = np.trace(noise_matrix)
        if nm_trace <= 1:
            raise ValueError(
                "Trace(noise_matrix) is {}, but must exceed 1.".format(
                    np.round(nm_trace, 2)))
    if inverse_noise_matrix is not None:
        inv_trace = np.trace(inverse_noise_matrix)
        if inv_trace <= 1:
            raise ValueError(
                "Trace(inverse_noise_matrix) is {}, but must exceed 1.".format(
                    np.round(inv_trace, 2)))

    # Number of classes, and the prior of the observed labels p(s=k).
    self.K = len(np.unique(s))
    self.ps = value_counts(s) / float(len(s))
    self.confident_joint = None

    nm_given = noise_matrix is not None
    inv_given = inverse_noise_matrix is not None

    # Take whatever the caller supplied as-is ...
    if nm_given:
        self.noise_matrix = noise_matrix
    if inv_given:
        self.inverse_noise_matrix = inverse_noise_matrix

    # ... and derive or estimate whatever is missing.
    if nm_given and not inv_given:
        self.py, self.inverse_noise_matrix = compute_py_inv_noise_matrix(
            self.ps, self.noise_matrix)
    elif inv_given and not nm_given:
        self.noise_matrix = compute_noise_matrix_from_inverse(
            self.ps, self.inverse_noise_matrix)
    elif not nm_given and not inv_given:
        if psx is None:
            # Nothing supplied: estimate the matrices and psx together.
            estimates = estimate_py_noise_matrices_and_cv_pred_proba(
                X=X,
                s=s,
                clf=self.clf,
                cv_n_folds=self.cv_n_folds,
                thresholds=thresholds,
                converge_latent_estimates=self.converge_latent_estimates,
                seed=self.seed,
            )
            (self.py, self.noise_matrix, self.inverse_noise_matrix,
             self.confident_joint, psx) = estimates
        else:
            # psx is provided by user (assumed holdout probabilities)
            estimates = estimate_py_and_noise_matrices_from_probabilities(
                s=s,
                psx=psx,
                thresholds=thresholds,
                converge_latent_estimates=self.converge_latent_estimates,
            )
            (self.py, self.noise_matrix, self.inverse_noise_matrix,
             self.confident_joint) = estimates

    # A matrix was supplied but psx was not: psx is still needed for pruning.
    if psx is None:
        psx = estimate_cv_predicted_probabilities(
            X=X,
            labels=s,
            clf=self.clf,
            cv_n_folds=self.cv_n_folds,
            seed=self.seed,
        )

    # self.pulearning, when set, is the integer class that has no noise.
    if self.pulearning is not None:  # pragma: no cover
        self.noise_matrix = remove_noise_from_class(
            self.noise_matrix,
            class_without_noise=self.pulearning,
        )
        # TODO: self.inverse_noise_matrix = remove_noise_from_class(self.inverse_noise_matrix, class_without_noise=self.pulearning)

    # The actual work: find the examples we wish to prune.
    self.noise_mask = get_noise_indices(
        s,
        psx,
        inverse_noise_matrix=self.inverse_noise_matrix,
        confident_joint=self.confident_joint,
        prune_method=self.prune_method,
    )

    # With pulearning set, un-flag every example whose given label differs
    # from the pulearning class.
    if self.pulearning is not None:
        self.noise_mask[s != self.pulearning] = False

    return (
        self.noise_mask,
        self.noise_matrix,
        self.inverse_noise_matrix,
        self.confident_joint,
        psx,
    )
def fit(
    self,
    X,
    s,
    psx=None,
    thresholds=None,
    noise_matrix=None,
    inverse_noise_matrix=None,
):
    """Fit ``self.clf`` on (X, s) after pruning likely label errors.

    This method implements confident learning. It obtains the noise matrices
    (from the arguments, or by estimating them), obtains out-of-sample
    predicted probabilities ``psx``, finds the likely label errors with
    ``get_noise_indices``, removes them, and fits the classifier
    (``self.clf``) on the remaining examples.

    Parameters
    ----------
    X : :obj:`np.array`
      Input feature matrix (N, D), 2D numpy array

    s : :obj:`np.array`
      A binary vector of labels, s, which may contain mislabeling.

    psx : :obj:`np.array` (shape (N, K))
      P(s=k|x), a matrix with K (noisy) probabilities for each of the N
      examples x. psx should have been computed using 3 (or higher) fold
      cross-validation. If None (default), it is computed here using
      cross-validation.

    thresholds : :obj:`iterable` (list or np.array) of shape (K, 1) or (K,)
      P(s^=k|s=k). List of probabilities used to determine the cutoff
      predicted probability necessary to consider an example as a given
      class label. Default is ``None`` (computed automatically by the
      estimators). If an example has a predicted probability "greater" than
      this threshold, it is counted as having hidden label y = k. This is
      not used for pruning, only for estimating the noise rates using
      confident counts; it is forwarded only when neither noise matrix is
      supplied. Values in list should be between 0 and 1.

    noise_matrix : :obj:`np.array` of shape (K, K), K = number of classes
      A conditional probability matrix of the form P(s=k_s|y=k_y) containing
      the fraction of examples in every class, labeled as every other class.
      Assumes columns of noise_matrix sum to 1.

    inverse_noise_matrix : :obj:`np.array` of shape (K, K), K = number of classes
      A conditional probability matrix of the form P(y=k_y|s=k_s). Contains
      the estimated fraction of observed examples in each class k_s that are
      mislabeled examples from every other class k_y. If None, it is derived
      from noise_matrix, or estimated from psx and s. Assumes columns of
      inverse_noise_matrix sum to 1.

    Returns
    -------
    ``self.clf``, after it has been fit on the pruned data. The pruning mask
    is stored in ``self.noise_mask`` and, when the classifier accepts them,
    the per-example weights in ``self.sample_weight``.

    Raises
    ------
    ValueError
      If a supplied noise_matrix or inverse_noise_matrix has trace <= 1.
    """

    # Check inputs
    assert_inputs_are_valid(X, s, psx)
    if noise_matrix is not None and np.trace(noise_matrix) <= 1:
        t = np.round(np.trace(noise_matrix), 2)
        raise ValueError(
            "Trace(noise_matrix) is {}, but must exceed 1.".format(t))
    if inverse_noise_matrix is not None and (
            np.trace(inverse_noise_matrix) <= 1):
        t = np.round(np.trace(inverse_noise_matrix), 2)
        raise ValueError(
            "Trace(inverse_noise_matrix) is {}. Must exceed 1.".format(t))

    # Number of classes
    self.K = len(np.unique(s))

    # 'ps' is p(s=k)
    self.ps = value_counts(s) / float(len(s))

    self.confident_joint = None
    # If needed, compute noise rates (mislabeling) for all classes.
    # Also, if needed, compute P(s=k|x), denoted psx.

    # Set / re-set noise matrices / psx; estimate if not provided.
    if noise_matrix is not None:
        self.noise_matrix = noise_matrix
        if inverse_noise_matrix is None:
            self.py, self.inverse_noise_matrix = (
                compute_py_inv_noise_matrix(self.ps, self.noise_matrix))
    if inverse_noise_matrix is not None:
        self.inverse_noise_matrix = inverse_noise_matrix
        if noise_matrix is None:
            self.noise_matrix = compute_noise_matrix_from_inverse(
                self.ps,
                self.inverse_noise_matrix,
            )
    if noise_matrix is None and inverse_noise_matrix is None:
        if psx is None:
            self.py, self.noise_matrix, self.inverse_noise_matrix, \
                self.confident_joint, psx = \
                estimate_py_noise_matrices_and_cv_pred_proba(
                    X=X,
                    s=s,
                    clf=self.clf,
                    cv_n_folds=self.cv_n_folds,
                    thresholds=thresholds,
                    converge_latent_estimates=(
                        self.converge_latent_estimates),
                    seed=self.seed,
                )
        else:  # psx is provided by user (assumed holdout probabilities)
            self.py, self.noise_matrix, self.inverse_noise_matrix, \
                self.confident_joint = \
                estimate_py_and_noise_matrices_from_probabilities(
                    s=s,
                    psx=psx,
                    thresholds=thresholds,
                    converge_latent_estimates=(
                        self.converge_latent_estimates),
                )

    if psx is None:
        psx = estimate_cv_predicted_probabilities(
            X=X,
            labels=s,
            clf=self.clf,
            cv_n_folds=self.cv_n_folds,
            seed=self.seed,
        )

    # if pulearning == the integer specifying the class without noise.
    if self.K == 2 and self.pulearning is not None:  # pragma: no cover
        clean = self.pulearning
        other = 1 - clean
        # Work on copies so matrices passed in by the caller are not
        # modified in place.
        self.noise_matrix = np.array(self.noise_matrix)
        self.inverse_noise_matrix = np.array(self.inverse_noise_matrix)
        # pulearning = 1 (no error in 1 class) implies p(s=1|y=0) = 0
        self.noise_matrix[clean][other] = 0
        self.noise_matrix[other][other] = 1
        # pulearning = 1 (no error in 1 class) implies p(y=0|s=1) = 0
        self.inverse_noise_matrix[other][clean] = 0
        self.inverse_noise_matrix[clean][clean] = 1
        # pulearning = 1 (no error in 1 class) implies p(s=1,y=0) = 0
        # The confident joint only exists when the noise was estimated here;
        # it is None when the caller supplied a (inverse) noise matrix.
        if self.confident_joint is not None:
            self.confident_joint[clean][other] = 0
            self.confident_joint[other][other] = 1

    # This is the actual work of this function.

    # Get the indices of the examples we wish to prune
    # NOTE(review): n_jobs is forwarded here; confirm the get_noise_indices
    # in scope for this class accepts it.
    self.noise_mask = get_noise_indices(
        s,
        psx,
        inverse_noise_matrix=self.inverse_noise_matrix,
        confident_joint=self.confident_joint,
        prune_method=self.prune_method,
        n_jobs=self.n_jobs,
    )

    x_mask = ~self.noise_mask
    x_pruned = X[x_mask]
    s_pruned = s[x_mask]

    # Check if sample_weight is a parameter of clf.fit(). Python 3 provides
    # getfullargspec; only fall back to getargspec where it is the sole
    # option (Python 2). On Python 3 getargspec is deprecated, rejects
    # annotated / keyword-only signatures, and is removed in 3.11.
    if hasattr(inspect, 'getfullargspec'):
        fit_spec = inspect.getfullargspec(self.clf.fit)
        fit_params = list(fit_spec.args) + list(fit_spec.kwonlyargs)
    else:
        fit_params = inspect.getargspec(self.clf.fit).args

    if 'sample_weight' in fit_params:
        # Re-weight examples in the loss function for the final fitting
        # s.t. the "apparent" original number of examples in each class
        # is preserved, even though the pruned sets may differ.
        self.sample_weight = np.ones(np.shape(s_pruned))
        for k in range(self.K):
            sample_weight_k = 1.0 / self.noise_matrix[k][k]
            self.sample_weight[s_pruned == k] = sample_weight_k

        self.clf.fit(x_pruned, s_pruned, sample_weight=self.sample_weight)
    else:
        # This is less accurate, but best we can do if no sample_weight.
        self.clf.fit(x_pruned, s_pruned)

    return self.clf
def fit(
    self,
    X,
    s,
    psx=None,
    thresholds=None,
    noise_matrix=None,
    inverse_noise_matrix=None,
):
    """Fit ``self.clf`` on (X, s) after pruning likely label errors.

    This method implements confident learning. It obtains the noise matrices
    (from the arguments, or by estimating them), obtains out-of-sample
    predicted probabilities ``psx``, finds the likely label errors with
    ``get_noise_indices``, removes them, and fits the classifier
    (``self.clf``) on the remaining examples.

    Parameters
    ----------
    X : np.array
      Input feature matrix (N, D), 2D numpy array

    s : np.array
      A binary vector of labels, s, which may contain mislabeling.

    psx : np.array (shape (N, K))
      P(s=k|x), a matrix with K (noisy) probabilities for each of the N
      examples x. psx should have been computed using 3 (or higher) fold
      cross-validation. If None (default), it is computed here using
      cross-validation.

    thresholds : iterable (list or np.array) of shape (K, 1) or (K,)
      P(s^=k|s=k). If an example has a predicted probability "greater" than
      this threshold, it is counted as having hidden label y = k. This is
      not used for pruning, only for estimating the noise rates using
      confident counts; it is forwarded only when neither noise matrix is
      supplied. This value should be between 0 and 1. Default is None.

    noise_matrix : np.array of shape (K, K), K = number of classes
      A conditional probability matrix of the form P(s=k_s|y=k_y) containing
      the fraction of examples in every class, labeled as every other class.
      Assumes columns of noise_matrix sum to 1.

    inverse_noise_matrix : np.array of shape (K, K), K = number of classes
      A conditional probability matrix of the form P(y=k_y|s=k_s)
      representing the estimated fraction of observed examples in each class
      k_s that are mislabeled examples from every other class k_y. If None,
      it is derived from noise_matrix, or estimated from psx and s.
      Assumes columns of inverse_noise_matrix sum to 1.

    Returns
    -------
    ``self.clf``, after it has been fit on the pruned data. The pruning mask
    is stored in ``self.noise_mask`` and, when the classifier accepts them,
    the per-example weights in ``self.sample_weight``.

    Raises
    ------
    ValueError
      If a supplied noise_matrix or inverse_noise_matrix has trace <= 1.

    Warns
    -----
    A warning is issued via ``warnings.warn`` when a (inverse) noise matrix
    is supplied while ``self.prune_count_method`` is
    'calibrate_confident_joint'.
    """

    # Check inputs
    assert_inputs_are_valid(X, s, psx)
    if noise_matrix is not None and np.trace(noise_matrix) <= 1:
        t = np.round(np.trace(noise_matrix), 2)
        raise ValueError(
            "Trace(noise_matrix) is {}, but must exceed 1.".format(t))
    if inverse_noise_matrix is not None and np.trace(
            inverse_noise_matrix) <= 1:
        t = np.round(np.trace(inverse_noise_matrix), 2)
        raise ValueError(
            "Trace(inverse_noise_matrix) is {}, but must exceed 1.".format(
                t))

    # Number of classes
    self.K = len(np.unique(s))

    # 'ps' is p(s=k)
    self.ps = value_counts(s) / float(len(s))

    self.confident_joint = None
    # If needed, compute noise rates (fraction of mislabeling) for all
    # classes. Also, if needed, compute P(s=k|x), denoted psx.

    # Set / re-set noise matrices / psx; estimate if not provided.
    if noise_matrix is not None:
        if self.prune_count_method == 'calibrate_confident_joint':
            # The message now starts with "\nYou" (it used to read "Y\nou").
            w = (
                "\nYou should not use self.prune_count_method == 'calibrate_confident_joint'."
                "\nwhen .fit(noise_matrix = something) because"
                "\n'calibrate_confident_joint' estimates the noise from scratch and will"
                "\nnot use your 'something' noise matrix information. Instead, use"
                "\nprune_count_method == 'inverse_nm_dot_s' which will find label errors"
                "\nby using the noise matrix you provde."
            )
            warnings.warn(w)
        self.noise_matrix = noise_matrix
        if inverse_noise_matrix is None:
            self.py, self.inverse_noise_matrix = compute_py_inv_noise_matrix(
                self.ps, self.noise_matrix)
    if inverse_noise_matrix is not None:
        if self.prune_count_method == 'calibrate_confident_joint':
            w = (
                "\nYou should not use self.prune_count_method == 'calibrate_confident_joint'."
                "\nwhen .fit(inverse_noise_matrix = something) because"
                "\n'calibrate_confident_joint' estimates the noise from scratch and will"
                "\nnot use your 'something' inv noise matrix information. Instead, use"
                "\nprune_count_method == 'inverse_nm_dot_s' which will find label errors"
                "\nby using the inverse noise matrix you provde."
            )
            warnings.warn(w)
        self.inverse_noise_matrix = inverse_noise_matrix
        if noise_matrix is None:
            self.noise_matrix = compute_noise_matrix_from_inverse(
                self.ps, self.inverse_noise_matrix)
    if noise_matrix is None and inverse_noise_matrix is None:
        if psx is None:
            (self.py, self.noise_matrix, self.inverse_noise_matrix,
             self.confident_joint, psx) = (
                estimate_py_noise_matrices_and_cv_pred_proba(
                    X=X,
                    s=s,
                    clf=self.clf,
                    cv_n_folds=self.cv_n_folds,
                    thresholds=thresholds,
                    converge_latent_estimates=self.converge_latent_estimates,
                    seed=self.seed,
                ))
        else:  # psx is provided by user (assumed holdout probabilities)
            (self.py, self.noise_matrix, self.inverse_noise_matrix,
             self.confident_joint) = (
                estimate_py_and_noise_matrices_from_probabilities(
                    s=s,
                    psx=psx,
                    thresholds=thresholds,
                    converge_latent_estimates=self.converge_latent_estimates,
                ))

    if psx is None:
        psx = estimate_cv_predicted_probabilities(
            X=X,
            labels=s,
            clf=self.clf,
            cv_n_folds=self.cv_n_folds,
            seed=self.seed,
        )

    # Zero out noise matrix entries if pulearning = the integer specifying
    # the class without noise.
    if self.pulearning is not None:  # pragma: no cover
        self.noise_matrix = remove_noise_from_class(
            self.noise_matrix, class_without_noise=self.pulearning)
        # TODO: self.inverse_noise_matrix = remove_noise_from_class(self.inverse_noise_matrix, class_without_noise=self.pulearning)

    # This is the actual work of this function.

    # Get the indices of the examples we wish to prune
    self.noise_mask = get_noise_indices(
        s,
        psx,
        inverse_noise_matrix=self.inverse_noise_matrix,
        confident_joint=self.confident_joint,
        prune_method=self.prune_method,
        prune_count_method=self.prune_count_method,
        converge_latent_estimates=self.converge_latent_estimates,
    )

    X_mask = ~self.noise_mask
    X_pruned = X[X_mask]
    s_pruned = s[X_mask]

    # Check if sample_weight is a parameter of clf.fit(). Python 3 provides
    # getfullargspec; only fall back to getargspec where it is the sole
    # option (Python 2). On Python 3 getargspec is deprecated, rejects
    # annotated / keyword-only signatures, and is removed in 3.11.
    if hasattr(inspect, 'getfullargspec'):
        fit_spec = inspect.getfullargspec(self.clf.fit)
        fit_params = list(fit_spec.args) + list(fit_spec.kwonlyargs)
    else:
        fit_params = inspect.getargspec(self.clf.fit).args

    if 'sample_weight' in fit_params:
        # Re-weight examples in the loss function for the final fitting
        # s.t. the "apparent" original number of examples in each class
        # is preserved, even though the pruned sets may differ.
        self.sample_weight = np.ones(np.shape(s_pruned))
        for k in range(self.K):
            self.sample_weight[s_pruned == k] = 1.0 / self.noise_matrix[k][k]

        self.clf.fit(X_pruned, s_pruned, sample_weight=self.sample_weight)
    else:
        # This is less accurate, but it's all we can do if sample_weight
        # isn't available.
        self.clf.fit(X_pruned, s_pruned)

    return self.clf
def get_noise_indices(
    s,
    psx,
    inverse_noise_matrix=None,
    confident_joint=None,
    frac_noise=1.0,
    num_to_remove_per_class=None,
    prune_method='prune_by_noise_rate',
    prune_count_method='inverse_nm_dot_s',
    converge_latent_estimates=False,
    return_sorted_index=False,
    multi_label=False,
):
    r'''Returns the indices of most likely (confident) label errors in s. The
    number of indices returned is specified by frac_noise. When
    frac_noise = 1.0, all "confidently" estimated noise indices are returned.

    Parameters
    ----------

    s : np.array
      A binary vector of labels, s, which may contain mislabeling. "s" denotes
      the noisy label instead of \tilde(y), for ASCII encoding reasons.
      When multi_label is True, each element of s is instead a collection of
      the labels given to that example.

    psx : np.array (shape (N, K))
      P(s=k|x) is a matrix with K (noisy) probabilities for each of the N
      examples x. This is the probability distribution over all K classes,
      for each example, regarding whether the example has label s==k P(s=k|x).
      psx should have been computed using 3 (or higher) fold cross-validation.

    inverse_noise_matrix : np.array of shape (K, K), K = number of classes
      A conditional probability matrix of the form P(y=k_y|s=k_s) representing
      the estimated fraction of observed examples in each class k_s that are
      mislabeled examples from every other class k_y. If None (and needed by
      prune_count_method), it is estimated from psx and s.
      Assumes columns of inverse_noise_matrix sum to 1.

    confident_joint : np.array (shape (K, K), type int) (default: None)
      A K,K integer matrix of count(s=k, y=k). Estimates a confident subset
      of the joint distribution of the noisy and true labels P_{s,y}.
      Each entry in the matrix contains the number of examples confidently
      counted into every pair (s=j, y=k) classes.

    frac_noise : float
      When frac_noise = 1.0, return all "confidently" estimated noise
      indices. Value in range (0, 1] that determines the fraction of noisy
      example indices to return based on the following formula for example
      class k.
      frac_noise * number_of_mislabeled_examples_in_class_k, or equivalently
      frac_noise * inverse_noise_rate_class_k * num_examples_with_s_equal_k

    num_to_remove_per_class : list of int of length K (# of classes)
      e.g. if K = 3, num_to_remove_per_class = [5, 0, 1] would return
      the indices of the 5 most likely mislabeled examples in class s = 0,
      and the most likely mislabeled example in class s = 1.
      ***Only set this parameter if prune_method == 'prune_by_class'

    prune_method : str (default: 'prune_by_noise_rate')
      'prune_by_class', 'prune_by_noise_rate', or 'both'. Method used for
      pruning.
      1. 'prune_by_noise_rate': works by removing examples with
      *high probability* of being mislabeled for every non-diagonal in the
      prune_counts_matrix (see pruning.py).
      2. 'prune_by_class': works by removing the examples with *smallest
      probability* of belonging to their given class label for every class.
      3. 'both': Finds the examples satisfying (1) AND (2) and
      removes their set conjunction.

    prune_count_method : str (default 'inverse_nm_dot_s')
      Options are 'inverse_nm_dot_s' or 'calibrate_confident_joint'.
      !DO NOT USE! 'calibrate_confident_joint' if you already know the noise
      matrix and will call .fit(noise_matrix = known_noise_matrix) or
      .fit(inverse_noise_matrix = known_inverse_noise_matrix) because
      'calibrate_confident_joint' will estimate the noise without using this
      information.
      !IN ALL OTHER CASES! We recommend always using
      'calibrate_confident_joint' because it is faster and more robust when
      no noise matrix info is given.

      Determines the method used to estimate the counts of the joint P(s, y)
      that will be used to determine how many examples to prune for every
      class that are flipped to every other class, as follows:

        if prune_count_method == 'inverse_nm_dot_s':
          # Matrix of counts(y=k and s=l)
          prune_count_matrix = inverse_noise_matrix * s_counts
        elif prune_count_method == 'calibrate_confident_joint':
          prune_count_matrix = (
              confident_joint.T / float(confident_joint.sum()) * len(s))

    converge_latent_estimates : bool (Default: False)
      If true, forces numerical consistency of estimates. Each is estimated
      independently, but they are related mathematically with closed form
      equivalences. This will iteratively enforce mathematical consistency.
      Only used when the noise has to be estimated inside this function.

    return_sorted_index : bool
      If true, returns the result of order_label_errors(noise_mask, psx, s):
      an array of the label error indices (instead of a bool mask) where
      error indices are ordered by the normalized margin
      (p(s = k) - max(p(s != k)))

    multi_label : bool
      If true, s is treated as a sequence of label collections: class counts
      are taken over all individual labels, and an example belongs to class
      k when k is among its labels.

    Returns
    -------
    A boolean mask of length N (True = likely label error), or the ordered
    error indices when return_sorted_index is True.

    Raises
    ------
    ValueError
      If prune_method or prune_count_method is not one of the supported
      options.'''

    # Fail fast on an unknown prune_method. Previously such a value skipped
    # both pruning branches and silently produced an all-False mask.
    if prune_method not in ('prune_by_class', 'prune_by_noise_rate', 'both'):
        raise ValueError(
            "prune_method should be 'prune_by_class', 'prune_by_noise_rate', "
            "or 'both', but '" + str(prune_method) + "' was given.")

    # Number of examples in each class of s
    if multi_label:
        # Count every individual label, not the per-example label lists.
        s_counts = value_counts([label for labels in s for label in labels])
    else:
        s_counts = value_counts(s)

    # Number of classes
    K = len(psx.T)

    # Ensure labels are of type np.array()
    s = np.asarray(s)

    def given_label_mask(k):
        # Boolean mask over the examples whose given label(s) include class k.
        if multi_label:
            return np.array([k in labels for labels in s])
        return s == k

    # Estimate the number of examples to confidently prune for each
    # (s=j, y=k) pair.
    if (inverse_noise_matrix is None
            and prune_count_method == 'inverse_nm_dot_s') or (
            confident_joint is None
            and prune_count_method == 'calibrate_confident_joint'):
        # Imported locally, as in the original.
        from cleanlab.latent_estimation import estimate_py_and_noise_matrices_from_probabilities
        _, _, inverse_noise_matrix, confident_joint = (
            estimate_py_and_noise_matrices_from_probabilities(
                s,
                psx,
                converge_latent_estimates=converge_latent_estimates,
            ))

    if prune_count_method == 'inverse_nm_dot_s':
        # Matrix of counts(y=k and s=l)
        prune_count_matrix = inverse_noise_matrix * s_counts
    elif prune_count_method == 'calibrate_confident_joint':
        # Calibrate the confident joint so its entries sum to len(s).
        prune_count_matrix = confident_joint.T / float(
            confident_joint.sum()) * len(s)
    else:
        raise ValueError(
            "prune_count_method should be 'inverse_nm_dot_s' or " +
            "'calibrate_confident_joint', but '" + str(prune_count_method) +
            "' was given.")

    # Leave at least MIN_NUM_PER_CLASS examples per class.
    prune_count_matrix = keep_at_least_n_per_class(
        prune_count_matrix=prune_count_matrix,
        n=MIN_NUM_PER_CLASS,
        frac_noise=frac_noise,
    )

    if num_to_remove_per_class is not None:
        # Estimate joint probability distribution over label errors
        psy = prune_count_matrix / np.sum(prune_count_matrix, axis=1)
        noise_per_s = psy.sum(axis=1) - psy.diagonal()
        # Calibrate s.t. noise rates sum to num_to_remove_per_class
        tmp = (psy.T * num_to_remove_per_class / noise_per_s).T
        np.fill_diagonal(tmp, s_counts - num_to_remove_per_class)
        prune_count_matrix = np.round(tmp).astype(int)

    # Initialize the boolean mask of noise indices.
    noise_mask = np.zeros(len(psx), dtype=bool)

    # Perform pruning with threshold probabilities found by np.partition
    # (the original comment cites the BFPRT selection algorithm, O(n)).
    if prune_method in ('prune_by_class', 'both'):
        for k in range(K):
            # Don't prune if not MIN_NUM_PER_CLASS
            if s_counts[k] > MIN_NUM_PER_CLASS:
                num2prune = s_counts[k] - prune_count_matrix[k][k]
                # num2prune'th smallest probability of class k among the
                # examples with noisy label k; anything below it is pruned.
                s_filter = given_label_mask(k)
                threshold = np.partition(
                    psx[:, k][s_filter], num2prune)[num2prune]
                noise_mask = noise_mask | (s_filter & (psx[:, k] < threshold))

    if prune_method == 'both':
        noise_mask_by_class = noise_mask

    if prune_method in ('prune_by_noise_rate', 'both'):
        noise_mask = np.zeros(len(psx), dtype=bool)
        for k in range(K):  # true hidden label index
            # Don't prune if not MIN_NUM_PER_CLASS
            if s_counts[k] > MIN_NUM_PER_CLASS:
                for j in range(K):  # noisy label index
                    if k != j:  # Only prune for noise rates
                        num2prune = prune_count_matrix[k][j]
                        if num2prune > 0:
                            # num2prune'th largest p(class k) - p(class j)
                            # for x with noisy label j
                            margin = psx[:, k] - psx[:, j]
                            s_filter = given_label_mask(j)
                            threshold = -np.partition(
                                -margin[s_filter],
                                num2prune - 1)[num2prune - 1]
                            noise_mask = noise_mask | (
                                s_filter & (margin >= threshold))

    # 'both' keeps only the examples flagged by both strategies.
    if prune_method == 'both':
        noise_mask = noise_mask & noise_mask_by_class

    if return_sorted_index:
        return order_label_errors(noise_mask, psx, s)

    return noise_mask