def dcg_at_k(y_true: np.ndarray, y_score: np.ndarray,
             k: int, pscore: Optional[np.ndarray] = None) -> float:
    """Compute a relevance-normalised DCG@k for one user.

    Items are ranked by descending ``y_score``. The top item contributes
    its relevance undiscounted; the item at 0-based rank ``i >= 1`` is
    divided by ``log2(i + 1)``. When ``pscore`` is supplied, each relevance
    is additionally divided by its propensity, floored at the module-level
    ``eps``.

    Parameters
    ----------
    y_true : np.ndarray
        Relevance labels, one per item.
    y_score : np.ndarray
        Predicted scores used to rank the items.
    k : int
        Ranking depth; capped at the number of items.
    pscore : np.ndarray, optional
        Propensity scores. ``None`` weights every item equally.

    Returns
    -------
    float
        The discounted gain divided by the total relevance (or, with
        ``pscore``, by the summed inverse propensities of the positive
        items). ``0.0`` when the relevance labels sum to zero.
    """
    ranking = y_score.argsort()[::-1]
    relevance = y_true[ranking]
    if pscore is None:
        propensity = np.ones_like(relevance)
    else:
        propensity = np.maximum(pscore[ranking], eps)

    total_relevance = np.sum(relevance)
    if total_relevance == 0:
        return 0.0

    depth = min(k, y_true.shape[0])
    # Rank 0 is never discounted (and is counted regardless of depth).
    gain = 0.0 + relevance[0] / propensity[0]
    for rank in np.arange(1, depth):
        gain += relevance[rank] / (propensity[rank] * np.log2(rank + 1))

    if pscore is None:
        normaliser = total_relevance
    else:
        normaliser = np.sum(1. / propensity[relevance > 0])
    return gain / normaliser
def get_top_k_matrix(A: np.ndarray, k: int = 16,
                     normalization: str = 'col_one') -> np.ndarray:
    """Keep the ``k`` largest entries per column (or row) and normalise.

    NOTE: the sparsification step writes zeros into ``A`` in place, as the
    previous implementation did; pass a copy if the input must survive.

    Parameters
    ----------
    A : np.ndarray
        Square 2-D matrix.
    k : int
        Number of entries to keep per column/row. Values of ``k`` greater
        than or equal to the matrix size keep every entry.
    normalization : str
        * ``'None'``        - only sparsify columns, no rescaling.
        * ``'col_one'``     - sparsify columns, each column sums to one.
        * ``'row_one'``     - sparsify rows, each row sums to one.
        * ``'col_weights'`` - sparsify columns, each column keeps the sum
          it had before sparsification.
        * ``'row_weights'`` - same as ``'col_weights'`` but per row.

    Returns
    -------
    np.ndarray
        The sparsified (and possibly rescaled) matrix. For ``'None'`` this
        is ``A`` itself.

    Raises
    ------
    ValueError
        If ``normalization`` is not one of the values above.
    """
    known = ('None', 'col_one', 'row_one', 'col_weights', 'row_weights')
    if normalization not in known:
        raise ValueError(f'Normalization not known: {normalization}.')

    by_row = normalization in ('row_one', 'row_weights')
    keep_weights = normalization in ('col_weights', 'row_weights')

    num_nodes = A.shape[0]
    col_idx = np.arange(num_nodes)
    # Row-wise modes reuse the column-wise code on a transposed view.
    work = A.transpose() if by_row else A
    weights = work.sum(axis=0) if keep_weights else None

    # Clamp at zero: a negative stop index would slice from the end and
    # zero out entries that must be kept when k > num_nodes.
    num_dropped = max(num_nodes - k, 0)
    work[work.argsort(axis=0)[:num_dropped], col_idx] = 0.

    if normalization == 'None':
        return A

    norm = work.sum(axis=0)
    norm[norm <= 0] = 1  # avoid dividing by zero
    work = work * weights / norm if keep_weights else work / norm
    return work.transpose() if by_row else work
def dcg_at_k(y_true: np.ndarray, y_score: np.ndarray,
             k: int, pscore: Optional[np.ndarray] = None) -> float:
    """Calculate an IDCG-normalised DCG@k score for a given user.

    Items are ranked by descending ``y_score``; the item at 0-based rank
    ``i`` is discounted by ``log2(i + 2)``. The sum is divided by the DCG
    of an ideal ranking that places ``min(#positives, k)`` relevant items
    first. When ``pscore`` is supplied, relevances are divided by their
    propensity (floored at the module-level ``eps``) and the result is
    further divided by the summed inverse propensities of the items whose
    label equals 1.

    Parameters
    ----------
    y_true : np.ndarray
        Relevance labels, one per item.
    y_score : np.ndarray
        Predicted scores used for ranking.
    k : int
        Ranking depth. Values larger than the number of items are capped
        instead of indexing past the end of the ranking.
    pscore : np.ndarray, optional
        Propensity scores; ``None`` weights all items equally.

    Returns
    -------
    float
        The score, or ``0.0`` when there is no relevant item or ``k`` is
        not positive.
    """
    ranking = y_score.argsort()[::-1]
    relevance = y_true[ranking]
    if pscore is not None:
        propensity = np.maximum(pscore[ranking], eps)
    else:
        propensity = np.ones_like(relevance)

    num_true = np.sum(relevance)
    depth = min(k, relevance.shape[0])
    if num_true == 0 or depth <= 0:
        return 0.0

    # DCG of the best achievable ranking within the evaluated depth.
    ideal_len = min(num_true, depth)
    idcg_score = np.sum(1 / np.log2(np.arange(2, ideal_len + 2)))

    discounts = np.log2(np.arange(depth) + 2)
    dcg_score = np.sum(
        relevance[:depth] / (propensity[:depth] * discounts)) / idcg_score

    if pscore is None:
        return float(dcg_score)
    return float(dcg_score / np.sum(1. / propensity[relevance == 1]))
def average_precision_at_k(y_true: np.ndarray, y_score: np.ndarray,
                           k: int, pscore: Optional[np.ndarray] = None) -> float:
    """Compute average precision at ``k`` for one user.

    Items are ranked by descending ``y_score``. For every ranked position
    within the top ``k`` that holds a positive label, the (propensity
    weighted) precision up to that position is accumulated.

    Propensity scores, when given, up-weight records that were unlikely
    to be observed so that the estimate is unbiased; they are floored at
    the module-level ``eps``. Without them every record weighs the same.

    Returns the accumulated precision divided by the total relevance (or,
    with ``pscore``, by the summed inverse propensities of the positive
    items); ``0.0`` when the relevance labels sum to zero.
    """
    ranking = y_score.argsort()[::-1]
    relevance = y_true[ranking]
    if pscore is None:
        propensity = np.ones_like(relevance)
    else:
        propensity = np.maximum(pscore[ranking], eps)

    total_relevance = np.sum(relevance)
    if total_relevance == 0:
        return 0.0

    depth = min(k, y_true.shape[0])
    weighted = relevance / propensity
    precision_sum = 0.0
    for rank in np.arange(depth):
        if relevance[rank] > 0:
            # Weighted precision of the ranking prefix ending at this hit.
            precision_sum += np.sum(weighted[:rank + 1]) / (rank + 1)

    if pscore is None:
        normaliser = total_relevance
    else:
        normaliser = np.sum(1. / propensity[relevance > 0])
    return precision_sum / normaliser
def average_precision_at_k(y_true: np.ndarray, y_score: np.ndarray,
                           k: int, pscore: Optional[np.ndarray] = None) -> float:
    """Calculate an average precision at ``k`` for a given user.

    Items are ranked by descending ``y_score``. For each position in the
    top ``k`` whose label equals 1, the (propensity weighted) precision of
    the ranking prefix ending there is accumulated.

    Parameters
    ----------
    y_true : np.ndarray
        Binary relevance labels, one per item.
    y_score : np.ndarray
        Predicted scores used for ranking.
    k : int
        Ranking depth. Values larger than the number of items are capped
        instead of indexing past the end of the ranking.
    pscore : np.ndarray, optional
        Propensity scores (floored at the module-level ``eps``); ``None``
        weights all items equally.

    Returns
    -------
    float
        Accumulated precision divided by ``sum(y_true)`` (or, with
        ``pscore``, by the summed inverse propensities of the positive
        items). ``0.0`` when there is no relevant item.
    """
    ranking = y_score.argsort()[::-1]
    relevance = y_true[ranking]
    if pscore is not None:
        propensity = np.maximum(pscore[ranking], eps)
    else:
        propensity = np.ones_like(relevance)

    if np.sum(relevance) == 0:
        return 0.0

    depth = max(min(k, relevance.shape[0]), 0)
    # Running weighted hit count; prefix[i] covers ranks 0..i.
    prefix = np.cumsum(relevance[:depth] / propensity[:depth])
    hits = np.flatnonzero(relevance[:depth] == 1)
    average_precision_score = np.sum(prefix[hits] / (hits + 1))

    if pscore is None:
        return float(average_precision_score / np.sum(y_true))
    return float(
        average_precision_score / np.sum(1. / propensity[relevance == 1]))
def _fisher_exact_test_p(y_obs: np.ndarray, y_pred: np.ndarray,
                         se_pred: np.ndarray) -> float:
    """One-sided Fisher exact test on top-half agreement.

    The indices of the largest ``len(y_obs) // 2`` observed values and of
    the largest predicted values are compared; their overlap fills a 2x2
    contingency table that is tested with ``alternative="greater"``.

    ``se_pred`` is accepted but not used.

    Returns the p-value as a Python ``float``.
    """
    n_total = len(y_obs)
    n_half = n_total // 2

    observed_top = y_obs.argsort(axis=0)[-n_half:]
    predicted_top = y_pred.argsort(axis=0)[-n_half:]

    # Contingency table: rows = predicted top/bottom, cols = observed.
    both_top = len(set(predicted_top).intersection(observed_top))
    only_one_top = n_half - both_top
    neither_top = (n_total - n_half) - only_one_top
    contingency = np.array([[both_top, only_one_top],
                            [only_one_top, neither_top]])

    _statistic, p_value = fisher_exact(contingency, alternative="greater")
    return float(p_value)
def non_max_suppression(boxes: np.ndarray, scores: np.ndarray,
                        iou_threshold: float) -> np.ndarray:
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it (as returned by ``compute_iou``)
    exceeds ``iou_threshold``.

    ``boxes`` is indexed as ``(x1, y1, x2, y2)`` per row; areas are
    computed as ``(y2 - y1) * (x2 - x1)``.

    Returns an array with the indices of the kept boxes, best score first
    (an empty array when there are no boxes).
    """
    assert boxes.shape[0] == scores.shape[0]
    if len(scores) == 0:
        return np.array([])

    y_low, x_low = boxes[:, 1], boxes[:, 0]
    y_high, x_high = boxes[:, 3], boxes[:, 2]
    areas = (y_high - y_low) * (x_high - x_low)

    # Ascending by score, so the best candidate is always at the end.
    candidates = scores.argsort().tolist()
    kept = []
    while candidates:
        best = candidates.pop()
        kept.append(best)
        if not candidates:
            break
        ious = compute_iou(boxes[best], boxes[candidates],
                           areas[best], areas[candidates])
        # Positions (within ``candidates``) that overlap too much.
        suppressed = set(np.nonzero(ious > iou_threshold)[0])
        candidates = [
            box_id for position, box_id in enumerate(candidates)
            if position not in suppressed
        ]
    return np.array(kept)
def get_top_k_matrix(A: np.ndarray, k: int = 128) -> np.ndarray:
    """Keep the ``k`` largest entries of every column and normalise columns.

    NOTE: the sparsification step writes zeros into ``A`` in place, as the
    previous implementation did; pass a copy if the input must survive.

    Parameters
    ----------
    A : np.ndarray
        Square 2-D matrix.
    k : int
        Number of entries to keep per column. Values of ``k`` greater than
        or equal to the matrix size keep every entry.

    Returns
    -------
    np.ndarray
        New matrix whose columns sum to one (columns with a non-positive
        sum are left unscaled).
    """
    num_nodes = A.shape[0]
    col_idx = np.arange(num_nodes)
    # Clamp at zero: a negative stop index would slice from the end and
    # zero out entries that must be kept when k > num_nodes.
    num_dropped = max(num_nodes - k, 0)
    A[A.argsort(axis=0)[:num_dropped], col_idx] = 0.
    norm = A.sum(axis=0)
    norm[norm <= 0] = 1  # avoid dividing by zero
    return A / norm
def apply(self, fitness_all: np.ndarray, population: np.ndarray):
    """Build the next population from the fittest individuals.

    The population is ordered by descending fitness, truncated to the best
    ``self.offspring`` fraction, and every survivor is repeated
    ``round(1 / self.offspring)`` times. The result is then padded or
    trimmed so that it has the same number of rows as the input.
    """
    target_size = population.shape[0]

    # argsort is ascending, so reverse to put the fittest first.
    ranked = population[fitness_all.argsort()][::-1]
    survivors = ranked[0:round(ranked.shape[0] * self.offspring)]

    copies = round(1 / self.offspring)
    next_gen = survivors.repeat(copies, axis=0)

    if next_gen.shape[0] < target_size:
        # Too few: append one more copy of successive survivors.
        source = 0
        while next_gen.shape[0] < target_size:
            next_gen = np.append(next_gen, np.array([next_gen[source]]), axis=0)
            source += copies
    elif next_gen.shape[0] > target_size:
        # Too many: drop one copy per survivor, starting from the worst.
        surplus = next_gen.shape[0] - target_size
        last = next_gen.shape[0] - 1
        doomed = [last - copies * j for j in range(surplus)]
        next_gen = np.delete(next_gen, doomed, axis=0)
    return next_gen
def quantile_norm(x: np.ndarray, target: Optional[np.ndarray] = None) -> np.ndarray:
    """Quantile normalize the columns of a 2D array.

    Every column is replaced by the values of a reference distribution,
    assigned according to the rank of each entry within its column.

    Parameters
    ----------
    x : numpy.array
        A 2D numpy array; columns are normalized independently.
    target : numpy.array, optional
        Sorted reference distribution with one value per row of ``x``. If
        not supplied, the row-wise mean of the column-sorted ``x`` is used.

    Returns
    -------
    numpy.array
        Normalized array with the same shape as ``x``.
    """
    if target is None:
        order = x.argsort(axis=0)
        column_ids = np.arange(order.shape[1])
        # Sort each column independently, then average across columns.
        target = x[order, column_ids].mean(1)

    def map_to_target(column):
        # argsort of argsort yields the rank of every entry.
        ranks = column.argsort().argsort()
        return target[ranks]

    return np.apply_along_axis(map_to_target, 0, x)
def top_n_accuracy(
    target_ints: np.ndarray,
    predicted_probas: np.ndarray,
    n: int,
    sample_weights: Optional[np.ndarray] = None,
) -> float:
    """Weighted share of samples whose true class is among the top ``n`` predictions.

    ``target_ints`` holds one class index per sample and
    ``predicted_probas`` one row of class probabilities per sample.
    ``sample_weights`` must be non-negative and shaped like
    ``target_ints``; it defaults to equal weights.
    """
    assert len(target_ints) == len(predicted_probas)
    assert np.ndim(target_ints) == 1
    assert np.ndim(predicted_probas) == 2

    if sample_weights is None:
        sample_weights = np.ones_like(target_ints)

    assert_that(sample_weights.shape).is_equal_to(target_ints.shape)
    np.testing.assert_array_equal(sample_weights >= 0.0, True)

    # argsort is ascending, so the n most probable classes sit in the
    # last n columns.
    top_n_classes = predicted_probas.argsort(axis=1)[:, -n:]
    target_found = (top_n_classes == target_ints[:, np.newaxis]).any(axis=1)

    return np.average(target_found, weights=sample_weights)
def nms_oneclass(bbox: np.ndarray, score: np.ndarray, thresh: float, method='iou') -> np.ndarray:
    """Single-class greedy NMS baseline.

    Parameters
    ----------
    bbox : np.ndarray
        Boxes, n*(x1,y1,x2,y2).
    score : np.ndarray
        Confidence scores, shape (n,).
    thresh : float
        Overlap threshold; boxes whose overlap with a kept box exceeds it
        are suppressed.
    method : str
        Passed through to ``bbox_iou`` to select the overlap measure.

    Returns
    -------
    list
        Indices of the kept boxes, highest score first. (The code returns
        a plain list even though the annotation says ``np.ndarray``.)
    """
    remaining = score.argsort()[::-1]
    kept = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)
        overlaps = bbox_iou(bbox[best], bbox[rest], method=method)
        survivors = np.where(overlaps <= thresh)[0]
        remaining = rest[survivors]
    return kept
def cpu_nms(detections: np.ndarray, scores: np.ndarray, threshold: float = .5) \
        -> Iterator[int]:
    """Lazily apply greedy non-maximum suppression.

    Arguments:
        detections: (array, (num, 4)) Boxes as ``(x1, y1, x2, y2)`` rows.
        scores: (array, (num)) Score of each box.
        threshold: (float) Boxes whose IoU with a kept box exceeds this
            value are suppressed.
    Yields:
        Indices (with respect to num) of the kept boxes, in order of
        decreasing score. Widths and heights are computed with a ``+ 1``
        term.
    """
    left, right = detections[:, 0], detections[:, 2]
    top, bottom = detections[:, 1], detections[:, 3]
    areas = (right - left + 1) * (bottom - top + 1)

    remaining = scores.argsort()[::-1]
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        yield best

        # Intersection rectangle between the kept box and all others.
        inter_left = np.maximum(left[best], left[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])
        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        intersection = inter_w * inter_h

        iou = intersection / (areas[best] + areas[rest] - intersection)
        remaining = rest[np.where(iou <= threshold)[0]]
def dcg_at_k(
        ct: np.ndarray,
        cv: np.ndarray,
        score: np.ndarray,
        k: int,
        cv_hat: Optional[np.ndarray] = None,
        pscore: Optional[np.ndarray] = None,
) -> float:
    """Calculate a normalised DCG score from per-item corrected gains.

    Items are ranked by descending ``score``. Each item's gain is

        ``cv_hat + ct * (cv - cv_hat) / pscore``

    The top item and the second item are undiscounted; the item at
    0-based rank ``i >= 1`` is divided by ``log2(i + 1)``. The discounted
    sum over the top ``k`` items is divided by the sum of all gains and
    clipped to ``[0, 1]``.

    The gains are computed once into a fresh array, so the denominator is
    never affected by the in-place accumulation used for the numerator and
    integer inputs are handled correctly.

    Parameters
    ----------
    ct, cv : np.ndarray
        Per-item arrays combined as in the gain formula above.
    score : np.ndarray
        Predicted scores used for ranking.
    k : int
        Ranking depth; capped at the number of items. The top item is
        always counted.
    cv_hat : np.ndarray, optional
        Per-item baseline estimate; defaults to zeros.
    pscore : np.ndarray, optional
        Per-item propensity; defaults to ones.

    Returns
    -------
    float
        Score in ``[0, 1]``; ``0.0`` for empty input or when the gains sum
        to zero.
    """
    if len(score) == 0:
        return 0.0

    sort_key = score.argsort()[::-1]
    ct_sorted = ct[sort_key]
    cv_sorted = cv[sort_key]
    cv_hat_sorted = cv_hat[sort_key] if cv_hat is not None \
        else np.zeros_like(cv)
    pscore_sorted = pscore[sort_key] if pscore is not None \
        else np.ones_like(cv)

    gains = cv_hat_sorted + \
        ct_sorted * (cv_sorted - cv_hat_sorted) / pscore_sorted

    denominator = gains.sum()
    if denominator == 0:
        return 0.0

    depth = max(min(k, gains.shape[0]), 1)
    discounts = np.log2(np.arange(1, depth) + 1)
    dcg_score = gains[0] + (gains[1:depth] / discounts).sum()
    return float(np.clip(dcg_score / denominator, 0, 1))
def recall_at_k(
        ct: np.ndarray,
        cv: np.ndarray,
        score: np.ndarray,
        k: int,
        cv_hat: Optional[np.array] = None,
        pscore: Optional[np.array] = None,
) -> float:
    """Calculate a recall score from per-item corrected gains.

    Items are ranked by descending ``score``. The per-item gain is
    ``cv_hat + ct * (cv - cv_hat) / pscore`` (``cv_hat`` defaults to
    zeros, ``pscore`` to ones). The score is the gain collected in the
    top ``k`` items divided by the gain of all items, clipped to
    ``[0, 1]``; ``0`` is returned when the total gain is zero.
    """
    ranking = score.argsort()[::-1]
    ct_ranked, cv_ranked = ct[ranking], cv[ranking]
    baseline = np.zeros_like(cv) if cv_hat is None else cv_hat[ranking]
    propensity = np.ones_like(cv) if pscore is None else pscore[ranking]

    # Correction term added on top of the baseline estimate.
    correction = ct_ranked * (cv_ranked - baseline) / propensity

    top_k_gain = baseline[:k].sum()
    top_k_gain += correction[:k].sum()

    total_gain = (baseline + correction).sum()
    if total_gain == 0:
        return 0
    return np.clip(top_k_gain / total_gain, 0, 1)
def top_scores(scores: np.ndarray, top_k: int = 100) -> Tuple[np.ndarray, np.ndarray]:
    """Pick the ``top_k`` best-scoring classes, best first.

    Args:
        scores: array of scores, either 1D ``(n_classes,)`` or 2D
            ``(n_instances, n_classes)``.
        top_k: how many of the highest-scoring classes to return.

    Returns:
        ``(ranked_classes, scores)``: the class indices in descending
        order of score and the matching scores, so that
        ``ranked_classes[..., i]`` has score ``scores[..., i]``.

    Examples:
        >>> top_scores(np.array([0.2, 0.6, 0.1, 0.04, 0.06]), top_k=3)
        (array([1, 0, 2]), array([0.6, 0.2, 0.1]))
    """
    if scores.ndim != 1:
        # Rank every row independently along the class axis.
        ranked_classes = np.argsort(scores)[..., ::-1][:, :top_k]
        row_ids = np.arange(0, len(scores)).reshape(-1, 1)
        return ranked_classes, scores[row_ids, ranked_classes]

    ranked_classes = scores.argsort()[::-1][:top_k]
    return ranked_classes, scores[ranked_classes]
def nms(dets: np.ndarray, scores: np.ndarray, thresh: float) -> np.ndarray:
    """Greedy non-maximum suppression.

    Parameters
    ----------
    dets : np.ndarray
        Boxes as ``(x1, y1, x2, y2)`` rows. Widths and heights are
        computed with a ``+ 1`` term.
    scores : np.ndarray
        One score per box.
    thresh : float
        Boxes whose IoU with an already kept box exceeds this value are
        suppressed.

    Returns
    -------
    np.ndarray
        Integer indices of the kept boxes, highest score first.
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)

        # Intersection of the kept box with every remaining box.
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        ovr = inter / (areas[best] + areas[rest] - inter)
        order = rest[np.where(ovr <= thresh)[0]]

    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; use the builtin directly.
    return np.array(keep, dtype=int)
def get_max_sum_ind(self, indices_list: List[Tuple], distances: np.ndarray,
                    i: Union[str, int], m: Union[str, int]) -> Tuple:
    '''Return the entry of `indices_list` paired with the largest distance

    Parameters
    ----------
    indices_list : list
        list of tuples, one per entry of `distances`
    distances : numpy.ndarray
        size M
    i : int
        only used in the error message
    m : int
        only used in the error message

    Returns
    -------
    tuple

    Raises
    ------
    ValueError
        If `indices_list` and `distances` differ in length.
    '''
    n_indices, n_distances = len(indices_list), len(distances)
    if n_indices != n_distances:
        template = ("Indices and distances are lists of different length."
                    "Length indices_list = {} and length distances = {}."
                    "In loop i = {} and m = {}")
        raise ValueError(template.format(n_indices, n_distances, i, m))

    # Ascending sort: the last position holds the index of the maximum.
    ascending = distances.argsort()
    return tuple(indices_list[ascending[-1]])
def _transform(self, queue: List[gates.Gate],
               remaining_queue: List[gates.Gate],
               counter: np.ndarray) -> List[gates.Gate]:
    """Helper recursive method for ``transform``.

    Each call makes one pass over ``remaining_queue``. Gates that can be
    placed given the qubits currently in ``self.qubits.set`` (called
    "global" in this code) are appended to ``queue``; the others are kept
    for the next pass. If any gate is left over, SWAP gates are appended
    to ``queue`` to exchange the global targets of the first left-over
    gate with other qubits, the left-over gates are relabelled to reflect
    those swaps, and the method calls itself on the left-over gates.

    Args:
        queue: Output list. Extended in place with accepted gates and with
            the SWAP gates created here.
        remaining_queue: Gates that still have to be placed, in order.
        counter: Array indexed by qubit id. Decremented for every target
            of an accepted gate, permuted when qubits are swapped, and
            used through ``argsort`` to choose swap partners.
            NOTE(review): presumably the number of pending gates that
            target each qubit -- confirm against the caller.

    Returns:
        ``queue``, once no gates are left over.

    Side effects:
        Appends to ``self.swaps_list``, sets ``swap_reset`` on special
        gates and re-targets the gates that remain queued.
    """
    new_remaining_queue = []
    for gate in remaining_queue:
        if isinstance(gate, gates.SpecialGate):
            # Snapshot of the swaps recorded so far (a copy, not a view).
            gate.swap_reset = list(self.swaps_list)

        global_targets = set(gate.target_qubits) & self.qubits.set
        # A gate is acceptable if none of its targets is global, or if it
        # is a SWAP with exactly one global target.
        accept = isinstance(gate, gates.SWAP) and len(global_targets) == 1
        accept = accept or not global_targets
        # It must also commute with every gate skipped earlier in this
        # pass, since it is being placed ahead of them.
        for skipped_gate in new_remaining_queue:
            accept = accept and skipped_gate.commutes(gate)
            if not accept:
                break
        if accept:
            queue.append(gate)
            for q in gate.target_qubits:
                counter[q] -= 1
        else:
            new_remaining_queue.append(gate)

    if not new_remaining_queue:
        return queue

    # Find which qubits to swap
    gate = new_remaining_queue[0]
    target_set = set(gate.target_qubits)
    global_targets = target_set & self.qubits.set
    if isinstance(gate, gates.SWAP):  # pragma: no cover
        # special case of swap on two global qubits
        assert len(global_targets) == 2
        # Only one of the two targets is swapped away; note that ``pop``
        # also removes that qubit from ``target_set``.
        global_targets.remove(target_set.pop())

    # Lazy generator of swap candidates in ascending ``counter`` order,
    # skipping global qubits and the gate's own targets. The filter is
    # evaluated on each ``next`` call, not when the generator is created.
    available_swaps = (q for q in counter.argsort()
                       if q not in self.qubits.set | target_set)
    qubit_map = {}
    for q in global_targets:
        qs = next(available_swaps)
        # Update qubit map that holds the swaps
        qubit_map[q] = qs
        qubit_map[qs] = q
        # Keep SWAPs in memory to reset them in the end
        self.swaps_list.append((min(q, qs), max(q, qs)))
        # Add ``SWAP`` gate in ``queue``.
        queue.append(self.gate_module.SWAP(q, qs))
        # Modify ``counter`` to take into account the swaps
        counter[q], counter[qs] = counter[qs], counter[q]

    # Modify gates to take into account the swaps
    for gate in new_remaining_queue:
        new_target_qubits = tuple(qubit_map[q] if q in qubit_map else q
                                  for q in gate.target_qubits)
        new_control_qubits = tuple(qubit_map[q] if q in qubit_map else q
                                   for q in gate.control_qubits)
        gate.set_targets_and_controls(new_target_qubits, new_control_qubits)

    return self._transform(queue, new_remaining_queue, counter)
def rank_metric_over_top_k_modes(metric_results: np.ndarray,
                                 mode_probabilities: np.ndarray,
                                 ranking_func: str) -> np.ndarray:
    """
    Accumulate a metric over modes ordered by decreasing probability.

    For every row, the modes are sorted from most to least probable and a
    running minimum (or maximum) of the metric is taken along that order,
    so column ``k`` holds the best (or worst) metric among the ``k + 1``
    most probable modes.

    :param metric_results: Array of shape [batch_size, num_modes].
    :param mode_probabilities: Array of shape [batch_size, num_modes].
    :param ranking_func: Either 'min' or 'max'; selects the running
        reduction applied over the top k modes.
    :return: Array with the same shape as ``metric_results``.
    :raises ValueError: If ``ranking_func`` is neither 'min' nor 'max'.
    """
    if ranking_func not in ("min", "max"):
        raise ValueError(
            f"Parameter ranking_func must be one of min or max. Received {ranking_func}"
        )
    if ranking_func == "min":
        accumulate = np.minimum.accumulate
    else:
        accumulate = np.maximum.accumulate

    # Column order that lists each row's modes by descending probability.
    by_probability = mode_probabilities.argsort(axis=-1)[..., ::-1]
    row_ids = np.indices(metric_results.shape)[0]
    ranked_metrics = metric_results[row_ids, by_probability]

    return accumulate(ranked_metrics, axis=-1)
def get_group_index_sorter(group_index: np.ndarray, ngroups: int | None = None) -> np.ndarray:
    """
    Return a stable sorting permutation for ``group_index``.

    Two stable strategies are available and the cheaper one is chosen:

    * ``algos.groupsort_indexer`` is a counting sort and therefore at
      least O(ngroups), where
          ngroups = prod(shape)
          shape = map(len, keys)
      i.e. linear in the number of combinations (cartesian product) of
      unique groupby-key values, which can be huge for multi-key groupby.
    * ``np.argsort(kind='mergesort')`` is O(count x log(count)), where
      count is the number of rows.

    Stability is required for the correctness of groupby operations,
    e.g. consider:
        df.groupby(key)[col].transform('first')

    If ``ngroups`` is not given it is taken as ``1 + group_index.max()``.
    """
    if ngroups is None:
        ngroups = 1 + group_index.max()
    n_rows = len(group_index)

    # Cost-model coefficients, taking the complexities literally; there
    # may be some room for fine-tuning these parameters.
    alpha, beta = 0.0, 1.0
    counting_sort_cost = alpha + beta * ngroups

    use_counting_sort = n_rows > 0 and (
        counting_sort_cost < (n_rows * np.log(n_rows)))
    if not use_counting_sort:
        return group_index.argsort(kind="mergesort")

    sorter, _ = algos.groupsort_indexer(ensure_int64(group_index), ngroups)
    return ensure_platform_int(sorter)
def _reorder_by_uniques(uniques: np.ndarray, labels: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """
    Sort ``uniques`` and remap ``labels`` so they keep pointing at the
    same values.

    Parameters
    ----------
    uniques : np.ndarray[np.int64]
    labels : np.ndarray[np.intp]
        Positions into ``uniques``; negative entries mark missing values
        and come back as -1.

    Returns
    -------
    np.ndarray[np.int64]
        ``uniques`` in ascending order.
    np.ndarray[np.intp]
        ``labels`` expressed as positions into the sorted ``uniques``.
    """
    # order[i] is the old position of the i-th smallest unique
    order = uniques.argsort()
    n_uniques = len(order)

    # new_position[old] is where the unique at ``old`` ends up
    new_position = np.empty(n_uniques, dtype=np.intp)
    new_position[order] = np.arange(n_uniques)

    missing = labels < 0
    remapped = new_position[labels]
    # negative labels were looked up from the end; restore the sentinel
    remapped[missing] = -1

    return uniques[order], remapped
def min_n(row_data: np.ndarray, row_indices: np.ndarray, n: int) -> Tuple[np.ndarray, np.ndarray]:
    """Return the ``n`` smallest entries of a row and their indices.

    ``row_data`` and ``row_indices`` are parallel arrays. The result is
    ordered by ascending value and holds the selected values together
    with the matching entries of ``row_indices``.
    """
    smallest = row_data.argsort()[:n]
    return row_data[smallest], row_indices[smallest]
def top_k_acc(k: int, pred: np.ndarray, label: np.ndarray):
    """Fraction of samples whose label is among the ``k`` highest-scored classes.

    ``pred`` holds one row of class scores per sample and ``label`` the
    true class index of each sample.
    """
    # Ascending argsort: the k best classes are the last k columns.
    best_classes = pred.argsort(axis=-1)[:, -k:]
    n_samples = best_classes.shape[0]
    hits = sum(
        1 for row in range(n_samples) if label[row] in best_classes[row]
    )
    return hits / n_samples
def calc_ndcg_at_k(y_true: np.ndarray, y_score: np.ndarray, k: int) -> float:
    """Calculate a nDCG score for a given user.

    The DCG of the ranking induced by ``y_score`` is divided by the DCG of
    the ideal ranking (items sorted by ``y_true``). Both use the same gain
    formula: the top item contributes ``2 ** rel - 1`` and the item at
    0-based rank ``i >= 1`` contributes ``rel / log2(i + 1)``. A perfect
    ranking therefore scores exactly 1.

    Parameters
    ----------
    y_true : np.ndarray
        Relevance labels, one per item.
    y_score : np.ndarray
        Predicted scores used for ranking.
    k : int
        Ranking depth; capped at the number of items. The top item is
        always counted.

    Returns
    -------
    float
        The nDCG, or ``0.0`` for empty input or when the ideal DCG is
        zero (no relevant item).
    """
    num_items = y_true.shape[0]
    if num_items == 0:
        return 0.0

    depth = max(min(k, num_items), 1)
    discounts = np.log2(np.arange(1, depth) + 1)

    def _dcg(ranked_relevance: np.ndarray) -> float:
        # Single definition shared by the actual and the ideal ranking so
        # the two can never drift apart.
        head = 2.0 ** ranked_relevance[0] - 1
        return head + np.sum(ranked_relevance[1:depth] / discounts)

    max_score = _dcg(y_true[y_true.argsort()[::-1]])
    if max_score == 0:
        return 0.0
    dcg_score = _dcg(y_true[y_score.argsort()[::-1]])
    return float(dcg_score / max_score)
def precision_top_n(probs_pred: np.ndarray, labels_true: np.ndarray, n: int):
    """Micro-averaged precision when the ``n`` most probable classes are predicted.

    ``probs_pred`` has shape [batch size, n_classes]. For every sample the
    ``n`` highest-probability classes are marked as predicted in an
    indicator matrix shaped like ``labels_true``, which is then scored
    against ``labels_true`` with ``precision_score(average='micro')``.
    """
    # Descending by probability, truncated to the top n classes per row.
    ranked_classes = probs_pred.argsort(axis=1)[:, ::-1]
    top_classes = ranked_classes[:, :n]

    top_preds = np.zeros_like(labels_true)
    row_ids = np.arange(len(top_classes))[:, np.newaxis]
    top_preds[row_ids, top_classes] = 1

    return precision_score(labels_true, top_preds, average='micro')
def precision_at_k(y_true: np.ndarray, y_score: np.ndarray, k: int) -> float:
    """Mean relevance of the ``k`` highest-scored items.

    Returns ``0.0`` when none of the top ``k`` items carries any
    relevance. If fewer than ``k`` items exist, the mean is taken over the
    available ones.
    """
    top_k = y_score.argsort()[::-1][:k]
    top_k_relevance = y_true[top_k]
    if np.sum(top_k_relevance) == 0:
        return 0.0
    return np.mean(top_k_relevance)
def sort_score_and_label(labels: np.ndarray, pred_scores: np.ndarray):
    """Order labels and scores by descending predicted score.

    Both inputs are converted to arrays first, so plain sequences are
    accepted. Returns ``(sorted_labels, sorted_scores)``.
    """
    label_arr = np.array(labels)
    score_arr = np.array(pred_scores)
    descending = np.flip(np.argsort(score_arr))
    return label_arr[descending], score_arr[descending]
def shuffle_max_logits(logits: np.ndarray, n: int) -> np.ndarray:
    """Randomly permute the ``n`` largest logits among themselves.

    A simple defense mechanism: the positions of the top ``n`` logits stay
    the same, but their values are shuffled with ``np.random.shuffle``.
    The squeezed array is modified in place and returned; since
    ``squeeze`` returns a view, the caller's array is modified as well.
    """
    squeezed = logits.squeeze()
    top_positions = squeezed.argsort()[-n:][::-1]
    # Fancy indexing copies, so shuffling does not touch ``squeezed`` yet.
    top_values = squeezed[top_positions]
    np.random.shuffle(top_values)
    squeezed[top_positions] = top_values
    return squeezed
def _compute_ranks(cls, x: np.ndarray) -> np.ndarray:
    """
    Return the 0-based rank of every element of a 1-D array.

    The smallest element gets rank 0 and the largest ``len(x) - 1``.
    Note: This is different from scipy.stats.rankdata, which returns
    ranks in [1, len(x)].
    """
    assert x.ndim == 1
    n_items = len(x)
    ascending = x.argsort()
    ranks = np.empty(n_items, dtype=int)
    # The element at sorted position p receives rank p.
    np.put(ranks, ascending, np.arange(n_items))
    return ranks