from typing import List

import torch


def reciprocal_rank(outputs: torch.Tensor, targets: torch.Tensor, k: int) -> torch.Tensor:
    """
    Calculate the Reciprocal Rank (MRR) score given model outputs and targets.
    Data is aggregated in batches.

    Args:
        outputs:
            Tensor with predicted scores
            size: [batch_size, slate_length]
            model outputs, logits
        targets:
            Binary tensor with ground truth.
            1 means the item is relevant and 0 means it is not relevant
            size: [batch_size, slate_length]
            ground truth, labels
        k:
            Parameter for evaluation on top-k items

    Returns:
        MRR score

    Examples:
        >>> reciprocal_rank(
        >>>     outputs=torch.Tensor([
        >>>         [4.0, 2.0, 3.0, 1.0],
        >>>         [1.0, 2.0, 3.0, 4.0],
        >>>     ]),
        >>>     targets=torch.Tensor([
        >>>         [0, 0, 1.0, 1.0],
        >>>         [0, 0, 1.0, 1.0],
        >>>     ]),
        >>>     k=1,
        >>> )
        tensor([[0.],
                [1.]])
        >>> reciprocal_rank(
        >>>     outputs=torch.Tensor([
        >>>         [4.0, 2.0, 3.0, 1.0],
        >>>         [1.0, 2.0, 3.0, 4.0],
        >>>     ]),
        >>>     targets=torch.Tensor([
        >>>         [0, 0, 1.0, 1.0],
        >>>         [0, 0, 1.0, 1.0],
        >>>     ]),
        >>>     k=3,
        >>> )
        tensor([[0.5000],
                [1.0000]])
    """
    k = min(outputs.size(1), k)
    # reorder the labels by predicted score and keep the top-k positions
    targets_sort_by_outputs_at_k = process_recsys_components(outputs, targets)[:, :k]
    # 0-based position of the first relevant item within the top-k
    values, indices = torch.max(targets_sort_by_outputs_at_k, dim=1)
    indices = indices.type_as(values).unsqueeze(dim=0).t()
    # reciprocal of the 1-based rank of the first hit
    mrr_score = torch.tensor(1.0) / (indices + torch.tensor(1.0))
    # slates with no relevant item in the top-k score zero
    zero_sum_mask = values == 0.0
    mrr_score[zero_sum_mask] = 0.0
    return mrr_score
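# Every metric in this module reorders the ground-truth labels by the
# predicted scores via ``process_recsys_components``. A minimal sketch of such
# a helper is given below so the file is self-contained; treat the body as an
# assumption, since the library's actual implementation may differ.
def process_recsys_components(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    """Sort each row of ``targets`` by the corresponding row of ``outputs``,
    in descending order of predicted score."""
    order = torch.argsort(outputs, dim=1, descending=True)
    return torch.gather(targets, dim=1, index=order)


# ``NAN_TO_NUM_FN`` (used by ``hitrate`` below) replaces NaNs produced by
# zero division. Assuming PyTorch >= 1.8, ``torch.nan_to_num`` has a matching
# signature; this alias is an assumption about the library's setup.
NAN_TO_NUM_FN = torch.nan_to_num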
def hitrate(
    outputs: torch.Tensor,
    targets: torch.Tensor,
    topk: List[int],
    zero_division: int = 0,
) -> List[torch.Tensor]:
    """
    Calculate the hit rate (aka recall) score given model outputs and targets.

    Hit rate is a metric for evaluating ranking systems: generate the top-N
    recommendations, and if one of the recommendations is an item the user has
    actually rated, count that as a hit. By "rated" we mean any explicit form
    of user interaction. Add up all of the hits for all users and then divide
    by the number of users.

    Compute the top-N recommendations for each user in the training stage and
    intentionally remove one of these items from the training data.

    Args:
        outputs (torch.Tensor):
            Tensor with predicted scores
            size: [batch_size, slate_length]
            model outputs, logits
        targets (torch.Tensor):
            Binary tensor with ground truth.
            1 means the item is relevant for the user and 0 means it is not relevant
            size: [batch_size, slate_length]
            ground truth, labels
        topk (List[int]):
            Parameter for evaluation on top-k items
        zero_division (int):
            value returned in the case of division by zero;
            should be either 0 or 1

    Returns:
        hitrate_at_k (List[torch.Tensor]): the hitrate score

    Example:

    .. code-block:: python

        import torch
        from catalyst import metrics
        metrics.hitrate(
            outputs=torch.Tensor([[4.0, 2.0, 3.0, 1.0], [1.0, 2.0, 3.0, 4.0]]),
            targets=torch.Tensor([[0, 0, 1.0, 1.0], [0, 0, 0.0, 0.0]]),
            topk=[1, 2, 3, 4],
        )
        # [tensor(0.), tensor(0.2500), tensor(0.2500), tensor(0.5000)]
    """
    results = []
    # reorder the labels by predicted score once and reuse for every k
    targets_sort_by_outputs = process_recsys_components(outputs, targets)
    for k in topk:
        k = min(outputs.size(1), k)
        # fraction of each user's relevant items that appear in the top-k
        hits_score = torch.sum(targets_sort_by_outputs[:, :k], dim=1) / targets.sum(dim=1)
        # users with no relevant items produce NaN; replace with zero_division
        hits_score = NAN_TO_NUM_FN(hits_score, zero_division)
        results.append(torch.mean(hits_score))
    return results
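# A quick usage sketch for the ``zero_division`` fallback: a user with no
# relevant items makes the denominator ``targets.sum(dim=1)`` zero, and the
# resulting NaN is replaced with ``zero_division``:
#
#     hitrate(
#         outputs=torch.tensor([[1.0, 2.0]]),
#         targets=torch.tensor([[0.0, 0.0]]),
#         topk=[1],
#         zero_division=1,
#     )  # -> [tensor(1.)]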
def average_precision(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    """
    Calculate the Average Precision for RecSys.
    The precision metric summarizes the fraction of relevant items
    out of the whole recommendation list.

    To compute the precision at k, set the threshold rank k and compute
    the percentage of relevant items among the top k,
    ignoring the documents ranked lower than k.

    The average precision at k (AP at k) summarizes the average
    precision for relevant items up to the k-th one.
    Wikipedia entry for Average precision:
    <https://en.wikipedia.org/w/index.php?title=Information_retrieval&
    oldid=793358396#Average_precision>

    If a relevant document never gets retrieved, we assume the precision
    corresponding to that relevant doc to be zero.

    Args:
        outputs (torch.Tensor):
            Tensor with predicted scores
            size: [batch_size, slate_length]
            model outputs, logits
        targets (torch.Tensor):
            Binary tensor with ground truth.
            1 means the item is relevant and 0 means it is not relevant
            size: [batch_size, slate_length]
            ground truth, labels

    Returns:
        ap_score (torch.Tensor):
            The AP score for each batch.
            size: [batch_size, 1]

    Examples:
        >>> average_precision(
        >>>     outputs=torch.tensor([
        >>>         [9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
        >>>         [9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
        >>>     ]),
        >>>     targets=torch.tensor([
        >>>         [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0],
        >>>         [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0],
        >>>     ]),
        >>> )
        tensor([0.6222, 0.4429])
    """
    targets_sort_by_outputs = process_recsys_components(outputs, targets)
    precisions = torch.zeros_like(targets_sort_by_outputs)
    # precision@(index + 1) for every prefix of the ranked list
    for index in range(outputs.size(1)):
        precisions[:, index] = torch.sum(
            targets_sort_by_outputs[:, : (index + 1)], dim=1
        ) / float(index + 1)
    # keep precision values only at the positions of relevant items
    only_relevant_precision = precisions * targets_sort_by_outputs
    ap_score = only_relevant_precision.sum(dim=1) / (only_relevant_precision != 0).sum(dim=1)
    # slates with no relevant items produce NaN; score them as zero
    ap_score[torch.isnan(ap_score)] = 0
    return ap_score
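# Mean Average Precision (MAP) over a batch is just the mean of the per-slate
# AP scores above. A minimal sketch; ``mean_average_precision_sketch`` is a
# hypothetical name, not necessarily the library's API:
def mean_average_precision_sketch(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    """Average the per-slate AP scores into a single batch-level MAP value."""
    return average_precision(outputs, targets).mean()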
def dcg(outputs: torch.Tensor, targets: torch.Tensor, gain_function="exp_rank") -> torch.Tensor:
    """
    Computes Discounted Cumulative Gain (DCG): the per-position discounted
    gains, whose sum over the top-k positions gives DCG@k.
    Graded relevance is used as a measure of usefulness, or gain, from
    examining a set of items. The gain is reduced at lower ranks.
    Reference:
    https://en.wikipedia.org/wiki/Discounted_cumulative_gain

    Args:
        outputs: model outputs, logits
            with shape [batch_size, slate_length]
        targets: ground truth, labels
            with shape [batch_size, slate_length]
        gain_function:
            String indicating the gain function for the ground truth labels.
            Two options are available:
            - `exp_rank`: torch.pow(2, x) - 1
            - `linear_rank`: x
            By default, `exp_rank` is used to put the emphasis on
            retrieving the relevant documents.

    Returns:
        dcg_score (torch.Tensor): The discounted gains tensor

    Raises:
        ValueError: gain function can be either `exp_rank` or `linear_rank`

    Examples:

    .. code-block:: python

        from catalyst import metrics
        metrics.dcg(
            outputs=torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets=torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="linear_rank",
        )
        # tensor([[2.0000, 2.0000, 0.6309, 0.0000]])

    .. code-block:: python

        from catalyst import metrics
        metrics.dcg(
            outputs=torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets=torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="linear_rank",
        ).sum()
        # tensor(4.6309)

    .. code-block:: python

        from catalyst import metrics
        metrics.dcg(
            outputs=torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets=torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="exp_rank",
        )
        # tensor([[3.0000, 1.8928, 0.5000, 0.0000]])

    .. code-block:: python

        from catalyst import metrics
        metrics.dcg(
            outputs=torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets=torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="exp_rank",
        ).sum()
        # tensor(5.3928)
    """
    targets_sort_by_outputs = process_recsys_components(outputs, targets)
    target_device = targets_sort_by_outputs.device
    if gain_function == "exp_rank":
        gain_function = lambda x: torch.pow(2, x) - 1
        gains = gain_function(targets_sort_by_outputs)
        # rank i (0-based) is discounted by 1 / log2(i + 2)
        discounts = torch.tensor(1) / torch.log2(
            torch.arange(
                targets_sort_by_outputs.shape[1], dtype=torch.float, device=target_device
            )
            + 2.0
        )
        discounted_gains = gains * discounts
    elif gain_function == "linear_rank":
        # rank i (0-based) is discounted by 1 / log2(i + 1) ...
        discounts = torch.tensor(1) / torch.log2(
            torch.arange(
                targets_sort_by_outputs.shape[1], dtype=torch.float, device=target_device
            )
            + 1.0
        )
        # ... except the first position, where log2(1) == 0 would give an
        # infinite discount; pin it to 1 instead
        discounts[0] = 1
        discounted_gains = targets_sort_by_outputs * discounts
    else:
        raise ValueError("gain function can be either exp_rank or linear_rank")

    dcg_score = discounted_gains
    return dcg_score
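# Normalized DCG (nDCG) divides the DCG of the predicted ordering by the DCG
# of the ideal ordering (the targets sorted by themselves). A sketch built on
# ``dcg`` above; an illustration, not the library's actual ndcg API:
def ndcg_sketch(
    outputs: torch.Tensor, targets: torch.Tensor, gain_function: str = "exp_rank"
) -> torch.Tensor:
    """Per-slate nDCG: achieved DCG divided by the best achievable DCG."""
    actual = dcg(outputs, targets, gain_function).sum(dim=1)
    ideal = dcg(targets, targets, gain_function).sum(dim=1)
    # guard against slates with no relevant items (zero ideal gain)
    return actual / ideal.clamp(min=1e-8)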