Code example #1
def reciprocal_rank(outputs: torch.Tensor, targets: torch.Tensor,
                    k: int) -> torch.Tensor:
    """
    Calculate the Reciprocal Rank (MRR)
    score given model outputs and targets.
    Data is aggregated in batches.

    Args:
        outputs:
            Tensor with predicted scores
            size: [batch_size, slate_length]
            model outputs, logits
        targets:
            Binary tensor with ground truth.
            1 means the item is relevant
            and 0 means it is not relevant
            size: [batch_size, slate_length]
            ground truth, labels
        k:
            Parameter for evaluation on top-k items

    Returns:
        MRR score

    Examples:
        >>> reciprocal_rank(
        >>>     outputs=torch.Tensor([
        >>>         [4.0, 2.0, 3.0, 1.0],
        >>>         [1.0, 2.0, 3.0, 4.0],
        >>>     ]),
        >>>     targets=torch.Tensor([
        >>>         [0, 0, 1.0, 1.0],
        >>>         [0, 0, 1.0, 1.0],
        >>>     ]),
        >>>     k=1,
        >>> )
        tensor([[0.], [1.]])
        >>> reciprocal_rank(
        >>>     outputs=torch.Tensor([
        >>>         [4.0, 2.0, 3.0, 1.0],
        >>>         [1.0, 2.0, 3.0, 4.0],
        >>>     ]),
        >>>     targets=torch.Tensor([
        >>>         [0, 0, 1.0, 1.0],
        >>>         [0, 0, 1.0, 1.0],
        >>>     ]),
        >>>     k=3,
        >>> )
        tensor([[0.5000], [1.0000]])
    """
    k = min(outputs.size(1), k)
    # reorder ground-truth labels by predicted score and cut the slate at k
    targets_sort_by_outputs_at_k = process_recsys_components(outputs,
                                                             targets)[:, :k]
    # 0-based rank of the first relevant item within the top-k, if any
    values, indices = torch.max(targets_sort_by_outputs_at_k, dim=1)
    indices = indices.type_as(values).unsqueeze(dim=0).t()
    mrr_score = torch.tensor(1.0) / (indices + torch.tensor(1.0))

    # slates with no relevant item in the top-k get a score of 0
    zero_sum_mask = values == 0.0
    mrr_score[zero_sum_mask] = 0.0
    return mrr_score
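
All five snippets rely on a `process_recsys_components` helper that is not shown here. A minimal sketch, assuming its only job is to reorder each row of `targets` by the corresponding row of `outputs` in descending score order:

import torch


def process_recsys_components(outputs: torch.Tensor,
                              targets: torch.Tensor) -> torch.Tensor:
    """Sketch: sort each slate of targets by its predicted scores, descending."""
    # indices that would sort each row of outputs from highest to lowest score
    order = torch.argsort(outputs, dim=-1, descending=True)
    # reorder the ground-truth labels with those indices
    return torch.gather(targets, dim=-1, index=order)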
Code example #2
File: _hitrate.py Project: zkid18/catalyst
def hitrate(outputs: torch.Tensor,
            targets: torch.Tensor,
            topk: List[int],
            zero_division: int = 0) -> List[torch.Tensor]:
    """
    Calculate the hit rate (aka recall) score given
    model outputs and targets.
    Hit rate is a metric for evaluating ranking systems.
    Generate top-N recommendations, and if one of the recommendations
    is an item the user has actually rated, count that as a hit.
    By "rated" we mean any explicit form of the user's interaction.
    Add up all of the hits for all users and then divide by the number of users.

    Compute the top-N recommendations for each user in the training stage
    and intentionally remove one of these items from the training data.

    Args:
        outputs (torch.Tensor):
            Tensor with predicted scores
            size: [batch_size, slate_length]
            model outputs, logits
        targets (torch.Tensor):
            Binary tensor with ground truth.
            1 means the item is relevant
            for the user and 0 means it is not relevant
            size: [batch_size, slate_length]
            ground truth, labels
        topk (List[int]):
            Parameter for evaluation on top-k items
        zero_division (int):
            value returned in the case of division by zero;
            should be either 0 or 1

    Returns:
        hitrate_at_k (List[torch.Tensor]): the hitrate score

    Example:

    .. code-block:: python

        import torch
        from catalyst import metrics
        metrics.hitrate(
            outputs=torch.Tensor([[4.0, 2.0, 3.0, 1.0], [1.0, 2.0, 3.0, 4.0]]),
            targets=torch.Tensor([[0, 0, 1.0, 1.0], [0, 0, 0.0, 0.0]]),
            topk=[1, 2, 3, 4],
        )
        # [tensor(0.), tensor(0.2500), tensor(0.2500), tensor(0.5000)]
    """
    results = []

    targets_sort_by_outputs = process_recsys_components(outputs, targets)
    for k in topk:
        k = min(outputs.size(1), k)
        # fraction of the user's relevant items that appear in the top-k
        hits_score = torch.sum(targets_sort_by_outputs[:, :k],
                               dim=1) / targets.sum(dim=1)
        # users with no relevant items divide by zero; replace NaN with zero_division
        hits_score = NAN_TO_NUM_FN(hits_score, zero_division)
        results.append(torch.mean(hits_score))

    return results
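
This variant also depends on a `NAN_TO_NUM_FN` helper (not shown) to handle users whose slate contains no relevant items, where `targets.sum(dim=1)` is zero and the division yields NaN. A plausible minimal stand-in, assuming the helper simply replaces NaN entries with the supplied value (on recent PyTorch it could just wrap `torch.nan_to_num`):

import torch


def NAN_TO_NUM_FN(x: torch.Tensor, nan: float = 0.0) -> torch.Tensor:
    """Sketch: replace NaN entries (e.g. from division by zero) with `nan`."""
    return torch.where(torch.isnan(x), torch.full_like(x, nan), x)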
Code example #3
File: _hitrate.py Project: rongyinet/catalyst
def hitrate(outputs: torch.Tensor, targets: torch.Tensor, topk: List[int]) -> List[torch.Tensor]:
    """
    Calculate the hit rate score given model outputs and targets.
    Hit rate is a metric for evaluating ranking systems.
    Generate top-N recommendations, and if one of the recommendations
    is an item the user has actually rated, count that as a hit.
    By "rated" we mean any explicit form of the user's interaction.
    Add up all of the hits for all users and then divide by the number of users.

    Compute the top-N recommendations for each user in the training stage
    and intentionally remove one of these items from the training data.

    Args:
        outputs (torch.Tensor):
            Tensor with predicted scores
            size: [batch_size, slate_length]
            model outputs, logits
        targets (torch.Tensor):
            Binary tensor with ground truth.
            1 means the item is relevant
            for the user and 0 means it is not relevant
            size: [batch_size, slate_length]
            ground truth, labels
        topk (List[int]):
            Parameter for evaluation on top-k items

    Returns:
        hitrate_at_k (List[torch.Tensor]):
            the hit rate score
    """
    results = []

    targets_sort_by_outputs = process_recsys_components(outputs, targets)
    for k in topk:
        k = min(outputs.size(1), k)
        # fraction of the top-k positions occupied by relevant items
        hits_score = torch.sum(targets_sort_by_outputs[:, :k], dim=1) / k
        results.append(torch.mean(hits_score))

    return results
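
This variant carries no usage example in its docstring. Note that it normalizes by the cut-off k rather than by the number of relevant items per user. A hypothetical call, assuming `process_recsys_components` sorts targets by descending score as sketched above:

import torch

outputs = torch.Tensor([[4.0, 2.0, 3.0, 1.0], [1.0, 2.0, 3.0, 4.0]])
targets = torch.Tensor([[0, 0, 1.0, 1.0], [0, 0, 1.0, 1.0]])
hitrate(outputs, targets, topk=[1, 2])
# k=1: the top-ranked item is relevant only in the second slate -> mean (0 + 1) / 2 = 0.5
# k=2: per-slate scores 1/2 and 2/2 -> mean 0.75
# expected: [tensor(0.5000), tensor(0.7500)]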
Code example #4
def average_precision(outputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    """
    Calculate the Average Precision for RecSys.
    The precision metric summarizes the fraction of relevant items
    out of the whole recommendation list.

    To compute the precision at k, set the threshold rank k and
    compute the percentage of relevant items in the top-k,
    ignoring the documents ranked lower than k.

    The average precision at k (AP at k) summarizes the average
    precision for relevant items up to the k-th one.
    Wikipedia entry for Average precision:
    <https://en.wikipedia.org/w/index.php?title=Information_retrieval&oldid=793358396#Average_precision>

    If a relevant document never gets retrieved,
    we assume the precision corresponding to that
    relevant doc to be zero

    Args:
        outputs (torch.Tensor):
            Tensor with predicted scores
            size: [batch_size, slate_length]
            model outputs, logits
        targets (torch.Tensor):
            Binary tensor with ground truth.
            1 means the item is relevant
            and 0 means it is not relevant
            size: [batch_size, slate_length]
            ground truth, labels

    Returns:
        ap_score (torch.Tensor):
            The average precision score for each slate in the batch.
            size: [batch_size]

    Examples:
        >>> average_precision(
        >>>     outputs=torch.tensor([
        >>>         [9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
        >>>         [9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
        >>>     ]),
        >>>     targets=torch.tensor([
        >>>         [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0],
        >>>         [0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0],
        >>>     ]),
        >>> )
        tensor([0.6222, 0.4429])
    """
    targets_sort_by_outputs = process_recsys_components(outputs, targets)
    precisions = torch.zeros_like(targets_sort_by_outputs)

    # precision@k for every cut-off position k of the slate
    for index in range(outputs.size(1)):
        precisions[:, index] = torch.sum(targets_sort_by_outputs[:, : (index + 1)], dim=1) / float(
            index + 1
        )

    # keep precision values only at the positions of relevant items
    only_relevant_precision = precisions * targets_sort_by_outputs
    ap_score = only_relevant_precision.sum(dim=1) / ((only_relevant_precision != 0).sum(dim=1))
    # slates with no relevant items divide by zero and give NaN; map those to 0
    ap_score[torch.isnan(ap_score)] = 0
    return ap_score
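
A quick hand-check of how the docstring values arise (ranks are 1-indexed and both example slates are already in descending score order):

# slate 1: relevant items sit at ranks 1, 3, 6, 9, 10
ap_slate_1 = (1 / 1 + 2 / 3 + 3 / 6 + 4 / 9 + 5 / 10) / 5  # ~0.6222
# slate 2: relevant items sit at ranks 2, 5, 7
ap_slate_2 = (1 / 2 + 2 / 5 + 3 / 7) / 3  # ~0.4429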
Code example #5
File: _ndcg.py Project: Podidiving/catalyst
def dcg(outputs: torch.Tensor,
        targets: torch.Tensor,
        gain_function="exp_rank") -> torch.Tensor:
    """
    Computes the Discounted Cumulative Gain (DCG)
    for each position of the slate.
    Graded relevance is used as a measure of usefulness,
    or gain, from examining a set of items.
    Gain is discounted at lower ranks.
    Reference:
    https://en.wikipedia.org/wiki/Discounted_cumulative_gain

    Args:
        outputs: model outputs, logits
            with shape [batch_size; slate_length]
        targets: ground truth, labels
            with shape [batch_size; slate_length]
        gain_function:
            String indicating the gain function for the ground truth labels.
            Two options are available:
            - `exp_rank`: torch.pow(2, x) - 1
            - `linear_rank`: x
            By default, `exp_rank` is used
            to put more emphasis on retrieving the relevant documents.

    Returns:
        dcg_score (torch.Tensor):
            The discounted gains tensor

    Raises:
        ValueError: if `gain_function` is neither `exp_rank` nor `linear_rank`

    Examples:

    .. code-block:: python

        from catalyst import metrics
        metrics.dcg(
            outputs = torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets = torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="linear_rank",
        )
        # tensor([[2.0000, 2.0000, 0.6309, 0.0000]])

    .. code-block:: python

        from catalyst import metrics
        metrics.dcg(
            outputs = torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets = torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="linear_rank",
        ).sum()
        # tensor(4.6309)

    .. code-block:: python

        from catalyst import metrics
        metrics.dcg(
            outputs = torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets = torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="exp_rank",
        )
        # tensor([[3.0000, 1.8928, 0.5000, 0.0000]])

    .. code-block:: python

        from catalyst import metrics
        metrics.dcg(
            outputs = torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets = torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="exp_rank",
        ).sum()
        # tensor(5.3928)
    """
    targets_sort_by_outputs = process_recsys_components(outputs, targets)
    target_device = targets_sort_by_outputs.device

    if gain_function == "exp_rank":
        # gain 2^relevance - 1, discount 1 / log2(rank + 2) with 0-based ranks
        gain_function = lambda x: torch.pow(2, x) - 1
        gains = gain_function(targets_sort_by_outputs)
        discounts = torch.tensor(1) / torch.log2(
            torch.arange(targets_sort_by_outputs.shape[1],
                         dtype=torch.float,
                         device=target_device) + 2.0)
        discounted_gains = gains * discounts

    elif gain_function == "linear_rank":
        # gain is the raw relevance, discount 1 / log2(rank + 1); rank 0 keeps full gain
        discounts = torch.tensor(1) / torch.log2(
            torch.arange(targets_sort_by_outputs.shape[1],
                         dtype=torch.float,
                         device=target_device) + 1.0)
        discounts[0] = 1
        discounted_gains = targets_sort_by_outputs * discounts

    else:
        raise ValueError("gain function can be either exp_rank or linear_rank")

    dcg_score = discounted_gains
    return dcg_score
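
`dcg` returns per-position discounted gains rather than a single score; summing over the slate gives DCG, and dividing by the DCG of an ideally ordered slate gives nDCG. The `ndcg_score` helper below is a hypothetical sketch built on top of this snippet, not part of the original file:

import torch


def ndcg_score(outputs: torch.Tensor, targets: torch.Tensor,
               gain_function: str = "exp_rank") -> torch.Tensor:
    """Sketch: normalized DCG per slate, assuming `dcg` as defined above."""
    # DCG of the predicted ordering, summed over slate positions
    dcg_pred = dcg(outputs, targets, gain_function).sum(dim=1)
    # ideal DCG: score the targets against themselves (perfect ordering)
    dcg_ideal = dcg(targets, targets, gain_function).sum(dim=1)
    # slates with no relevant items would divide by zero; left unguarded here
    return dcg_pred / dcg_ideal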