Example #1
def hinge_loss(
        positive_scores: torch.tensor,
        negative_scores: torch.tensor,
        num_items: Optional[Any] = None,
        positive_items: Optional[torch.tensor] = None,
        negative_items: Optional[torch.tensor] = None,
        metadata: Optional[Dict[str, torch.tensor]] = dict(),
        metadata_weights: Optional[Dict[str, float]] = dict(),
) -> torch.tensor:
    """
    Modified hinge pairwise loss function [2]_.

    See ``ideal_difference_from_metadata`` docstring for more info on how metadata is used.

    Modified from ``Spotlight``:
    https://github.com/maciejkula/spotlight/blob/master/spotlight/losses.py

    Parameters
    -------------
    positive_scores: torch.tensor, 1-d
        Tensor containing scores for known positive items
    negative_scores: torch.tensor, 1-d
        Tensor containing scores for a single sampled negative item
    num_items: Any
        Ignored, included only for compatibility with WARP loss
    positive_items: torch.tensor, 1-d
        Tensor containing ids for known positive items of shape ``1 x batch_size``. This is only
        needed if ``metadata`` is provided
    negative_items: torch.tensor, 1-d
        Tensor containing ids for randomly-sampled negative items of shape ``1 x batch_size``. This
        is only needed if ``metadata`` is provided
    metadata: dict
        Keys should be strings identifying each metadata type that match keys in
        ``metadata_weights``. Values should be a ``torch.tensor`` of shape (num_items x 1). Each
        tensor should contain categorical metadata information about items (e.g. a number
        representing the genre of the item)
    metadata_weights: dict
        Keys should be strings identifying each metadata type that match keys in ``metadata``.
        Values should be the amount of weight to place on a match of that type of metadata, with the
        sum of all values ``<= 1``.
        e.g. If ``metadata_weights = {'genre': .3, 'director': .2}``, then an item is:

        * a 100% match if it's the same item,

        * a 50% match if it's a different item with the same genre and same director,

        * a 30% match if it's a different item with the same genre and different director,

        * a 20% match if it's a different item with a different genre and same director,

        * a 0% match if it's a different item with a different genre and different director,
          which is equivalent to the loss without any partial credit

    Returns
    -------------
    loss: torch.tensor

    References
    -------------
    .. [2] "Hinge Loss." Wikipedia, Wikimedia Foundation, 5 Mar. 2021, en.wikipedia.org/wiki/
        Hinge_loss.

    """
    score_difference = (positive_scores - negative_scores)

    if metadata is not None and len(metadata) > 0:
        ideal_difference = ideal_difference_from_metadata(
            positive_items=positive_items,
            negative_items=negative_items,
            metadata=metadata,
            metadata_weights=metadata_weights,
        )
    else:
        ideal_difference = 1

    loss = torch.clamp((ideal_difference - score_difference), min=0)

    return (loss.sum() + loss.pow(2).sum()) / len(positive_scores)
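
As a quick sanity check, here is a minimal, hypothetical usage sketch of ``hinge_loss`` without metadata (so ``ideal_difference`` falls back to ``1``); the tensor values are made up purely for illustration and assume the function above is available as written.

import torch

# illustrative scores for a batch of three user-item interactions
positive_scores = torch.tensor([2.5, 0.1, 1.0])
negative_scores = torch.tensor([0.5, 0.7, 1.0])

# with no metadata, each element-wise loss is ``clamp(1 - (positive - negative), min=0)``:
# score differences are [2.0, -0.6, 0.0], so the element-wise losses are [0.0, 1.6, 1.0]
loss = hinge_loss(positive_scores, negative_scores)

# the returned value is (sum + sum of squares) / batch_size = (2.6 + 3.56) / 3
print(loss)  # tensor(2.0533)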
Example #2
def adaptive_hinge_loss(
    positive_scores: torch.tensor,
    many_negative_scores: torch.tensor,
    positive_items: Optional[torch.tensor] = None,
    negative_items: Optional[torch.tensor] = None,
    metadata: Optional[Dict[str, torch.tensor]] = dict(),
    metadata_weights: Optional[Dict[str, float]] = dict(),
    **kwargs,
) -> torch.tensor:
    """
    Modified adaptive hinge pairwise loss function [3]_.

    Approximates WARP loss by taking the maximum of negative predictions for each user and sending
    this to hinge loss.

    See ``ideal_difference_from_metadata`` docstring for more info on how metadata is used.

    Modified from ``Spotlight``:
    https://github.com/maciejkula/spotlight/blob/master/spotlight/losses.py

    Parameters
    -------------
    positive_scores: torch.tensor, 1-d
        Tensor containing scores for known positive items of shape ``1 x batch_size``
    many_negative_scores: torch.tensor, 2-d
        Tensor containing scores for many (n > 1) sampled negative items, of shape
        ``num_negative_samples x batch_size``. More negative samples increase the likelihood of
        finding ranking-violating pairs, but risk overfitting
    positive_items: torch.tensor, 1-d
        Tensor containing ids for known positive items of shape ``1 x batch_size``. This is only
        needed if ``metadata`` is provided
    negative_items: torch.tensor, 2-d
        Tensor containing ids for sampled negative items of shape
        ``num_negative_samples x batch_size``. This is only needed if ``metadata`` is provided
    metadata: dict
        Keys should be strings identifying each metadata type that match keys in
        ``metadata_weights``. Values should be a ``torch.tensor`` of shape (num_items x 1). Each
        tensor should contain categorical metadata information about items (e.g. a number
        representing the genre of the item)
    metadata_weights: dict
        Keys should be strings identifying each metadata type that match keys in ``metadata``.
        Values should be the amount of weight to place on a match of that type of metadata, with the
        sum of all values ``<= 1``.
        e.g. If ``metadata_weights = {'genre': .3, 'director': .2}``, then an item is:

        * a 100% match if it's the same item,

        * a 50% match if it's a different item with the same genre and same director,

        * a 30% match if it's a different item with the same genre and different director,

        * a 20% match if it's a different item with a different genre and same director,

        * a 0% match if it's a different item with a different genre and different director,
          which is equivalent to the loss without any partial credit
    **kwargs: keyword arguments
        Ignored, included only for compatibility with WARP loss

    Returns
    -------------
    loss: torch.tensor

    References
    -------------
    .. [3] Kula, Maciej. "Loss Functions." Loss Functions - Spotlight Documentation,
        maciejkula.github.io/spotlight/losses.html.

    """
    if len(kwargs) > 0 and list(kwargs) != ['num_items']:
        raise ValueError(f'Unexpected ``kwargs``: {kwargs}')

    many_positive_scores = positive_scores.repeat(
        [many_negative_scores.shape[0], 1])

    if negative_items is not None and positive_items is not None:
        positive_items = positive_items.repeat(
            [many_negative_scores.shape[0], 1])

    score_difference = (many_positive_scores - many_negative_scores)

    if metadata is not None and len(metadata) > 0:
        ideal_difference = ideal_difference_from_metadata(
            positive_items,
            negative_items,
            metadata,
            metadata_weights,
        )
    else:
        ideal_difference = 1

    loss, _ = torch.max((ideal_difference - score_difference), 0)
    loss = torch.clamp(loss, min=0)

    return (loss.sum() + loss.pow(2).sum()) / len(positive_scores)
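
Below is a small, hypothetical sketch showing how ``adaptive_hinge_loss`` reduces to hinge loss on the hardest sampled negative per user; the numbers are invented for illustration and no metadata is passed.

import torch

# two users (batch_size = 2), two sampled negatives per user
positive_scores = torch.tensor([1.0, 0.5])
many_negative_scores = torch.tensor([[0.2, 0.9],
                                     [0.8, 0.1]])

# per user, the maximum of ``1 - (positive - negative)`` over the negatives is kept:
# user 0 keeps 1 - (1.0 - 0.8) = 0.8, user 1 keeps 1 - (0.5 - 0.9) = 1.4
loss = adaptive_hinge_loss(positive_scores, many_negative_scores)

# the returned value is (sum + sum of squares) / batch_size = (2.2 + 2.6) / 2
print(loss)  # tensor(2.4000)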
Example #3
def warp_loss(
        positive_scores: torch.tensor,
        many_negative_scores: torch.tensor,
        num_items: int,
        positive_items: Optional[torch.tensor] = None,
        negative_items: Optional[torch.tensor] = None,
        metadata: Optional[Dict[str, torch.tensor]] = dict(),
        metadata_weights: Optional[Dict[str, float]] = dict(),
) -> torch.tensor:
    """
    Modified WARP loss function [4]_.

    See http://www.thespermwhale.com/jaseweston/papers/wsabie-ijcai.pdf for loss equation.

    See ``ideal_difference_from_metadata`` docstring for more info on how metadata is used.

    Parameters
    -------------
    positive_scores: torch.tensor, 1-d
        Tensor containing scores for known positive items of shape ``1 x batch_size``
    many_negative_scores: torch.tensor, 2-d
        Tensor containing scores for many (n > 1) sampled negative items, of shape
        ``num_negative_samples x batch_size``. More negative samples increase the likelihood of
        finding ranking-violating pairs, but risk overfitting
    num_items: int
        Total number of items in the dataset
    positive_items: torch.tensor, 1-d
        Tensor containing ids for known positive items of shape ``1 x batch_size``. This is only
        needed if ``metadata`` is provided
    negative_items: torch.tensor, 2-d
        Tensor containing ids for sampled negative items of shape
        ``num_negative_samples x batch_size``. This is only needed if ``metadata`` is provided
    metadata: dict
        Keys should be strings identifying each metadata type that match keys in
        ``metadata_weights``. Values should be a ``torch.tensor`` of shape (num_items x 1). Each
        tensor should contain categorical metadata information about items (e.g. a number
        representing the genre of the item)
    metadata_weights: dict
        Keys should be strings identifying each metadata type that match keys in ``metadata``.
        Values should be the amount of weight to place on a match of that type of metadata, with the
        sum of all values ``<= 1``.
        e.g. If ``metadata_weights = {'genre': .3, 'director': .2}``, then an item is:

        * a 100% match if it's the same item,

        * a 50% match if it's a different item with the same genre and same director,

        * a 30% match if it's a different item with the same genre and different director,

        * a 20% match if it's a different item with a different genre and same director,

        * a 0% match if it's a different item with a different genre and different director,
          which is equivalent to the loss without any partial credit

    Returns
    -------------
    loss: torch.tensor

    References
    -------------
    .. [4] Weston et al. WSABIE: Scaling Up To Large Vocabulary Image Annotation.
        www.thespermwhale.com/jaseweston/papers/wsabie-ijcai.pdf.

    """
    if negative_items is not None and positive_items is not None:
        positive_items = positive_items.repeat(
            [many_negative_scores.shape[0], 1])

    if metadata is not None and len(metadata) > 0:
        ideal_difference = ideal_difference_from_metadata(
            positive_items=positive_items,
            negative_items=negative_items,
            metadata=metadata,
            metadata_weights=metadata_weights,
        ).transpose(1, 0)
    else:
        ideal_difference = 1

    # device to put new tensors on
    device = positive_scores.device

    # WARP loss requires a different structure for positive and negative samples
    positive_scores = positive_scores.view(len(positive_scores), 1)
    many_negative_scores = torch.transpose(many_negative_scores, 0, 1)

    batch_size, max_trials = many_negative_scores.size(
        0), many_negative_scores.size(1)

    flattened_new_row_indices = torch.arange(
        0, batch_size, 1).long().to(device) * (max_trials + 1)
    tensor_of_ones = torch.ones(batch_size, 1).float().to(device)

    # at this point, ``hinge_loss`` is essentially just the standard pairwise hinge loss
    hinge_loss = ideal_difference - positive_scores + many_negative_scores

    # Add a column of ones so we know when we have used all of our attempts. This is used for
    # indexing and for computing ``should_we_count_loss`` if no real value is above 0.
    initial_loss_with_ones = torch.cat([hinge_loss, tensor_of_ones], dim=1)
    # this will be modified in ``_find_first_loss_violation``
    initial_loss_with_ones_binary = torch.cat([hinge_loss, tensor_of_ones],
                                              dim=1)

    number_of_tries = _find_first_loss_violation(initial_loss_with_ones_binary,
                                                 device)

    prediction_index_for_flattened_predictions = number_of_tries + flattened_new_row_indices

    number_of_tries = (number_of_tries + 1).float()

    # IMPORTANT CHANGE: standard WARP weighting sets the numerator to ``num_items - 1``, but we
    # have found this does not penalize the case where the last item in a negative item sequence
    # ranks above a positive item score. Adjusting the numerator as below penalizes this correctly.
    # Additionally, applying a floor function to the ratio can have the same negative effect of
    # not counting loss. See the original implementation in the comment below, and our modified,
    # harsher calculation implemented after it.
    # loss_weights = torch.log(torch.floor((num_items - 1) / number_of_tries))
    loss_weights = torch.log((num_items / number_of_tries))

    # don't count loss if we used max number of attempts looking for a violation and didn't find one
    should_we_count_loss = (number_of_tries <= max_trials).float()

    loss = (loss_weights * (initial_loss_with_ones.flatten()
                            [prediction_index_for_flattened_predictions]) *
            should_we_count_loss)

    return (loss.sum() + loss.pow(2).sum()) / len(positive_scores)
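
To make the weighting comment above concrete, here is a tiny, standalone illustration (not part of the library itself) of how the modified rank weight ``log(num_items / number_of_tries)`` behaves: the fewer sampled negatives needed to find a ranking violation, the larger the penalty.

import torch

num_items = 1000

# a violation found on the first try implies a badly-ranked positive item and gets a
# large weight; needing many tries implies a well-ranked positive and gets a small one
for number_of_tries in [1.0, 2.0, 10.0, 100.0]:
    weight = torch.log(torch.tensor(num_items / number_of_tries))
    print(f'{number_of_tries:>6} tries -> weight {weight.item():.2f}')

# 1 try -> 6.91, 2 tries -> 6.21, 10 tries -> 4.61, 100 tries -> 2.30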
Example #4
def bpr_loss(
    positive_scores: torch.tensor,
    negative_scores: torch.tensor,
    positive_items: Optional[torch.tensor] = None,
    negative_items: Optional[torch.tensor] = None,
    metadata: Optional[Dict[str, torch.tensor]] = dict(),
    metadata_weights: Optional[Dict[str, float]] = dict(),
    **kwargs,
) -> torch.tensor:
    """
    Modified Bayesian Personalised Ranking [1]_.

    See ``ideal_difference_from_metadata`` docstring for more info on how metadata is used.

    Modified from ``torchmf`` and ``Spotlight``:

    * https://github.com/EthanRosenthal/torchmf/blob/master/torchmf.py

    * https://github.com/maciejkula/spotlight/blob/master/spotlight/losses.py

    Parameters
    -------------
    positive_scores: torch.tensor, 1-d
        Tensor containing predictions for known positive items of shape ``1 x batch_size``
    negative_scores: torch.tensor, 1-d
        Tensor containing scores for a single sampled negative item of shape ``1 x batch_size``
    positive_items: torch.tensor, 1-d
        Tensor containing ids for known positive items of shape ``1 x batch_size``. This is only
        needed if ``metadata`` is provided
    negative_items: torch.tensor, 1-d
        Tensor containing ids for randomly-sampled negative items of shape ``1 x batch_size``. This
        is only needed if ``metadata`` is provided
    metadata: dict
        Keys should be strings identifying each metadata type that match keys in
        ``metadata_weights``. Values should be a ``torch.tensor`` of shape (num_items x 1). Each
        tensor should contain categorical metadata information about items (e.g. a number
        representing the genre of the item)
    metadata_weights: dict
        Keys should be strings identifying each metadata type that match keys in ``metadata``.
        Values should be the amount of weight to place on a match of that type of metadata, with the
        sum of all values ``<= 1``.
        e.g. If ``metadata_weights = {'genre': .3, 'director': .2}``, then an item is:

        * a 100% match if it's the same item,

        * a 50% match if it's a different item with the same genre and same director,

        * a 30% match if it's a different item with the same genre and different director,

        * a 20% match if it's a different item with a different genre and same director,

        * a 0% match if it's a different item with a different genre and different director,
          which is equivalent to the loss without any partial credit
    **kwargs: keyword arguments
        Ignored, included only for compatibility with WARP loss

    Returns
    -------------
    loss: torch.tensor

    References
    -------------
    .. [1] Rendle et al. "BPR: Bayesian Personalized Ranking from Implicit Feedback." Proceedings
        of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence, 1 June 2009,
        dl.acm.org/doi/10.5555/1795114.1795167.

    """
    if len(kwargs) > 0 and list(kwargs) != ['num_items']:
        raise ValueError(f'Unexpected ``kwargs``: {kwargs}')

    preds = positive_scores - negative_scores

    if metadata is not None and len(metadata) > 0:
        ideal_difference = ideal_difference_from_metadata(
            positive_items,
            negative_items,
            metadata,
            metadata_weights,
        )
    else:
        ideal_difference = 1

    loss = (ideal_difference - torch.sigmoid(preds))

    return (loss.sum() + loss.pow(2).sum()) / len(positive_scores)
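
Finally, a minimal, hypothetical ``bpr_loss`` call without metadata; the scores are fabricated for illustration, and with ``ideal_difference = 1`` each element contributes ``1 - sigmoid(positive - negative)`` plus its square.

import torch

# two user-item pairs: one well-separated, one where the negative outscores the positive
positive_scores = torch.tensor([2.0, 0.0])
negative_scores = torch.tensor([0.0, 1.0])

# element-wise losses are 1 - sigmoid([2.0, -1.0]) ~= [0.1192, 0.7311]
loss = bpr_loss(positive_scores, negative_scores)

# the returned value is (sum + sum of squares) / batch_size ~= (0.8503 + 0.5487) / 2
print(loss)  # tensor(0.6995)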