Example #1
def lambdaRank_loss_diagonal(batch_preds=None, batch_stds=None, sigma=None):
    '''
    This method imposes an explicit bias toward highly ranked documents that are essentially ties.
    :param batch_preds: [batch, ranking_size] each row represents the relevance predictions for documents associated with the same query
    :param batch_stds: [batch, ranking_size] each row represents the standard relevance grades for documents associated with the same query
    :param sigma: the constant that scales the pairwise score differences (cf. RankNet)
    :return: the summed LambdaRank loss over the batch
    '''

    batch_preds_sorted, batch_preds_sorted_inds = torch.sort(batch_preds, dim=1, descending=True)   # sort documents according to the predicted relevance
    batch_stds_sorted_via_preds = torch.gather(batch_stds, dim=1, index=batch_preds_sorted_inds)    # reorder batch_stds correspondingly so as to make it consistent. BTW, batch_stds[batch_preds_sorted_inds] only works with 1-D tensor

    # get unique document pairs, which differ dynamically across training iterations
    pair_row_inds, pair_col_inds = torch_triu_indice(k=1, pair_type='NoTies', batch_labels=batch_stds_sorted_via_preds)

    batch_std_diffs = torch.unsqueeze(batch_stds_sorted_via_preds, dim=2) - torch.unsqueeze(batch_stds_sorted_via_preds, dim=1)  # standard pairwise differences, i.e., S_{ij}
    batch_std_Sij = torch.clamp(batch_std_diffs, min=-1.0, max=1.0) # ensuring S_{ij} \in {-1, 0, 1}
    batch_std_Sij = batch_std_Sij[:, pair_row_inds, pair_col_inds]  # necessary S_{ij}

    batch_pred_diffs = torch.unsqueeze(batch_preds_sorted, dim=2) - torch.unsqueeze(batch_preds_sorted, dim=1)  # computing pairwise differences, i.e., s_i - s_j
    batch_pred_s_ij = batch_pred_diffs[:, pair_row_inds, pair_col_inds] # unique pairwise comparisons within each ranked list of documents

    batch_delta_ndcg = get_delta_ndcg(batch_stds, batch_stds_sorted_via_preds)
    batch_delta_ndcg = batch_delta_ndcg[:, pair_row_inds, pair_col_inds]

    batch_loss_1st = 0.5 * sigma * batch_pred_s_ij * (1.0 - batch_std_Sij) # cf. the first equation on page 3
    batch_loss_2nd = torch.log(torch.exp(-sigma * batch_pred_s_ij) + 1.0)  # cf. the first equation on page 3

    batch_loss = torch.sum((batch_loss_1st + batch_loss_2nd) * batch_delta_ndcg)    # weighting with delta-nDCG

    return batch_loss
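
For reference, the "first equation on page 3" comments presumably point to the pairwise cost in Burges's "From RankNet to LambdaRank to LambdaMART: An Overview"; written out, the quantity that batch_loss_1st + batch_loss_2nd computes for a pair (i, j) is

C_{ij} = \frac{1}{2}\sigma (s_i - s_j)(1 - S_{ij}) + \log\left(1 + e^{-\sigma (s_i - s_j)}\right),

and the returned loss is \sum_{i<j} C_{ij}\,|\Delta\mathrm{nDCG}_{ij}|, i.e. the RankNet cost re-weighted by the nDCG change incurred by swapping documents i and j.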
Example #2
def lambdaRank_loss_full_soft(batch_preds=None, batch_stds=None, sigma=None):
    '''
    Instead of strictly taking the upper-triangular entries, here we compute the LambdaRank loss by fully exploiting the following properties:
    (1) using the full pairwise difference matrix yields twice the loss of using merely the upper-triangular entries;
    (2) for ties, the delta-nDCG is zero, so there is no need to explicitly remove tied pairs.
    '''

    batch_preds_sorted, batch_preds_sorted_inds = torch.sort(
        batch_preds, dim=1,
        descending=True)  # sort documents according to the predicted relevance
    batch_stds_sorted_via_preds = torch.gather(
        batch_stds, dim=1, index=batch_preds_sorted_inds
    )  # reorder batch_stds correspondingly so as to make it consistent. BTW, batch_stds[batch_preds_sorted_inds] only works with 1-D tensor

    batch_std_diffs = torch.unsqueeze(
        batch_stds_sorted_via_preds, dim=2) - torch.unsqueeze(
            batch_stds_sorted_via_preds,
            dim=1)  # standard pairwise differences, i.e., S_{ij}
    batch_std_Sij = torch.clamp(batch_std_diffs, min=-1.0,
                                max=1.0)  # ensuring S_{ij} \in {-1, 0, 1}

    batch_pred_s_ij = torch.unsqueeze(
        batch_preds_sorted, dim=2) - torch.unsqueeze(
            batch_preds_sorted,
            dim=1)  # computing pairwise differences, i.e., s_i - s_j

    batch_delta_ndcg = get_delta_ndcg(batch_stds, batch_stds_sorted_via_preds)

    batch_loss = torch.sum(sigma * (F.softplus(batch_pred_s_ij, beta=sigma) -
                                    batch_std_Sij * batch_pred_s_ij) *
                           batch_delta_ndcg)

    return batch_loss
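
The single softplus expression above folds the two loss terms used elsewhere in these examples into one numerically stable call (the factor-of-two and the tie handling are covered by the two properties listed in the docstring). With PyTorch's convention softplus(x, beta) = (1/beta) * log(1 + exp(beta * x)), each entry of the summand is

\sigma\big(\mathrm{softplus}(s_{ij}, \beta=\sigma) - S_{ij}\, s_{ij}\big) = \sigma s_{ij}\,(1 - S_{ij}) + \log\big(1 + e^{-\sigma s_{ij}}\big),

which avoids ever evaluating an exponential of a large positive argument, unlike the direct torch.exp(-sigma * ...) form.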
Example #3
    def inner_train(self, batch_preds, batch_stds, **kwargs):
        '''
        :param batch_preds: [batch, ranking_size] each row represents the relevance predictions for documents associated with the same query
        :param batch_stds:  [batch, ranking_size] each row represents the standard relevance grades for documents associated with the same query
        '''
        label_type = kwargs['label_type']
        assert LABEL_TYPE.MultiLabel == label_type
        assert 'presort' in kwargs and kwargs['presort'] is True  # aiming for direct usage of ideal ranking

        batch_preds_sorted, batch_preds_sorted_inds = torch.sort(batch_preds, dim=1, descending=True)  # sort documents according to the predicted relevance
        batch_stds_sorted_via_preds = torch.gather(batch_stds, dim=1, index=batch_preds_sorted_inds)  # reorder batch_stds correspondingly so as to make it consistent. BTW, batch_stds[batch_preds_sorted_inds] only works with 1-D tensor

        batch_std_diffs = torch.unsqueeze(batch_stds_sorted_via_preds, dim=2) - torch.unsqueeze(batch_stds_sorted_via_preds, dim=1)  # standard pairwise differences, i.e., S_{ij}
        batch_std_Sij = torch.clamp(batch_std_diffs, min=-1.0, max=1.0)  # ensuring S_{ij} \in {-1, 0, 1}
        batch_std_p_ij = 0.5 * (1.0 + batch_std_Sij)

        batch_s_ij = torch.unsqueeze(batch_preds_sorted, dim=2) - torch.unsqueeze(batch_preds_sorted, dim=1)  # computing pairwise differences, i.e., s_i - s_j
        batch_p_ij = 1.0 / (torch.exp(-self.sigma * batch_s_ij) + 1.0)

        batch_delta_ndcg = get_delta_ndcg(batch_ideally_sorted_stds=batch_stds, batch_stds_sorted_via_preds=batch_stds_sorted_via_preds, label_type=label_type, gpu=self.gpu)

        # regarding reduction: 'mean' leads to poor performance; a probable reason is that
        # weighting by the lambda weight and then averaging yields very small loss values
        batch_loss = F.binary_cross_entropy(input=torch.triu(batch_p_ij, diagonal=1),
                                            target=torch.triu(batch_std_p_ij, diagonal=1),
                                            weight=torch.triu(batch_delta_ndcg, diagonal=1), reduction='sum')
        self.optimizer.zero_grad()
        batch_loss.backward()
        self.optimizer.step()

        return batch_loss
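
The binary cross-entropy call is an equivalent formulation of the same pairwise cost: with \bar{P}_{ij} = \tfrac{1}{2}(1 + S_{ij}) (batch_std_p_ij) and P_{ij} = 1/(1 + e^{-\sigma(s_i - s_j)}) (batch_p_ij), expanding the cross-entropy gives

-\big[\bar{P}_{ij}\log P_{ij} + (1 - \bar{P}_{ij})\log(1 - P_{ij})\big] = \frac{1}{2}\sigma(s_i - s_j)(1 - S_{ij}) + \log\big(1 + e^{-\sigma(s_i - s_j)}\big),

so taking the upper-triangular entries, weighting them with delta-nDCG, and summing reproduces the LambdaRank loss of the preceding examples.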
Example #4
    def custom_loss_function(self, batch_preds, batch_std_labels, **kwargs):
        '''
        @param batch_preds: [batch, ranking_size] each row represents the relevance predictions for documents associated with the same query
        @param batch_std_labels: [batch, ranking_size] each row represents the standard relevance grades for documents associated with the same query
        @param kwargs:
        @return:
        '''
        assert 'label_type' in kwargs and LABEL_TYPE.MultiLabel == kwargs['label_type']
        label_type = kwargs['label_type']
        assert 'presort' in kwargs and kwargs['presort'] is True  # aiming for direct usage of ideal ranking

        # sort documents according to the predicted relevance
        batch_descending_preds, batch_pred_desc_inds = torch.sort(
            batch_preds, dim=1, descending=True)
        # reorder batch_stds correspondingly so as to make it consistent.
        # BTW, batch_stds[batch_preds_sorted_inds] only works with 1-D tensor
        batch_predict_rankings = torch.gather(batch_std_labels,
                                              dim=1,
                                              index=batch_pred_desc_inds)

        batch_p_ij, batch_std_p_ij = get_pairwise_comp_probs(
            batch_preds=batch_descending_preds,
            batch_std_labels=batch_predict_rankings,
            sigma=self.sigma)

        batch_delta_ndcg = get_delta_ndcg(
            batch_ideal_rankings=batch_std_labels,
            batch_predict_rankings=batch_predict_rankings,
            label_type=label_type,
            device=self.device)

        _batch_loss = F.binary_cross_entropy(input=torch.triu(batch_p_ij,
                                                              diagonal=1),
                                             target=torch.triu(batch_std_p_ij,
                                                               diagonal=1),
                                             weight=torch.triu(
                                                 batch_delta_ndcg, diagonal=1),
                                             reduction='none')

        batch_loss = torch.sum(torch.sum(_batch_loss, dim=(2, 1)))

        self.optimizer.zero_grad()
        batch_loss.backward()
        self.optimizer.step()

        return batch_loss
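
get_pairwise_comp_probs is a library helper that is not shown in these examples. A minimal sketch of the quantities it presumably returns, constructed the same way Examples #3 and #5 build them (the helper's actual implementation and signature in the source library may differ), is:

import torch

def pairwise_comp_probs_sketch(batch_preds, batch_std_labels, sigma=1.0):
    '''Hypothetical stand-in for get_pairwise_comp_probs(); the name and defaults are assumptions.'''
    # predicted pairwise preference P_{ij} = sigmoid(sigma * (s_i - s_j))
    batch_s_ij = torch.unsqueeze(batch_preds, dim=2) - torch.unsqueeze(batch_preds, dim=1)
    batch_p_ij = torch.sigmoid(sigma * batch_s_ij)
    # ground-truth preference \bar{P}_{ij} = 0.5 * (1 + S_{ij}) with S_{ij} clamped to {-1, 0, 1}
    batch_std_diffs = torch.unsqueeze(batch_std_labels, dim=2) - torch.unsqueeze(batch_std_labels, dim=1)
    batch_std_Sij = torch.clamp(batch_std_diffs, min=-1.0, max=1.0)
    batch_std_p_ij = 0.5 * (1.0 + batch_std_Sij)
    return batch_p_ij, batch_std_p_ij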
Example #5
    def forward(ctx, batch_preds, batch_stds, sigma, gpu):
        batch_preds_sorted, batch_preds_sorted_inds = torch.sort(
            batch_preds, dim=1, descending=True
        )  # sort documents according to the predicted relevance
        batch_stds_sorted_via_preds = torch.gather(
            batch_stds, dim=1, index=batch_preds_sorted_inds
        )  # reorder batch_stds correspondingly so as to make it consistent. BTW, batch_stds[batch_preds_sorted_inds] only works with 1-D tensor

        batch_std_diffs = torch.unsqueeze(
            batch_stds_sorted_via_preds, dim=2) - torch.unsqueeze(
                batch_stds_sorted_via_preds,
                dim=1)  # standard pairwise differences, i.e., S_{ij}
        batch_std_Sij = torch.clamp(batch_std_diffs, min=-1.0,
                                    max=1.0)  # ensuring S_{ij} \in {-1, 0, 1}

        batch_pred_s_ij = torch.unsqueeze(
            batch_preds_sorted, dim=2) - torch.unsqueeze(
                batch_preds_sorted,
                dim=1)  # computing pairwise differences, i.e., s_i - s_j

        batch_delta_ndcg = get_delta_ndcg(batch_stds,
                                          batch_stds_sorted_via_preds)

        batch_loss_1st = 0.5 * sigma * batch_pred_s_ij * (
            1.0 - batch_std_Sij)  # cf. the first equation on page 3
        batch_loss_2nd = log_1_add_exp_minus_sigma(
            batch_pred_s_ij, sigma=sigma,
            gpu=gpu)  # cf. the first equation on page 3

        batch_loss = torch.sum(
            (batch_loss_1st + batch_loss_2nd) * batch_delta_ndcg * 0.5
        )  # weighting with delta-nDCG, '0.5' is multiplied due to the symmetric property

        #- gradient -#
        batch_grad = sigma * (0.5 *
                              (1 - batch_std_Sij) - reciprocal_1_add_exp_sigma(
                                  batch_pred_s_ij, sigma=sigma, gpu=gpu))

        batch_grad = batch_grad * batch_delta_ndcg
        batch_grad = torch.sum(
            batch_grad, dim=1, keepdim=True
        )  # relying on the symmetric property, the i-th row-sum corresponds to the cumulative gradient w.r.t. the i-th document.
        ctx.save_for_backward(batch_grad)

        return batch_loss
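
Assuming reciprocal_1_add_exp_sigma(x, sigma=...) computes 1/(1 + e^{\sigma x}), as its name suggests, the hand-coded gradient above is the standard LambdaRank lambda for each document pair,

\lambda_{ij} = \sigma\left(\frac{1}{2}(1 - S_{ij}) - \frac{1}{1 + e^{\sigma(s_i - s_j)}}\right) |\Delta\mathrm{nDCG}_{ij}|,

and the tensor saved via ctx.save_for_backward accumulates these lambdas over the paired documents, so backward() can return a per-document gradient instead of differentiating the loss expression.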
Example #6
def lambdaRank_loss_full(batch_preds=None,
                         batch_stds=None,
                         sigma=None,
                         label_type=None,
                         gpu=False):
    '''
    Instead of strictly taking the upper-triangular entries, here we compute the LambdaRank loss by fully exploiting the following properties:
    (1) using the full pairwise difference matrix yields twice the loss of using merely the upper-triangular entries;
    (2) for ties, the delta-nDCG is zero, so there is no need to explicitly remove tied pairs.
    '''
    batch_preds_sorted, batch_preds_sorted_inds = torch.sort(
        batch_preds, dim=1,
        descending=True)  # sort documents according to the predicted relevance
    batch_stds_sorted_via_preds = torch.gather(
        batch_stds, dim=1, index=batch_preds_sorted_inds
    )  # reorder batch_stds correspondingly so as to make it consistent. BTW, batch_stds[batch_preds_sorted_inds] only works with 1-D tensor

    batch_std_diffs = torch.unsqueeze(
        batch_stds_sorted_via_preds, dim=2) - torch.unsqueeze(
            batch_stds_sorted_via_preds,
            dim=1)  # standard pairwise differences, i.e., S_{ij}
    batch_std_Sij = torch.clamp(batch_std_diffs, min=-1.0,
                                max=1.0)  # ensuring S_{ij} \in {-1, 0, 1}

    batch_pred_s_ij = torch.unsqueeze(
        batch_preds_sorted, dim=2) - torch.unsqueeze(
            batch_preds_sorted,
            dim=1)  # computing pairwise differences, i.e., s_i - s_j

    batch_delta_ndcg = get_delta_ndcg(batch_stds,
                                      batch_stds_sorted_via_preds,
                                      label_type=label_type,
                                      gpu=gpu)

    batch_loss_1st = 0.5 * sigma * batch_pred_s_ij * (
        1.0 - batch_std_Sij)  # cf. the first equation on page 3
    batch_loss_2nd = torch.log(torch.exp(-sigma * batch_pred_s_ij) +
                               1.0)  # cf. the first equation on page 3

    # the coefficient of 0.5 is added because all pairs are used
    batch_loss = torch.sum(0.5 * (batch_loss_1st + batch_loss_2nd) *
                           batch_delta_ndcg)  # weighting with delta-nDCG

    return batch_loss
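
All of the examples above delegate the |\Delta nDCG| weights to get_delta_ndcg, which is not shown here. Purely as a reference point, a self-contained sketch of the standard swap-based |\Delta nDCG| computation (assuming the usual gain 2^label - 1 and discount 1/log2(rank + 1); the library's actual helper, its signature, and its label handling may differ) could look like:

import torch

def delta_ndcg_sketch(batch_ideal_rankings, batch_predict_rankings):
    '''Hypothetical |delta-nDCG| matrix for swapping positions i and j in the predicted order.
    Both arguments are float graded-relevance labels of shape [batch, ranking_size]:
    the first sorted in the ideal order, the second in the order induced by the predictions.'''
    num_docs = batch_ideal_rankings.size(1)
    ranks = torch.arange(num_docs, dtype=torch.float, device=batch_ideal_rankings.device) + 1.0
    discounts = 1.0 / torch.log2(ranks + 1.0)                                 # positional discounts
    ideal_gains = torch.pow(2.0, batch_ideal_rankings) - 1.0
    ideal_dcg = torch.sum(ideal_gains * discounts, dim=1, keepdim=True)       # [batch, 1] normaliser
    gains = (torch.pow(2.0, batch_predict_rankings) - 1.0) / ideal_dcg        # normalised gains, [batch, n]
    gain_diffs = gains.unsqueeze(dim=2) - gains.unsqueeze(dim=1)              # g_i - g_j, [batch, n, n]
    discount_diffs = discounts.view(1, -1, 1) - discounts.view(1, 1, -1)      # discount differences between positions
    return torch.abs(gain_diffs) * torch.abs(discount_diffs)                  # |delta-nDCG_{ij}|, [batch, n, n]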