Example #1
    def forward(self, p, q, pad_mask=None):
        """
        Args:
            p(obj:`Tensor`): the first forward logits of training examples.
            q(obj:`Tensor`): the second forward logits of training examples.
            pad_mask(obj:`Tensor`, optional): The binary mask Tensor used for indexing; its data type is bool.

        Returns:
            loss(obj:`Tensor`): the R-Drop loss of p and q.
        """
        p_loss = F.kl_div(F.log_softmax(p, axis=-1),
                          F.softmax(q, axis=-1),
                          reduction=self.reduction)
        q_loss = F.kl_div(F.log_softmax(q, axis=-1),
                          F.softmax(p, axis=-1),
                          reduction=self.reduction)

        # pad_mask is for seq-level tasks
        if pad_mask is not None:
            p_loss = paddle.masked_select(p_loss, pad_mask)
            q_loss = paddle.masked_select(q_loss, pad_mask)

        # Depending on your task, either "sum" or "mean" can be used here
        p_loss = p_loss.sum()
        q_loss = q_loss.sum()
        loss = (p_loss + q_loss) / 2
        return loss
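
A minimal usage sketch for the loss above, assuming the snippet belongs to a class named RDropLoss and that the same batch is passed through the model twice so the two dropout masks differ (model, input_ids, labels, and the 4.0 weight are illustrative placeholders):

    rdrop_loss = RDropLoss()                    # hypothetical wrapper class for the forward above
    logits_1 = model(input_ids)                 # first forward pass
    logits_2 = model(input_ids)                 # second forward pass, different dropout mask
    ce_loss = (F.cross_entropy(logits_1, labels) +
               F.cross_entropy(logits_2, labels)) / 2
    kl_loss = rdrop_loss(logits_1, logits_2)    # symmetric KL between the two predictions
    loss = ce_loss + 4.0 * kl_loss              # 4.0 stands in for the R-Drop alpha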
Example #2
    def forward(self, out1, out2):
        if self.act is not None:
            out1 = self.act(out1)
            out2 = self.act(out2)

        log_out1 = paddle.log(out1)
        log_out2 = paddle.log(out2)
        loss = (F.kl_div(log_out1, out2, reduction='batchmean') +
                F.kl_div(log_out2, out1, reduction='batchmean')) / 2.0
        return {"DMLLoss": loss}
Example #3
    def forward(self, out1, out2):
        if self.act is not None:
            out1 = self.act(out1)
            out2 = self.act(out2)
        if len(out1.shape) < 2:
            log_out1 = paddle.log(out1)
            log_out2 = paddle.log(out2)
            loss = (F.kl_div(log_out1, out2, reduction='batchmean') +
                    F.kl_div(log_out2, out1, reduction='batchmean')) / 2.0
        else:
            loss = self.jskl_loss(out1, out2)
        return loss
Example #4
    def forward(self, out1, out2):
        if self.act is not None:
            out1 = self.act(out1)
            out2 = self.act(out2)
        if self.use_log:
            # for recognition distillation, the log of the feature map is needed
            log_out1 = paddle.log(out1)
            log_out2 = paddle.log(out2)
            loss = (F.kl_div(log_out1, out2, reduction='batchmean') +
                    F.kl_div(log_out2, out1, reduction='batchmean')) / 2.0
        else:
            # for detection distillation, the log is not needed
            loss = self.jskl_loss(out1, out2)
        return loss
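
Examples #2 to #4 are variants of the same mutual-distillation loss: the raw outputs are first mapped to probabilities by self.act, then a symmetric KL term is computed. A self-contained sketch of the use_log branch, with nn.Softmax standing in for whatever self.act is configured as (shapes are illustrative):

    import paddle
    import paddle.nn as nn
    import paddle.nn.functional as F

    act = nn.Softmax(axis=-1)              # assumed activation for self.act
    p = act(paddle.randn([8, 10]))         # e.g. student probabilities
    q = act(paddle.randn([8, 10]))         # e.g. teacher probabilities
    loss = (F.kl_div(paddle.log(p), q, reduction='batchmean') +
            F.kl_div(paddle.log(q), p, reduction='batchmean')) / 2.0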
Example #5
    def label_smooth_loss(self, X, target, smooth_value=0.1):
        """Label-smoothed loss: KL divergence against a smoothed one-hot target."""
        if self.training:
            log_probs = F.log_softmax(X, axis=1)
            num_classes = X.shape[1]
            # spread smooth_value uniformly over the non-target classes
            one_hot = paddle.full(X.shape, smooth_value / (num_classes - 1), dtype=X.dtype)
            # put 1 - smooth_value at each sample's target class
            one_hot = paddle.put_along_axis(one_hot, target.unsqueeze(1),
                                            1 - smooth_value, axis=1)
            loss = F.kl_div(log_probs, one_hot, reduction="batchmean")
            return loss.unsqueeze(0)
        else:
            return F.cross_entropy(X, target, reduction="none")
Example #6
    def forward(self, input, label):
        """
        Args:
            input(Tensor): The input tensor.
            label(Tensor): The label tensor. The shape of label is the same as input.
        Returns:
            Tensor: The KL divergence loss.
        """
        assert input.shape == label.shape, \
            "The shape of label should be the same as input."

        if self.act is not None:
            input = self.act(input)
            label = self.act(label)
        log_input = paddle.log(input)

        loss = F.kl_div(log_input, label, reduction=self.reduction)
        return loss
Example #7
def KL(pred, target):
    """One-way KL divergence between two sets of logits."""
    pred = F.log_softmax(pred, axis=-1)
    target = F.softmax(target, axis=-1)
    loss = F.kl_div(pred, target)
    return loss
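
Note that in all of these examples F.kl_div receives log-probabilities as its first argument and probabilities as its second, which is why it is always paired with log_softmax (or softmax followed by paddle.log). A minimal standalone call, with random logits as placeholders:

    import paddle
    import paddle.nn.functional as F

    pred = paddle.randn([4, 5])            # raw logits
    target = paddle.randn([4, 5])
    loss = F.kl_div(F.log_softmax(pred, axis=-1),
                    F.softmax(target, axis=-1),
                    reduction='mean')      # 'mean' is Paddle's default reduction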