Example #1
 def __call__(self, logits, targets, reduce=True):
     pos_similarity, neg_similarity, batch_size = self.get_similarities(logits)
     targets_local = FloatTensor(batch_size)
     targets_local.fill_(1)  # 1: pos_similarity should be higher than neg_similarity
     return F.margin_ranking_loss(
         pos_similarity, neg_similarity, targets_local, self.config.margin
     )
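
A minimal sketch (toy tensors, not from the original class) of how `F.margin_ranking_loss` behaves with an all-ones target: each pair is penalized unless the positive similarity beats the negative one by at least the margin.

import torch
import torch.nn.functional as F

pos_similarity = torch.tensor([0.9, 0.2, 0.7])
neg_similarity = torch.tensor([0.1, 0.4, 0.65])
target = torch.ones(3)  # 1: pos should rank above neg
# per-pair loss: max(0, -target * (pos - neg) + margin)
loss = F.margin_ranking_loss(pos_similarity, neg_similarity, target, margin=0.1)
print(loss)  # mean over the three pairs
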
Example #2
    def __init__(self, config, weights=None, *args, **kwargs):
        """Args:
            config: Config containing `precision_range_lower`, `precision_range_upper`,
                `num_classes`, `num_anchors`
        """
        nn.Module.__init__(self)
        Loss.__init__(self, config)

        self.num_classes = self.config.num_classes
        self.num_anchors = self.config.num_anchors
        self.precision_range = (
            self.config.precision_range_lower,
            self.config.precision_range_upper,
        )

        # Create precision anchor values and distance between anchors.
        # corresponding to [alpha_t] and [delta_t] in the paper.
        # precision_values: 1D `Tensor` of shape [K], where `K = num_anchors`
        # delta: Scalar (since we use equal distance between anchors)
        self.precision_values, self.delta = loss_utils.range_to_anchors_and_delta(
            self.precision_range, self.num_anchors
        )

        # notation is [b_k] in paper, Parameter of shape [C, K]
        # where `C = number of classes`, `K = num_anchors`
        self.biases = nn.Parameter(
            FloatTensor(self.config.num_classes, self.config.num_anchors).zero_()
        )
        self.lambdas = nn.Parameter(
            FloatTensor(self.config.num_classes, self.config.num_anchors).data.fill_(
                1.0
            )
        )
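
`loss_utils.range_to_anchors_and_delta` is not shown in this example; below is a plausible sketch of what the comments describe -- K equally spaced precision anchors inside (lower, upper) plus their constant spacing delta. This re-implementation is an assumption, not the library's code.

import torch

def range_to_anchors_and_delta(precision_range, num_anchors):
    # Hypothetical version: midpoints of num_anchors equal sub-intervals.
    lower, upper = precision_range
    delta = (upper - lower) / num_anchors
    anchors = lower + delta * (torch.arange(num_anchors, dtype=torch.float) + 0.5)
    return anchors, delta

precision_values, delta = range_to_anchors_and_delta((0.5, 1.0), 5)
print(precision_values)  # tensor([0.5500, 0.6500, 0.7500, 0.8500, 0.9500])
print(delta)             # 0.1
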
Example #3
 def __init__(self, lstm: nn.LSTM):
     """
     Shapes:
         initial_state: (lstm_layers, 1, lstm_hidden_dim) each
     """
     self.lstm = lstm
     initial_state = (
         FloatTensor(lstm.num_layers, 1, lstm.hidden_size).fill_(0),
         FloatTensor(lstm.num_layers, 1, lstm.hidden_size).fill_(0),
     )
     # Stack of (state, (embedding, element))
     self.stack = [(initial_state, (self._lstm_output(initial_state),
                                    Element("Root")))]
Example #4
    def __call__(self, logits, targets, reduce=True):
        """
        Computes the Kullback-Leibler divergence loss for multiclass
        classification, with the probability distribution computed as in
        BinaryCrossEntropyLoss
        """
        hard_targets, _, soft_targets_logits = targets
        # clamp the probabilities to (1e-20, 1 - 1e-20) to avoid a log(0)
        # problem in the KL divergence calculation
        soft_targets = F.sigmoid(FloatTensor(soft_targets_logits) /
                                 self.t).clamp(1e-20, 1 - 1e-20)
        probs = F.sigmoid(logits / self.t).clamp(1e-20, 1 - 1e-20)
        probs_neg = probs.neg().add(1).clamp(1e-20, 1 - 1e-20)
        soft_targets_neg = soft_targets.neg().add(1).clamp(1e-20, 1 - 1e-20)
        if self.weight is not None:
            soft_loss = (
                F.kl_div(probs.log(), soft_targets, reduction="none") *
                self.weight +
                F.kl_div(probs_neg.log(), soft_targets_neg, reduction="none") *
                self.weight)
            if reduce:
                soft_loss = soft_loss.mean()
        else:
            soft_loss = F.kl_div(
                probs.log(),
                soft_targets,
                reduction="mean" if reduce else "none") + F.kl_div(
                    probs_neg.log(),
                    soft_targets_neg,
                    reduction="mean" if reduce else "none",
                )
        soft_loss *= self.t**2  # see https://arxiv.org/pdf/1503.02531.pdf

        hard_loss = 0.0
        if self.hard_weight > 0.0:
            one_hot_targets = (FloatTensor(hard_targets.size(0),
                                           logits.size(1)).zero_().scatter_(
                                               1,
                                               hard_targets.unsqueeze(1).data,
                                               1))
            hard_loss = F.binary_cross_entropy_with_logits(
                logits,
                one_hot_targets,
                reduction="mean" if reduce else "none",
                weight=self.weight,
            )

        return (1.0 -
                self.hard_weight) * soft_loss + self.hard_weight * hard_loss
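
Below is a small, assumed-inputs illustration of the temperature-scaled sigmoid and the clamping used above to keep log() finite before calling F.kl_div. The sketch only covers the positive-side term; the class above also adds the complementary 1 - p term.

import torch
import torch.nn.functional as F

t = 2.0  # distillation temperature
logits = torch.tensor([[2.0, -1.0], [0.5, 3.0]])
soft_targets_logits = torch.tensor([[1.5, -0.5], [0.0, 2.5]])

eps = 1e-20
soft_targets = torch.sigmoid(soft_targets_logits / t).clamp(eps, 1 - eps)
probs = torch.sigmoid(logits / t).clamp(eps, 1 - eps)

soft_loss = F.kl_div(probs.log(), soft_targets, reduction="mean")
soft_loss = soft_loss * t ** 2  # gradient rescaling, Hinton et al. (2015)
print(soft_loss)
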
Example #5
    def __call__(self, m_out, targets, reduce=True):
        """
        Computes 1-vs-all binary cross entropy loss for multiclass
        classification.
        """
        # Converts targets to one-hot representation. Dim: [batch, n_classes]
        one_hot_targets = (
            FloatTensor(targets.size(0), m_out.size(1))
            .zero_()
            .scatter_(1, targets.unsqueeze(1).data, 1)
        )

        """
        `F.binary_cross_entropy` or `torch.nn.BCELoss.` requires the
        output of the previous function be already a FloatTensor.
        """
        # This (commented-out) weighting would apply uniform class weights:
        # examples_per_class = one_hot_targets.sum(0).clamp(min=1)
        # total_positive = examples_per_class.sum()
        # weights = total_positive.unsqueeze(0) / examples_per_class

        loss = F.binary_cross_entropy_with_logits(
            precision.maybe_float(m_out), one_hot_targets, reduction="none"
        )

        if self.config.reweight_negative:
            # This makes sure the negative classes as a group carry the same
            # weight as the single positive class: the weight is 1 for the
            # correct class and 1 / (n - 1) for each of the others.
            weights = one_hot_targets + (1.0 - one_hot_targets) / max(
                1, one_hot_targets.size(1) - 1.0
            )
            loss = loss * weights

        return loss.sum(1).mean() if reduce else loss.sum(1)
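
A toy trace (assumed 4-class batch, not from the original module) of the reweight_negative weights: the true class keeps weight 1 and each of the n - 1 wrong classes gets 1 / (n - 1), so positives and negatives contribute equally per example.

import torch

targets = torch.tensor([2, 0])  # [batch]
one_hot_targets = torch.zeros(2, 4).scatter_(1, targets.unsqueeze(1), 1)
weights = one_hot_targets + (1.0 - one_hot_targets) / max(
    1, one_hot_targets.size(1) - 1.0
)
print(weights)
# tensor([[0.3333, 0.3333, 1.0000, 0.3333],
#         [1.0000, 0.3333, 0.3333, 0.3333]])
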
Example #6
    def __call__(self, logits, targets, reduce=True):
        """
        Computes soft and hard loss for knowledge distillation
        """
        hard_targets, _, _ = targets

        # hard targets
        one_hot_targets = (
            FloatTensor(hard_targets.size(0), logits.size(1))
            .zero_()
            .scatter_(1, hard_targets.unsqueeze(1).data, 1)
        )

        prob_loss = KLDivergenceBCELoss(self.config, weight=self.weight)
        if self.weight is not None:
            hard_loss = (
                F.binary_cross_entropy_with_logits(
                    logits, one_hot_targets, reduction="none"
                )
                * self.weight
            )
            if reduce:
                hard_loss = hard_loss.mean()
        else:
            hard_loss = F.binary_cross_entropy_with_logits(
                logits, one_hot_targets, reduction="mean" if reduce else "none"
            )
        return self.t * self.t * prob_loss(logits, targets, reduce=reduce) + hard_loss
Example #7
    def __call__(self, logits, targets, reduce=True):
        """
        Computes the Kullback-Leibler divergence loss for multiclass
        classification, with the probability distribution computed as in
        CrossEntropyLoss
        """
        hard_targets, _, soft_targets_logits = targets
        soft_targets = F.softmax(FloatTensor(soft_targets_logits) / self.t, dim=1)
        soft_targets = soft_targets.clamp(1e-10, 1 - 1e-10)
        log_probs = F.log_softmax(logits / self.t, 1)

        if self.weight is not None:
            soft_loss = (
                F.kl_div(log_probs, soft_targets, reduction="none") * self.weight
            )
            if reduce:
                soft_loss = soft_loss.mean()
        else:
            soft_loss = F.kl_div(
                log_probs, soft_targets, reduction="mean" if reduce else "none"
            )
        soft_loss *= self.t ** 2  # see https://arxiv.org/pdf/1503.02531.pdf

        hard_loss = 0.0
        if self.hard_weight > 0.0:
            hard_loss = F.cross_entropy(
                logits,
                hard_targets,
                reduction="mean" if reduce else "none",
                weight=self.weight,
            )

        return (1.0 - self.hard_weight) * soft_loss + self.hard_weight * hard_loss
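
A self-contained sketch (assumed toy logits) of the temperature-softened softmax targets and the t ** 2 rescaling used above. The sketch uses reduction="batchmean", which matches the mathematical KL definition, whereas the class keeps "mean" / "none" for its reduce flag.

import torch
import torch.nn.functional as F

t = 4.0
student_logits = torch.tensor([[1.0, 2.0, 0.5]])
teacher_logits = torch.tensor([[0.8, 2.5, 0.1]])

soft_targets = F.softmax(teacher_logits / t, dim=1).clamp(1e-10, 1 - 1e-10)
log_probs = F.log_softmax(student_logits / t, dim=1)

soft_loss = F.kl_div(log_probs, soft_targets, reduction="batchmean") * t ** 2
print(soft_loss)
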
Example #8
 def __call__(self, logits, targets, reduce=True):
     """
     Computes the Kullback-Leibler divergence loss for multiclass
     classification, with the probability distribution computed as in
     BinaryCrossEntropyLoss
     """
     hard_targets, _, soft_targets_logits = targets
     # clamp the probabilities to (1e-20, 1 - 1e-20) to avoid a log(0)
     # problem in the KL divergence calculation
     soft_targets = F.sigmoid(FloatTensor(soft_targets_logits) / self.t).clamp(
         1e-20, 1 - 1e-20
     )
     probs = F.sigmoid(logits / self.t).clamp(1e-20, 1 - 1e-20)
     probs_neg = probs.neg().add(1).clamp(1e-20, 1 - 1e-20)
     soft_targets_neg = soft_targets.neg().add(1).clamp(1e-20, 1 - 1e-20)
     if self.weight is not None:
         loss = (
             F.kl_div(probs.log(), soft_targets, reduction="none") * self.weight
             + F.kl_div(probs_neg.log(), soft_targets_neg, reduction="none")
             * self.weight
         )
         if reduce:
             loss = loss.mean()
     else:
         loss = F.kl_div(
             probs.log(), soft_targets, reduction="mean" if reduce else "none"
         ) + F.kl_div(
             probs_neg.log(),
             soft_targets_neg,
             reduction="mean" if reduce else "none",
         )
     return loss
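
The two F.kl_div terms above (one on p, one on 1 - p) together form the KL divergence between full Bernoulli distributions; a quick numeric check with assumed probabilities:

import torch
import torch.nn.functional as F

q = torch.tensor([0.7, 0.2])  # target (teacher) probabilities
p = torch.tensor([0.6, 0.3])  # predicted (student) probabilities

two_term = (
    F.kl_div(p.log(), q, reduction="none")
    + F.kl_div((1 - p).log(), 1 - q, reduction="none")
)
analytic = q * (q / p).log() + (1 - q) * ((1 - q) / (1 - p)).log()
print(torch.allclose(two_term, analytic))  # True
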
Example #9
    def __call__(self, logits, targets, reduce=True):
        """
        Computes the Kullback-Leibler divergence loss for multiclass
        classification, with the probability distribution computed as in
        CrossEntropyLoss
        """
        hard_targets, _, soft_targets_logits = targets
        soft_targets = F.softmax(FloatTensor(soft_targets_logits) / self.t,
                                 dim=1)
        soft_targets = soft_targets.clamp(1e-10, 1 - 1e-10)
        log_probs = F.log_softmax(logits / self.t, 1)
        soft_loss = F.kl_div(log_probs, soft_targets, reduction="none")
        if self.weight is not None:
            soft_loss = soft_loss * self.weight
        if reduce:
            soft_loss = soft_loss.mean()
        else:
            # soft_loss has shape [batch_size, num_labels] while hard_loss has
            # shape [batch_size], so we still reduce soft_loss over the labels
            # dimension to make the two losses addable.
            soft_loss = soft_loss.mean(1)

        soft_loss *= self.t**2  # see https://arxiv.org/pdf/1503.02531.pdf
        hard_loss = 0.0
        if self.hard_weight > 0.0:
            hard_loss = F.cross_entropy(
                logits,
                hard_targets,
                reduction="mean" if reduce else "none",
                weight=self.weight,
            )

        return (1.0 -
                self.hard_weight) * soft_loss + self.hard_weight * hard_loss
Example #10
 def from_config(cls, config: Config, metadata: FieldMeta):
     label_weights = getattr(metadata, "label_weights", None)
     if label_weights is not None:
         label_weights = FloatTensor(label_weights)
     return cls(
         metadata.vocab.itos, create_loss(config.loss, weight=label_weights), config
     )
Example #11
 def from_config(cls, config: Config, metadata: FieldMeta = None, labels=None):
     label_weights = getattr(metadata, "label_weights", None)
     if label_weights is not None:
         label_weights = FloatTensor(label_weights)
     vocab = metadata.vocab.itos if metadata else labels
     loss = create_loss(config.loss, weight=label_weights)
     cls = (
         BinaryClassificationOutputLayer
         if isinstance(loss, BinaryCrossEntropyLoss)
         else MulticlassOutputLayer
     )
     return cls(vocab, create_loss(config.loss, weight=label_weights), config)
Example #12
    def _prepare_labels_weights(logits, targets, weights=None):
        """
        Args:
            logits: Variable :math:`(N, C)` where `C = number of classes`
            targets: Variable :math:`(N)` where each value is
                `0 <= targets[i] <= C-1`
            weights: Coefficients for the loss. Must be a `Tensor` of shape
                [N] or [N, C], where `N = batch_size`, `C = number of classes`.
        Returns:
            labels: Tensor of shape [N, C], one-hot representation
            weights: Tensor of shape broadcastable to labels
        """
        N, C = logits.size()
        # Converts targets to one-hot representation. Dim: [N, C]
        labels = FloatTensor(N, C).zero_().scatter(1,
                                                   targets.unsqueeze(1).data,
                                                   1)

        if weights is None:
            weights = FloatTensor(N).data.fill_(1.0)

        if weights.dim() == 1:
            weights.unsqueeze_(-1)

        return labels, weights
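
A quick shape check (toy values, assumed) of what this helper returns; the final unsqueeze turns a per-example weight vector of shape [N] into [N, 1] so it broadcasts against the [N, C] one-hot labels.

import torch

N, C = 3, 4
targets = torch.tensor([1, 3, 0])
labels = torch.zeros(N, C).scatter(1, targets.unsqueeze(1), 1)  # [N, C] one-hot

weights = torch.ones(N)          # per-example weights, shape [N]
weights = weights.unsqueeze(-1)  # [N, 1], broadcastable to [N, C]
print((labels * weights).shape)  # torch.Size([3, 4])
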
Example #13
    def __call__(self, m_out, targets, reduce=True):
        """
        Computes multi-label classification loss
        see details in torch.nn.MultiLabelSoftMarginLoss
        """

        num_classes = m_out.size()[1]
        target_labels = targets[0]

        # Each label list is padded with -1 so that every example has the
        # same number of labels. Since -1 is outside the valid index range,
        # add 1 to target_labels temporarily.
        tmp_target_labels = target_labels + 1

        # The idea is similar to one_hot_targets, but this encoding supports
        # the multi-label case. The first column must be dropped afterwards,
        # since it encodes the padded label -1.
        n_hot_targets = (
            FloatTensor(target_labels.size(0), num_classes + 1)
            .zero_()
            .scatter_(1, tmp_target_labels, 1)
        )[:, 1:]

        """
        `F.multilabel_soft_margin_loss` or `torch.nn.MultiLabelSoftMarginLoss.`
        requires the
        output of the previous function be already a FloatTensor.
        """

        #  default: equal weight for each class
        #  the losses are averaged over observations for each mini-batch

        loss = F.multilabel_soft_margin_loss(
            precision.maybe_float(m_out), n_hot_targets, reduction="mean"
        )

        return loss
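
A toy trace (assumed label lists) of the pad-by--1 trick used above: shifting labels by +1 sends the padding into column 0, which is then dropped, leaving the n-hot matrix.

import torch

num_classes = 4
target_labels = torch.tensor([[0, 2, -1], [3, -1, -1]])  # padded with -1
tmp_target_labels = target_labels + 1  # padding -1 becomes index 0

n_hot_targets = (
    torch.zeros(target_labels.size(0), num_classes + 1)
    .scatter_(1, tmp_target_labels, 1)
)[:, 1:]  # drop the padding column
print(n_hot_targets)
# tensor([[1., 0., 1., 0.],
#         [0., 0., 0., 1.]])
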