Example #1
 def __call__(self, logits, targets, reduce=True):
     pos_similarity, neg_similarity, batch_size = self.get_similarities(logits)
     targets_local = FloatTensor(batch_size)
     targets_local.fill_(1)  # 1: pos_similarity should be higher than neg_similarity
     return F.margin_ranking_loss(
         pos_similarity, neg_similarity, targets_local, self.config.margin
     )
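A quick standalone check of how F.margin_ranking_loss behaves on toy tensors (plain torch in place of the library's FloatTensor wrapper; all values and the margin are made up):

import torch
import torch.nn.functional as F

pos_similarity = torch.tensor([0.9, 0.7, 0.4])
neg_similarity = torch.tensor([0.2, 0.8, 0.1])
targets = torch.ones(3)  # 1 means pos_similarity should rank above neg_similarity
loss = F.margin_ranking_loss(pos_similarity, neg_similarity, targets, margin=0.5)
print(loss)  # mean over max(0, margin - (pos - neg))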
Example #2
    def __init__(self, config, weights=None, *args, **kwargs):
        """Args:
            config: Config containing `precision_range_lower`, `precision_range_upper`,
                `num_classes`, `num_anchors`
        """
        nn.Module.__init__(self)
        Loss.__init__(self, config)

        self.num_classes = self.config.num_classes
        self.num_anchors = self.config.num_anchors
        self.precision_range = (
            self.config.precision_range_lower,
            self.config.precision_range_upper,
        )

        # Create precision anchor values and the distance between anchors,
        # corresponding to [alpha_t] and [delta_t] in the paper.
        # precision_values: 1D `Tensor` of shape [K], where `K = num_anchors`
        # delta: Scalar (since we use equal distance between anchors)
        self.precision_values, self.delta = loss_utils.range_to_anchors_and_delta(
            self.precision_range, self.num_anchors)

        # notation is [b_k] in paper, Parameter of shape [C, K]
        # where `C = number of classes` `K = num_anchors`
        self.biases = nn.Parameter(
            FloatTensor(self.config.num_classes,
                        self.config.num_anchors).zero_())
        self.lambdas = nn.Parameter(
            FloatTensor(self.config.num_classes,
                        self.config.num_anchors).data.fill_(1.0))
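A sketch of the tensor shapes this constructor sets up, with made-up values for num_classes, num_anchors and the precision range, and plain torch/numpy in place of FloatTensor and loss_utils:

import numpy as np
import torch
import torch.nn as nn

num_classes, num_anchors = 3, 5            # assumed config values
lower, upper = 0.5, 0.95                   # assumed precision range
precision_values = torch.tensor(
    np.linspace(lower, upper, num_anchors + 1)[1:], dtype=torch.float)
delta = (upper - lower) / num_anchors
biases = nn.Parameter(torch.zeros(num_classes, num_anchors))   # [b_k], shape [C, K]
lambdas = nn.Parameter(torch.ones(num_classes, num_anchors))   # shape [C, K]
print(precision_values.shape, biases.shape, lambdas.shape)
# torch.Size([5]) torch.Size([3, 5]) torch.Size([3, 5])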
Example #3
 def __call__(self, logits, targets, reduce=True):
     """
     Computes the Kullback-Leibler divergence loss for the multiclass
     classification probability distribution produced by BinaryCrossEntropyLoss
     """
     hard_targets, soft_targets = targets
     # Clamp the probabilities to (1e-20, 1 - 1e-20) to avoid the log(0)
     # problem in the KL-divergence calculation
     soft_targets = FloatTensor(soft_targets).exp().clamp(1e-20, 1 - 1e-20)
     probs = F.sigmoid(logits).clamp(1e-20, 1 - 1e-20)
     probs_neg = probs.neg().add(1).clamp(1e-20, 1 - 1e-20)
     soft_targets_neg = soft_targets.neg().add(1).clamp(1e-20, 1 - 1e-20)
     if self.weight is not None:
         loss = (
             F.kl_div(probs.log(), soft_targets, reduction="none") *
             self.weight +
             F.kl_div(probs_neg.log(), soft_targets_neg, reduction="none") *
             self.weight)
         if reduce:
             loss = loss.mean()
     else:
         loss = F.kl_div(probs.log(),
                         soft_targets,
                         reduction="mean" if reduce else "none") + F.kl_div(
                             probs_neg.log(),
                             soft_targets_neg,
                             reduction="mean" if reduce else "none",
                         )
     return loss
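A minimal toy version of the unweighted branch, assuming the soft targets are already probabilities (plain torch, mean reduction):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 3)
soft_targets = torch.rand(4, 3).clamp(1e-20, 1 - 1e-20)   # made-up teacher probabilities
probs = torch.sigmoid(logits).clamp(1e-20, 1 - 1e-20)
probs_neg = (1 - probs).clamp(1e-20, 1 - 1e-20)
soft_targets_neg = (1 - soft_targets).clamp(1e-20, 1 - 1e-20)
loss = (F.kl_div(probs.log(), soft_targets, reduction="mean")
        + F.kl_div(probs_neg.log(), soft_targets_neg, reduction="mean"))
print(loss)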
Example #4
 def __init__(self, lstm: nn.LSTM):
     """
     Shapes:
         initial_state: (lstm_layers, 1, lstm_hidden_dim) each
     """
     self.lstm = lstm
     initial_state = (
         FloatTensor(lstm.num_layers, 1, lstm.hidden_size).fill_(0),
         FloatTensor(lstm.num_layers, 1, lstm.hidden_size).fill_(0),
     )
     # Stack of (state, (embedding, element))
     self.stack = [(initial_state, (self._lstm_output(initial_state),
                                    Element("Root")))]
Example #5
    def __call__(self, m_out, targets, reduce=True):
        """
        Computes 1-vs-all binary cross entropy loss for multiclass
        classification.
        """
        # Converts targets to one-hot representation. Dim: [batch, n_classes]
        one_hot_targets = (FloatTensor(targets.size(0),
                                       m_out.size(1)).zero_().scatter_(
                                           1,
                                           targets.unsqueeze(1).data, 1))

        # This weighting applies uniform class weights.
        # examples_per_class = one_hot_targets.sum(0).clamp(min=1)
        # total_positive = examples_per_class.sum()
        # weights = total_positive.unsqueeze(0) / examples_per_class

        loss = F.binary_cross_entropy_with_logits(m_out,
                                                  one_hot_targets,
                                                  reduction="none")

        if self.config.reweight_negative:
            # This ensures that the single positive class and all negative
            # classes carry the same total weight: the weight is 1 for the
            # correct class and 1 / (n - 1) for each of the others.
            weights = one_hot_targets + (1.0 - one_hot_targets) / max(
                1,
                one_hot_targets.size(1) - 1.0)
            loss = loss * weights

        return loss.sum(1).mean() if reduce else loss.sum(1)
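A toy run of the same one-hot conversion and negative-class reweighting, with plain torch tensors standing in for the library's FloatTensor and config:

import torch
import torch.nn.functional as F

m_out = torch.randn(2, 4)                  # [batch, n_classes], made-up logits
targets = torch.tensor([1, 3])
one_hot = torch.zeros(2, 4).scatter_(1, targets.unsqueeze(1), 1)
loss = F.binary_cross_entropy_with_logits(m_out, one_hot, reduction="none")
weights = one_hot + (1.0 - one_hot) / max(1, one_hot.size(1) - 1.0)
print((loss * weights).sum(1).mean())      # reweighted, reduced loss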
Example #6
    def __call__(self, logits, targets, reduce=True):
        """
        Computes the soft and hard losses for knowledge distillation
        """
        hard_targets, _, _ = targets

        # hard targets
        one_hot_targets = (
            FloatTensor(hard_targets.size(0), logits.size(1))
            .zero_()
            .scatter_(1, hard_targets.unsqueeze(1).data, 1)
        )

        prob_loss = KLDivergenceBCELoss(self.config, weight=self.weight)
        if self.weight is not None:
            hard_loss = (
                F.binary_cross_entropy_with_logits(
                    logits, one_hot_targets, reduction="none"
                )
                * self.weight
            )
            if reduce:
                hard_loss = hard_loss.mean()
        else:
            hard_loss = F.binary_cross_entropy_with_logits(
                logits, one_hot_targets, reduction="mean" if reduce else "none"
            )
        return self.t * self.t * prob_loss(logits, targets, reduce=reduce) + hard_loss
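A rough sketch of the t²-scaled soft term plus a hard BCE term, using a generic softmax-based KL soft loss rather than the library's KLDivergenceBCELoss; the temperature t and all tensors are made up:

import torch
import torch.nn.functional as F

t = 2.0                                    # assumed temperature
logits = torch.randn(4, 3)
teacher_logits = torch.randn(4, 3)
hard_targets = torch.tensor([0, 2, 1, 2])

one_hot = torch.zeros_like(logits).scatter_(1, hard_targets.unsqueeze(1), 1)
hard_loss = F.binary_cross_entropy_with_logits(logits, one_hot)
soft_loss = F.kl_div(F.log_softmax(logits / t, dim=1),
                     F.softmax(teacher_logits / t, dim=1),
                     reduction="batchmean")
print(t * t * soft_loss + hard_loss)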
Example #7
def range_to_anchors_and_delta(precision_range, num_anchors):
    """Calculates anchor points from precision range.
      Args:
        precision_range: an interval (a, b), where 0.0 <= a <= b <= 1.0
        num_anchors: int, number of equally spaced anchor points.
      Returns:
        precision_values: A `Tensor` of [num_anchors] equally spaced values
          in the interval precision_range.
        delta: The spacing between the values in precision_values.
      Raises:
        ValueError: If precision_range is invalid.
    """
    # Validate precision_range.
    if len(precision_range) != 2:
        raise ValueError("length of precision_range (%d) must be 2" %
                         len(precision_range))
    if not 0 <= precision_range[0] <= precision_range[1] <= 1:
        raise ValueError("precision values must follow 0 <= %f <= %f <= 1" %
                         (precision_range[0], precision_range[1]))

    # Sets precision_values uniformly between min_precision and max_precision.
    precision_values = numpy.linspace(start=precision_range[0],
                                      stop=precision_range[1],
                                      num=num_anchors + 1)[1:]

    delta = (precision_range[1] - precision_range[0]) / num_anchors
    return FloatTensor(precision_values), delta
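Expected behavior on a small input, mirroring the function above with torch.FloatTensor spelled out explicitly (the range and anchor count are made up):

import numpy
import torch

precision_range, num_anchors = (0.5, 0.9), 4
values = numpy.linspace(precision_range[0], precision_range[1], num_anchors + 1)[1:]
delta = (precision_range[1] - precision_range[0]) / num_anchors
print(torch.FloatTensor(values))   # tensor([0.6000, 0.7000, 0.8000, 0.9000])
print(delta)                       # 0.1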
Example #8
    def __call__(self, logits, targets, reduce=True):
        """
        Computes the Kullback-Leibler divergence loss for the multiclass
        classification probability distribution produced by CrossEntropyLoss
        """
        hard_targets, _, soft_targets_logits = targets
        soft_targets = F.softmax(FloatTensor(soft_targets_logits) / self.t,
                                 dim=1)
        soft_targets = soft_targets.clamp(1e-10, 1 - 1e-10)
        log_probs = F.log_softmax(logits / self.t, 1)

        if self.weight is not None:
            soft_loss = (F.kl_div(log_probs, soft_targets, reduction="none") *
                         self.weight)
            if reduce:
                soft_loss = soft_loss.mean()
        else:
            soft_loss = F.kl_div(log_probs,
                                 soft_targets,
                                 reduction="mean" if reduce else "none")
        soft_loss *= self.t**2  # see https://arxiv.org/pdf/1503.02531.pdf

        hard_loss = 0.0
        if self.hard_weight > 0.0:
            hard_loss = F.cross_entropy(
                logits,
                hard_targets,
                reduction="mean" if reduce else "none",
                weight=self.weight,
            )

        return (1.0 -
                self.hard_weight) * soft_loss + self.hard_weight * hard_loss
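A toy walk-through of the unweighted branch, with made-up values for t and hard_weight and random teacher logits:

import torch
import torch.nn.functional as F

t, hard_weight = 2.0, 0.3                  # assumed temperature and mixing weight
logits = torch.randn(4, 5)
teacher_logits = torch.randn(4, 5)
hard_targets = torch.tensor([0, 3, 1, 4])

soft_targets = F.softmax(teacher_logits / t, dim=1).clamp(1e-10, 1 - 1e-10)
soft_loss = F.kl_div(F.log_softmax(logits / t, dim=1), soft_targets,
                     reduction="mean") * t ** 2
hard_loss = F.cross_entropy(logits, hard_targets)
print((1.0 - hard_weight) * soft_loss + hard_weight * hard_loss)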
Example #9
    def _prepare_labels_weights(logits, targets, weights=None):
        """
        Args:
            logits: Variable :math:`(N, C)` where `C = number of classes`
            targets: Variable :math:`(N)` where each value is
                `0 <= targets[i] <= C-1`
            weights: Coefficients for the loss. Must be a `Tensor` of shape
                [N] or [N, C], where `N = batch_size`, `C = number of classes`.
        Returns:
            labels: Tensor of shape [N, C], one-hot representation
            weights: Tensor of shape broadcastable to labels
        """
        N, C = logits.size()
        # Converts targets to one-hot representation. Dim: [N, C]
        labels = FloatTensor(N, C).zero_().scatter(1,
                                                   targets.unsqueeze(1).data,
                                                   1)

        if weights is None:
            weights = FloatTensor(N).data.fill_(1.0)

        if weights.dim() == 1:
            weights.unsqueeze_(-1)

        return labels, weights
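A quick shape check on toy inputs, with plain torch in place of the library's FloatTensor:

import torch

logits = torch.randn(3, 4)
targets = torch.tensor([2, 0, 3])
N, C = logits.size()
labels = torch.zeros(N, C).scatter(1, targets.unsqueeze(1), 1)
weights = torch.ones(N).unsqueeze(-1)      # default weights, broadcastable to [N, C]
print(labels.shape, weights.shape)         # torch.Size([3, 4]) torch.Size([3, 1])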
Example #10
 def __call__(self, logits, targets, reduce=True):
     """
     Computes the Kullback-Leibler divergence loss for the multiclass
     classification probability distribution produced by CrossEntropyLoss
     """
     hard_targets, soft_targets = targets
     soft_targets = FloatTensor(soft_targets).exp().clamp(1e-20, 1 - 1e-20)
     log_probs = F.log_softmax(logits, 1)
     if self.weight is not None:
         loss = F.kl_div(log_probs, soft_targets,
                         reduction="none") * self.weight
         if reduce:
             loss = loss.mean()
     else:
         loss = F.kl_div(log_probs,
                         soft_targets,
                         reduction="mean" if reduce else "none")
     return loss
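A minimal sketch of the unweighted branch; it assumes the soft targets are passed in as log-probabilities, which is why they are exponentiated before F.kl_div:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 3)
teacher_log_probs = F.log_softmax(torch.randn(4, 3), dim=1)      # made-up soft targets
soft_targets = teacher_log_probs.exp().clamp(1e-20, 1 - 1e-20)   # exp() since targets arrive as log-probs
loss = F.kl_div(F.log_softmax(logits, dim=1), soft_targets, reduction="mean")
print(loss)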
Example #11
 def from_config(cls, config: Config, metadata: FieldMeta):
     label_weights = getattr(metadata, "label_weights", None)
     if label_weights is not None:
         label_weights = FloatTensor(label_weights)
     return cls(metadata.vocab.itos,
                create_loss(config.loss, weight=label_weights), config)
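The same per-label weighting pattern with a plain torch loss; nn.CrossEntropyLoss stands in for whatever create_loss builds, and the weights are made up:

import torch
import torch.nn as nn

label_weights = [1.0, 2.0, 0.5]            # e.g. taken from metadata.label_weights
loss_fn = nn.CrossEntropyLoss(weight=torch.FloatTensor(label_weights))
logits = torch.randn(4, 3)
targets = torch.tensor([0, 2, 1, 1])
print(loss_fn(logits, targets))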