Example #1
 def compute_loss(logits, target, seq_idx, length, regularize):
     """
     Compute negative log-likelihood loss for a batch of predictions.
     :param logits: 2d tensor [batch_size x vocab_size] of log-probabilities (e.g. log-softmax output)
     :param target: 1d tensor [batch_size]
     :param seq_idx: an integer giving the current time-step index within the sequences
     :param length: 1d tensor [batch_size] giving the true length of each sequence
     :param regularize: entropy-regularization coefficient; falsy (None/0/False) disables the term
     :return: total loss over the input mini-batch [autograd Variable] and the number of unmasked loss elements
     """
     losses = -torch.gather(logits, dim=1,
                            index=target.unsqueeze(1)).squeeze()
     mask = helper.mask(length, seq_idx)  # mask: batch x 1
     losses = losses * mask.float()
     num_non_zero_elem = torch.nonzero(mask.data).size()  # torch.Size; [0] is the count of unmasked positions
     if regularize:
         regularized_loss = logits.exp().mul(logits).sum(1).squeeze() * regularize
         loss = losses.sum() + regularized_loss.sum()
         if not num_non_zero_elem:
             return loss, 0
         else:
             return loss, num_non_zero_elem[0]
     else:
         if not num_non_zero_elem:
             return losses.sum(), 0
         else:
             return losses.sum(), num_non_zero_elem[0]
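Because the loss is formed by gathering entries of `logits` directly, the function assumes `logits` already holds log-probabilities (e.g. the output of a log-softmax layer). Below is a minimal, self-contained sketch of the same masked-NLL pattern; `sequence_mask`, the tensor shapes, and the decoding loop are assumptions standing in for the project's `helper.mask` and surrounding training code, not the repository's actual API.

 # Hypothetical usage sketch: per-step masked NLL, accumulated over a decoding loop.
 # `sequence_mask` is a stand-in for helper.mask and is an assumption.
 import torch
 import torch.nn.functional as F

 def sequence_mask(length, seq_idx):
     # 1 where step seq_idx is still inside the sequence, 0 on padding
     return (length > seq_idx).long()

 batch_size, vocab_size, max_len = 4, 10, 5
 length = torch.tensor([5, 3, 2, 4])                     # true length of each sequence
 scores = torch.randn(max_len, batch_size, vocab_size)   # raw decoder outputs per step
 targets = torch.randint(vocab_size, (max_len, batch_size))

 total_loss, total_tokens = 0.0, 0
 for t in range(max_len):
     log_probs = F.log_softmax(scores[t], dim=1)          # what compute_loss expects as `logits`
     nll = -torch.gather(log_probs, 1, targets[t].unsqueeze(1)).squeeze(1)
     mask = sequence_mask(length, t)
     total_loss = total_loss + (nll * mask.float()).sum()
     total_tokens += int(mask.sum())

 per_token_loss = total_loss / max(total_tokens, 1)       # average over non-padded tokens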
Example #2
 def compute_decoding_loss(logits, target, seq_idx, length):
     # Masked NLL at one time step; returns (summed loss, number of unmasked elements).
     losses = -torch.gather(logits, dim=1,
                            index=target.unsqueeze(1)).squeeze()
     mask = helper.mask(length, seq_idx)  # mask: batch x 1
     losses = losses * mask.float()
     num_non_zero_elem = torch.nonzero(mask.data).size()
     if not num_non_zero_elem:
         return losses.sum(), 0
     else:
         return losses.sum(), num_non_zero_elem[0]
 def compute_loss(logits, target, seq_idx, length):
     # logits: batch x vocab_size (log-probabilities), target: batch
     losses = -torch.gather(logits, dim=1, index=target.unsqueeze(1))
     # mask: batch x 1
     mask = helper.mask(length, seq_idx)
     losses = losses * mask.float()
     num_non_zero_elem = torch.nonzero(mask.data).size()
     if not num_non_zero_elem:
         loss = losses.sum()
     else:
         loss = losses.sum() / num_non_zero_elem[0]
     return loss
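This variant already divides the summed loss by the number of unmasked positions, so it returns a per-token average rather than a (sum, count) pair. A quick check of that normalization, with hypothetical values, is sketched below.

 # Hypothetical check: dividing the masked sum by the mask count equals the
 # mean over only the unmasked entries.
 import torch

 nll = torch.tensor([0.5, 1.0, 2.0, 4.0])    # per-example NLL (made-up values)
 mask = torch.tensor([1.0, 1.0, 0.0, 1.0])   # 0 marks padded positions
 masked_mean = (nll * mask).sum() / mask.sum()
 assert torch.isclose(masked_mean, nll[mask.bool()].mean())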
 def compute_loss(logits, target, seq_idx, length, regularization_param=None):
     # logits: batch x vocab_size (log-probabilities), target: batch
     losses = -torch.gather(logits, dim=1, index=target.unsqueeze(1))
     # mask: batch x 1
     mask = helper.mask(length, seq_idx)
     losses = losses * mask.float()
     num_non_zero_elem = torch.nonzero(mask.data).size()
     if not num_non_zero_elem:
         loss = losses.sum()
     else:
         loss = losses.sum() / num_non_zero_elem[0]
     if regularization_param:
         # sum_v p_v * log p_v = negative entropy of the predictive distribution
         regularized_loss = logits.exp().mul(logits).sum(1).squeeze() * regularization_param
         loss += regularized_loss.mean()
     return loss
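Since `logits` is interpreted as log-probabilities, `logits.exp().mul(logits).sum(1)` is the per-example sum of p·log p, i.e. the negative entropy of the predicted distribution, so the extra term penalizes over-confident (low-entropy) predictions. A small sketch with made-up inputs confirms the identity against `torch.distributions.Categorical`:

 # Hypothetical check that the regularization term is the negative entropy.
 import torch
 import torch.nn.functional as F

 log_probs = F.log_softmax(torch.randn(3, 7), dim=1)     # batch x vocab log-probabilities
 neg_entropy = log_probs.exp().mul(log_probs).sum(1)     # sum_v p_v * log p_v
 entropy = torch.distributions.Categorical(logits=log_probs).entropy()
 assert torch.allclose(neg_entropy, -entropy)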
 def compute_decoding_loss(logits, target, seq_idx, length):
     """
     Compute negative log-likelihood loss for a batch of predictions.
     :param logits: 2d tensor [batch_size x vocab_size] of log-probabilities (e.g. log-softmax output)
     :param target: 1d tensor [batch_size]
     :param seq_idx: an integer giving the current time-step index within the sequences
     :param length: 1d tensor [batch_size] giving the true length of each sequence
     :return: total loss over the input mini-batch [autograd Variable] and the number of unmasked loss elements
     """
     losses = -torch.gather(logits, dim=1, index=target.unsqueeze(1))
     mask = helper.mask(length, seq_idx)  # mask: batch x 1
     losses = losses * mask.float()
     num_non_zero_elem = torch.nonzero(mask.data).size()
     if not num_non_zero_elem:
         return losses.sum(), 0
     else:
         return losses.sum(), num_non_zero_elem[0]
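In all of these snippets, `torch.nonzero(mask.data).size()` yields a `torch.Size` whose first entry is the number of unmasked positions, and the truthiness check guards against an empty result. The sketch below illustrates the count with a made-up mask; the `mask.sum()` form is only a simpler equivalent for counting, not code from the repository.

 # Hypothetical illustration of the non-zero counting idiom, assuming `mask`
 # is a 0/1 tensor.
 import torch

 mask = torch.tensor([1, 0, 1, 1])
 size = torch.nonzero(mask).size()   # torch.Size([3, 1]); size[0] == 3 unmasked positions
 num_tokens = int(mask.sum())        # simpler way to get the same count
 assert size[0] == num_tokens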