def compute_loss(logits, target, seq_idx, length, regularize):
    """
    Compute negative log-likelihood loss for a batch of predictions.
    :param logits: 2d tensor [batch_size x vocab_size]
    :param target: 1d tensor [batch_size]
    :param seq_idx: an integer representing the current index of the sequences
    :param length: 1d tensor [batch_size], represents each sequence's true length
    :param regularize: boolean, whether to use entropy regularization in loss computation
    :return: total loss over the input mini-batch [autograd Variable] and number of loss elements
    """
    losses = -torch.gather(logits, dim=1, index=target.unsqueeze(1)).squeeze()
    mask = helper.mask(length, seq_idx)  # mask: batch x 1
    losses = losses * mask.float()
    num_non_zero_elem = torch.nonzero(mask.data).size()
    if regularize:
        # for log-probabilities, exp(logits) * logits summed over the vocabulary
        # is the negative entropy of the predicted distribution
        regularized_loss = logits.exp().mul(logits).sum(1).squeeze() * regularize
        loss = losses.sum() + regularized_loss.sum()
        if not num_non_zero_elem:
            return loss, 0
        else:
            return loss, num_non_zero_elem[0]
    else:
        if not num_non_zero_elem:
            return losses.sum(), 0
        else:
            return losses.sum(), num_non_zero_elem[0]
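# Usage sketch (an assumption, not part of the original code): compute_loss is
# intended to be called once per decoder time step, so a sequence-level loss can
# be built by looping over steps and normalizing by the number of unmasked
# targets. `decoder_log_probs` (assumed [max_len x batch_size x vocab_size]
# log-probabilities) and `targets` (assumed [max_len x batch_size] word indices)
# are hypothetical names.
def sequence_loss(decoder_log_probs, targets, length, regularize=False):
    total_loss, total_elems = 0, 0
    for idx in range(decoder_log_probs.size(0)):
        step_loss, num_elems = compute_loss(decoder_log_probs[idx], targets[idx],
                                            idx, length, regularize)
        total_loss += step_loss
        total_elems += num_elems
    # guard against a batch where every position is masked out
    return total_loss / max(total_elems, 1)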
def compute_decoding_loss(logits, target, seq_idx, length):
    losses = -torch.gather(logits, dim=1, index=target.unsqueeze(1)).squeeze()
    mask = helper.mask(length, seq_idx)  # mask: batch x 1
    losses = losses * mask.float()
    num_non_zero_elem = torch.nonzero(mask.data).size()
    if not num_non_zero_elem:
        return losses.sum(), 0
    else:
        return losses.sum(), num_non_zero_elem[0]
def compute_loss(logits, target, seq_idx, length):
    # logits: batch x vocab_size, target: batch (1d word indices)
    losses = -torch.gather(logits, dim=1, index=target.unsqueeze(1))
    # mask: batch x 1
    mask = helper.mask(length, seq_idx)
    losses = losses * mask.float()
    num_non_zero_elem = torch.nonzero(mask.data).size()
    if not num_non_zero_elem:
        loss = losses.sum()
    else:
        loss = losses.sum() / num_non_zero_elem[0]
    return loss
def compute_loss(logits, target, seq_idx, length, regularization_param=None):
    # logits: batch x vocab_size, target: batch (1d word indices)
    losses = -torch.gather(logits, dim=1, index=target.unsqueeze(1))
    # mask: batch x 1
    mask = helper.mask(length, seq_idx)
    losses = losses * mask.float()
    num_non_zero_elem = torch.nonzero(mask.data).size()
    if not num_non_zero_elem:
        loss = losses.sum()
    else:
        loss = losses.sum() / num_non_zero_elem[0]
    if regularization_param:
        # negative entropy of the predicted distribution, scaled by the coefficient
        regularized_loss = logits.exp().mul(logits).sum(1).squeeze() * regularization_param
        loss += regularized_loss.mean()
    return loss
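# Sanity-check sketch for the regularization term (an assumption about intent,
# not code from the original repo). If `logits` are log-probabilities (e.g. the
# output of log_softmax), then exp(logits) * logits summed over the vocabulary
# is sum_i p_i * log p_i = -H(p), so adding it to the loss penalizes
# over-confident (low-entropy) predictions.
import torch
import torch.nn.functional as F

_scores = torch.randn(4, 10)                    # 4 examples, vocabulary of 10
_log_probs = F.log_softmax(_scores, dim=1)      # [4 x 10] log-probabilities
_neg_entropy = _log_probs.exp().mul(_log_probs).sum(1)  # [4], each entry is -H(p)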
def compute_decoding_loss(logits, target, seq_idx, length): """ Compute negative log-likelihood loss for a batch of predictions. :param logits: 2d tensor [batch_size x vocab_size] :param target: 2d tensor [batch_size x 1] :param seq_idx: an integer represents the current index of the sequences :param length: 1d tensor [batch_size], represents each sequences' true length :return: total loss over the input mini-batch [autograd Variable] and number of loss elements """ losses = -torch.gather(logits, dim=1, index=target.unsqueeze(1)) mask = helper.mask(length, seq_idx) # mask: batch x 1 losses = losses * mask.float() num_non_zero_elem = torch.nonzero(mask.data).size() if not num_non_zero_elem: return losses.sum(), 0 else: return losses.sum(), num_non_zero_elem[0]