def softmax_cross_entropy_with_label_smoothing(pred, label, label_smoothing=0.1):
    '''
    Defines softmax activation followed by cross entropy loss with label smoothing.

    The label smoothing loss is combined with the following weighting:
    `(1 - label_smoothing) * xent_loss + label_smoothing * label_smoothing_loss`

    Args:
        pred (Variable): Logits with a shape of `(batch_size, num_classes)`.
        label (Variable): A class index for each example if a shape of
            `(batch_size, 1)` is given, and a one-hot or probability
            distribution over classes if `(batch_size, num_classes)`.
        label_smoothing (float): Coefficient of the label smoothing loss.
            If 0, label smoothing is omitted.
    '''
    logp = None
    if label.shape[1] > 1:
        # If mixup is enabled, the label shape is assumed to be
        # (batch_size, num_classes).
        logp = F.log_softmax(pred)
        l = F.sum(-label * logp, axis=1, keepdims=True)
    else:
        l = F.softmax_cross_entropy(pred, label)
    return apply_label_smoothing(l, pred, label_smoothing, logp)
def apply_label_smoothing(xent, pred, label_smoothing, logp=None):
    if label_smoothing <= 0:
        return xent
    if logp is None:
        logp = F.log_softmax(pred)
    return (1 - label_smoothing) * xent - label_smoothing * F.mean(
        logp, axis=1, keepdims=True)
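# --- Usage sketch (not from the original source) --------------------------
# A minimal example of building the smoothed loss on top of the two functions
# above. The batch size, class count, and random inputs are arbitrary
# illustration values; the `nnabla` and `numpy` imports are assumed.
import numpy as np
import nnabla as nn
import nnabla.functions as F

batch_size, num_classes = 4, 10
pred = nn.Variable((batch_size, num_classes), need_grad=True)
label = nn.Variable((batch_size, 1))

# Per-example losses have shape (batch_size, 1); reduce to a scalar here.
loss = F.mean(softmax_cross_entropy_with_label_smoothing(pred, label, 0.1))

pred.d = np.random.randn(batch_size, num_classes)
label.d = np.random.randint(0, num_classes, size=(batch_size, 1))
loss.forward()
loss.backward()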
def loss_function(pred, y_output, N):
    # calculate cross entropy loss
    Phi = F.sum(-y_output * F.log_softmax(pred, axis=1))
    # calculate l2 norm of affine layer
    l2 = 0
    for param in nn.get_parameters().values():
        l2 += F.sum(param ** 2)
    loss = l2 * args.lmbd + Phi / N
    return loss, Phi, l2
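# --- Usage sketch (not from the original source) --------------------------
# How loss_function above might be wired up: loss = lmbd * ||params||^2 + Phi / N.
# `args.lmbd` is assumed to be defined by the surrounding training script; a
# Namespace stands in for it here, and the one-layer affine network is purely
# illustrative.
from argparse import Namespace
import numpy as np
import nnabla as nn
import nnabla.parametric_functions as PF

args = Namespace(lmbd=1e-4)             # assumed weight-decay coefficient

N = 8                                   # number of examples
x = nn.Variable((N, 16))
y_output = nn.Variable((N, 3))          # soft / one-hot targets
pred = PF.affine(x, 3, name='fc')       # logits

loss, Phi, l2 = loss_function(pred, y_output, N)

x.d = np.random.randn(N, 16)
y_output.d = np.eye(3)[np.random.randint(0, 3, size=N)]
loss.forward()
loss.backward()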
def test_obsolete_inplace_option(inplace, func, num_inputs):
    '''
    This test confirms the construction of the graph.
    Since F.log_softmax requires its output for the backward computation,
    the graph cannot be constructed if the following function is in-placed.
    '''
    x0 = nn.Variable((2, 3, 4, 5), need_grad=True)
    x1 = nn.Variable((2, 3, 4, 5), need_grad=True)
    if num_inputs == 1:
        y = F.identity(x0)
        y = F.log_softmax(y)
        y = func(y, inplace=inplace)
        y.forward()
        y.backward()
    elif num_inputs == 2:
        y0 = F.identity(x0)
        y1 = F.identity(x1)
        y0 = F.log_softmax(y0)
        y1 = F.log_softmax(y1)
        y = func(y0, y1, inplace=inplace)
        y.forward()
        y.backward()
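# --- Parametrization sketch (not from the original source) ----------------
# One plausible way to drive the test above with pytest. F.relu and F.add2
# are used here only as examples of functions that historically accepted an
# `inplace` option; the upstream test may use a different set of functions.
import pytest
import nnabla.functions as F

@pytest.mark.parametrize("inplace", [False, True])
@pytest.mark.parametrize("func, num_inputs", [(F.relu, 1), (F.add2, 2)])
def test_obsolete_inplace_option_example(inplace, func, num_inputs):
    test_obsolete_inplace_option(inplace, func, num_inputs)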
def label_smoothing_loss(pred, label, label_smoothing=0.1):
    loss = F.softmax_cross_entropy(pred, label)
    if label_smoothing <= 0:
        return loss
    return (1 - label_smoothing) * loss - label_smoothing \
        * F.mean(F.log_softmax(pred), axis=1, keepdims=True)
def softmax_cross_entropy_loss_vlabel(pred, vlabel):
    # The shape of vlabel is supposed to be (batch_size, n_class).
    logp = F.log_softmax(pred)
    loss = -1.0 * F.mean(F.sum(vlabel * logp, axis=1))
    return loss
def loss_function(pred, label, label_smoothing=0.1):
    l = F.softmax_cross_entropy(pred, label)
    if label_smoothing <= 0:
        return l
    return (1 - label_smoothing) * l - label_smoothing * F.mean(
        F.log_softmax(pred), axis=1, keepdims=True)
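# --- Numerical check (not from the original source) -----------------------
# The `-F.mean(F.log_softmax(pred), axis=1)` term in the functions above is
# the cross entropy against a uniform target, so the returned value equals
# the cross entropy against the smoothed distribution
# (1 - eps) * one_hot + eps * uniform. A small numpy check with arbitrary
# illustration values:
import numpy as np

eps, K, t = 0.1, 5, 2                          # smoothing, classes, true class
logits = np.random.randn(K)
logp = logits - np.log(np.exp(logits).sum())   # log-softmax

q = (1 - eps) * np.eye(K)[t] + eps / K         # smoothed target distribution
assert np.isclose(-(q * logp).sum(),
                  (1 - eps) * -logp[t] - eps * logp.mean())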