def test_softmax_focal_loss(num_datum, num_classes, alpha, gamma, data, grad, target_type):
    scores = data.draw(
        hnp.arrays(shape=(num_datum, num_classes), dtype=float, elements=st.floats(1, 100))
    )
    assume((abs(scores.sum(axis=1)) > 0.001).all())

    scores_mygrad = Tensor(scores)
    scores_nn = Tensor(scores)

    truth = np.zeros((num_datum, num_classes))
    targets = data.draw(
        st.tuples(*(st.integers(0, num_classes - 1) for i in range(num_datum)))
    )
    truth[range(num_datum), targets] = 1
    targets = target_type(targets)

    probs = softmax(scores_mygrad)
    mygrad_focal_loss = sum(
        truth * (-alpha * (1 - probs + 1e-14) ** gamma * log(probs))
    ) / num_datum
    mygrad_focal_loss.backward(grad)

    nn_loss = softmax_focal_loss(scores_nn, targets, alpha=alpha, gamma=gamma).mean()
    nn_loss.backward(grad)

    assert isinstance(nn_loss, Tensor) and nn_loss.ndim == 0
    assert_allclose(nn_loss.data, mygrad_focal_loss.data, atol=1e-4, rtol=1e-4)
    assert_allclose(scores_nn.grad, scores_mygrad.grad, atol=1e-4, rtol=1e-4)

    nn_loss.null_gradients()
    assert scores_nn.grad is None
def focal_loss(scores, targets, *, alpha=1, gamma=0):
    """ Return the focal loss.

    Parameters
    ----------
    scores : mygrad.Tensor, shape=(N, C)
        The C class scores for each of the N pieces of data.

    targets : Sequence[int], shape=(N,)
        The correct class indices, in [0, C), for each datum.

    alpha : Real, optional (default=1)
        The ɑ weighting factor in the loss formulation.

    gamma : Real, optional (default=0)
        The ɣ focusing parameter. Note that for ɣ=0 and ɑ=1, this is
        cross-entropy loss.

    Returns
    -------
    mygrad.Tensor
        The average focal loss.

    Notes
    -----
    This function does not perform a softmax before computing the loss. If
    you need to take the softmax before computing the loss, see
    :class:`SoftmaxFocalLoss` instead.
    """
    if isinstance(targets, Tensor):
        targets = targets.data

    label_locs = (range(len(targets)), targets)
    pc = scores[label_locs]
    return -mean(alpha * (1 - pc + 1e-14) ** gamma * log(pc))
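# A minimal usage sketch for the averaged `focal_loss` defined directly above.
# This is an illustration, not part of the original module: the probability
# values are made up, and it assumes the imports used elsewhere in this file
# (numpy as np, mygrad's Tensor). Note that `focal_loss` applies no softmax,
# so `probs` must already hold probabilities.
def example_focal_loss_usage():
    probs = Tensor(np.array([[0.7, 0.2, 0.1],
                             [0.1, 0.8, 0.1]]))
    targets = [0, 1]  # the correct class index for each of the two data
    loss = focal_loss(probs, targets, alpha=0.25, gamma=2.0)  # 0-dim Tensor (mean)
    loss.backward()  # back-propagates; gradients land in probs.grad
    return loss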
def test_softmax_crossentropy(data: st.DataObject, labels_as_tensor: bool):
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    y_true = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        ).map(Tensor if labels_as_tensor else lambda x: x)
    )
    scores = Tensor(s)
    softmax_cross = softmax_crossentropy(scores, y_true, constant=False)
    softmax_cross.backward()

    mygrad_scores = Tensor(s)
    probs = softmax(mygrad_scores)

    correct_labels = (range(len(y_true)), y_true.data if labels_as_tensor else y_true)
    truth = np.zeros(mygrad_scores.shape)
    truth[correct_labels] = 1

    mygrad_cross = (-1 / s.shape[0]) * (log(probs) * truth).sum()
    mygrad_cross.backward()

    assert_allclose(softmax_cross.data, mygrad_cross.data, atol=1e-5, rtol=1e-5)
    assert_allclose(scores.grad, mygrad_scores.grad, atol=1e-5, rtol=1e-5)
def test_negative_log_likelihood_vs_softmax_cross_entropy(
    data: st.DataObject, labels_as_tensor: bool
):
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    y_true = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        ).map(Tensor if labels_as_tensor else lambda x: x)
    )
    scores = Tensor(s)
    nll = negative_log_likelihood(mg.log(mg.nnet.softmax(scores)), y_true)
    nll.backward()

    cross_entropy_scores = Tensor(s)
    ce = softmax_crossentropy(cross_entropy_scores, y_true)
    ce.backward()

    assert_allclose(nll.data, ce.data, atol=1e-5, rtol=1e-5)
    assert_allclose(scores.grad, cross_entropy_scores.grad, atol=1e-5, rtol=1e-5)
def test_weighted_negative_log_likelihood_vs_softmax_cross_entropy(
    data: st.DataObject, labels_as_tensor: bool
):
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(min_side=1, max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    y_true = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        ).map(Tensor if labels_as_tensor else lambda x: x)
    )
    weights = data.draw(
        hnp.arrays(
            shape=(s.shape[1],),
            dtype=float,
            elements=st.floats(1e-8, 100),
        )
    )
    scores = Tensor(s)
    weights = Tensor(weights)

    for score, y in zip(scores, y_true):
        score = mg.log(mg.nnet.softmax(score.reshape(1, -1)))
        y = y.reshape(-1)
        nll = negative_log_likelihood(score, y)
        weighted_nll = negative_log_likelihood(score, y, weights=weights)
        assert np.isclose(weighted_nll.data, weights[y.data].data * nll.data)
def test_softmax_crossentropy(data):
    """ Test the built-in implementation of softmax cross-entropy against the
    pure pygrad version."""
    s = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=10, min_dims=2, max_dims=2),
            dtype=float,
            elements=st.floats(-100, 100),
        )
    )
    labels = data.draw(
        hnp.arrays(
            shape=(s.shape[0],),
            dtype=hnp.integer_dtypes(),
            elements=st.integers(min_value=0, max_value=s.shape[1] - 1),
        )
    )
    scores = Tensor(s)
    softmax_cross = softmax_crossentropy(scores, labels, constant=False)
    softmax_cross.backward()

    pygrad_scores = Tensor(s)
    probs = softmax(pygrad_scores)

    correct_labels = (range(len(labels)), labels)
    truth = np.zeros(pygrad_scores.shape)
    truth[correct_labels] = 1

    pygrad_cross = (-1 / s.shape[0]) * (log(probs) * truth).sum()
    pygrad_cross.backward()

    assert_allclose(softmax_cross.data, pygrad_cross.data, atol=1e-5, rtol=1e-5)
    assert_allclose(scores.grad, pygrad_scores.grad, atol=1e-5, rtol=1e-5)
def binary_cross_entropy(y_pred, y_truth):
    """ Calculates the binary cross-entropy loss for a given set of predictions.

    Parameters
    ----------
    y_pred : mg.Tensor, shape=(N,)
        The tensor of predicted probabilities output from the model.

    y_truth : mg.Tensor, shape=(N,)
        A constant tensor or a NumPy array that contains the truth value
        for each prediction.

    Returns
    -------
    mg.Tensor, shape=()
        A zero-dimensional tensor that is the loss.
    """
    # The 1e-08 offsets guard against log(0) when a prediction is exactly 0 or 1.
    return -mg.mean(y_truth * mg.log(y_pred + 1e-08)
                    + (1 - y_truth) * mg.log(1 - y_pred + 1e-08))
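# A hedged usage sketch for `binary_cross_entropy` above. The numbers are
# illustrative only, and the sketch assumes sigmoid-style probability
# predictions (values in [0, 1]) as the function's formula requires.
def example_binary_cross_entropy_usage():
    import numpy as np
    import mygrad as mg

    y_pred = mg.Tensor(np.array([0.9, 0.2, 0.6]))  # predicted P(class == 1)
    y_truth = np.array([1.0, 0.0, 1.0])            # ground-truth binary labels
    loss = binary_cross_entropy(y_pred, y_truth)   # 0-dim Tensor
    loss.backward()                                # populates y_pred.grad
    return loss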
def focal_loss(scores, targets, *, alpha=1, gamma=0, constant=False): r""" Return the per-datum focal loss. Parameters ---------- scores : mygrad.Tensor, shape=(N, C) The C class scores for each of the N pieces of data. targets : Sequence[int], shape=(N,) The correct class indices, in [0, C), for each datum. alpha : Real, optional (default=1) The ɑ weighting factor in the loss formulation. gamma : Real, optional (default=0) The ɣ focusing parameter. Note that for Ɣ=0 and ɑ=1, this is cross-entropy loss. constant : bool, optional(default=False) If ``True``, the returned tensor is a constant (it does not back-propagate a gradient) Returns ------- mygrad.Tensor, shape=(N,) The per-datum focal loss. Notes ----- The formulation for the focal loss introduced in https://arxiv.org/abs/1708.02002. It is given by -ɑ(1-p)ˠlog(p). The focal loss for datum-:math:`i` is given by .. math:: -\alpha \hat{y}_i(1-p_i)^\gamma\log(p_i) where :math:`\hat{y}_i` is one in correspondence to the label associated with the datum and 0 elsewhere. That is, if the label :math:`y_k` is 2 and there are four possible label values, then :math:`\hat{y}_k = (0, 0, 1, 0)`. It is recommended in the paper that you normalize by the number of foreground samples. """ if isinstance(targets, Tensor): targets = targets.data check_loss_inputs(scores, targets) label_locs = (range(len(targets)), targets) pc = scores[label_locs] return -(alpha * (1 - pc + 1e-14)**gamma * log(pc, constant=constant))
def kl_divergence(outputs, targets):
    """ Returns the Kullback-Leibler divergence loss from the outputs to the
    targets.

    The KL-divergence loss for a single sample is given by yᵢ⊙(log(yᵢ) - xᵢ).

    Parameters
    ----------
    outputs : mygrad.Tensor, shape=(N, any)
        The model outputs for each of the N pieces of data.

    targets : numpy.ndarray, shape=(N, any)
        The correct value for each datum.

    Returns
    -------
    mygrad.Tensor, shape=()
        The mean Kullback-Leibler divergence.
    """
    return mean(targets * (log(targets) - outputs))
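# A hedged usage sketch for `kl_divergence` above. Note that the formula
# y * (log(y) - x) treats `outputs` as log-probabilities (x = log(q)), so
# model probabilities are passed through log first here. The distributions
# are made up, and the result is the mean over all elements, per the
# implementation above.
def example_kl_divergence_usage():
    targets = np.array([[0.6, 0.4],
                        [0.3, 0.7]])              # true distributions
    log_q = np.log(np.array([[0.5, 0.5],
                             [0.25, 0.75]]))      # log of model probabilities
    loss = kl_divergence(Tensor(log_q), targets)  # 0-dim Tensor
    return loss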
def focal_loss(scores, targets, *, alpha=1, gamma=0, constant=False): """ Return the per-datum focal loss. Parameters ---------- scores : mygrad.Tensor, shape=(N, C) The C class scores for each of the N pieces of data. targets : Sequence[int], shape=(N,) The correct class indices, in [0, C), for each datum. alpha : Real, optional (default=1) The ɑ weighting factor in the loss formulation. gamma : Real, optional (default=0) The ɣ focusing parameter. Note that for Ɣ=0 and ɑ=1, this is cross-entropy loss. constant : bool, optional(default=False) If ``True``, the returned tensor is a constant (it does not back-propagate a gradient) Returns ------- mygrad.Tensor, shape=(N,) The per-datum focal loss. Notes ----- This function does not perform a softmax before computing the loss. If you need to take the softmax before computing the loss, see :class:`SoftmaxFocalLoss` instead. It is recommended in the paper that you normalize by the number of foreground samples. """ if isinstance(targets, Tensor): targets = targets.data check_loss_inputs(scores, targets) label_locs = (range(len(targets)), targets) pc = scores[label_locs] return -(alpha * (1 - pc + 1e-14)**gamma * log(pc, constant=constant))
def CrossEntropy(self, y_real, y_pred, eps=1e-10):
    """ Mean cross-entropy between one-hot truth labels and predicted probabilities. """
    # Clip predictions away from 0 and 1 so that log() is numerically stable;
    # the clip makes any further epsilon inside the log redundant.
    y_pred = mg.clip(y_pred, eps, 1.0 - eps)
    N = y_pred.shape[0]
    return -mg.sum(y_real * mg.log(y_pred)) / N
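# A hedged usage sketch for the `CrossEntropy` method above. Its enclosing
# class is not shown in this file, and `self` is unused, so None is passed in
# its place purely for illustration; the labels and probabilities are made up.
def example_cross_entropy_usage():
    import numpy as np
    import mygrad as mg

    y_real = np.array([[1.0, 0.0],
                       [0.0, 1.0]])             # one-hot truth, shape (N, C)
    y_pred = mg.Tensor(np.array([[0.9, 0.1],
                                 [0.2, 0.8]]))  # predicted probabilities
    loss = CrossEntropy(None, y_real, y_pred)   # 0-dim Tensor
    loss.backward()                             # populates y_pred.grad
    return loss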