Example #1
0
    def test_no_label_smoothing(self):
        """Check XentLoss with smoothing switched off (``smoothing=0.0``).

        The smoothed targets are expected to remain one-hot, with all-zero
        rows at the positions whose target equals ``pad_index``. The
        expected loss 5.6268 matches ``2 * -ln(0.6) + 2 * -ln(0.1)``,
        i.e. the four non-pad target positions.
        """
        criterion = XentLoss(pad_index=0, smoothing=0.0)

        # batch x seq_len x vocab_size: 3 x 2 x 5
        # every position carries the same distribution over the vocabulary
        token_probs = [0.1, 0.1, 0.6, 0.1, 0.1]
        predict = torch.FloatTensor([[token_probs] * 2] * 3)

        # batch x seq_len: 3 x 2
        targets = torch.LongTensor([[2, 1], [2, 0], [1, 0]])

        # test the smoothing function: should still be one-hot
        flat_targets = targets.view(-1)
        vocab_size = predict.size(-1)
        smoothed_targets = criterion._smooth_targets(
            targets=flat_targets, vocab_size=vocab_size)

        assert smoothed_targets.max() == 1
        assert smoothed_targets.min() == 0

        # one expected row per flattened target: 2, 1, 2, pad, 1, pad
        hot_1 = [0., 1., 0., 0., 0.]
        hot_2 = [0., 0., 1., 0., 0.]
        pad_row = [0., 0., 0., 0., 0.]
        expected = torch.Tensor(
            [hot_2, hot_1, hot_2, pad_row, hot_1, pad_row])
        self.assertTensorAlmostEqual(smoothed_targets, expected)

        # test the loss computation on log-probabilities
        loss = criterion(predict.log(), targets)
        self.assertTensorAlmostEqual(loss, 5.6268)
Example #2
0
    def test_label_smoothing(self):
        """Check XentLoss with label smoothing enabled (``smoothing=0.4``).

        Each non-pad target row is expected to hold ``1 - smoothing`` (0.6)
        at the target index, 0.1333 at the other non-pad indices and 0 in
        the ``pad_index`` column; rows whose target equals ``pad_index``
        are expected to be all zeros. The loss is compared against 2.1326.
        """
        smoothing = 0.4
        criterion = XentLoss(pad_index=0, smoothing=smoothing)

        # batch x seq_len x vocab_size: 3 x 2 x 5
        # every position carries the same distribution over the vocabulary
        token_probs = [0.1, 0.1, 0.6, 0.1, 0.1]
        predict = torch.FloatTensor([[token_probs] * 2] * 3)

        # batch x seq_len: 3 x 2
        targets = torch.LongTensor([[2, 1], [2, 0], [1, 0]])

        # test the smoothing function
        flat_targets = targets.view(-1)
        vocab_size = predict.size(-1)
        smoothed_targets = criterion._smooth_targets(
            targets=flat_targets, vocab_size=vocab_size)

        # one expected row per flattened target: 2, 1, 2, pad, 1, pad
        peak_at_1 = [0.0000, 0.6000, 0.1333, 0.1333, 0.1333]
        peak_at_2 = [0.0000, 0.1333, 0.6000, 0.1333, 0.1333]
        pad_row = [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]
        expected = torch.Tensor(
            [peak_at_2, peak_at_1, peak_at_2, pad_row, peak_at_1, pad_row])
        self.assertTensorAlmostEqual(smoothed_targets, expected)
        assert smoothed_targets.max() == 1 - smoothing

        # test the loss computation
        loss = criterion(predict.log(), targets)
        self.assertTensorAlmostEqual(loss, 2.1326)