def test_smooth_l1_loss(self):
    """Check SmoothL1Loss on 2x2 inputs after explicitly initializing it.

    Three cases are asserted, in order:
      1. The only differing element is given zero weight -> loss is 0.
      2. The differing element (absolute difference 2) is weighted in ->
         loss is expected to be (2 - 0.5) / sum(weights).
      3. The differing element has absolute difference 0.5 -> loss is
         expected to be 0.5 * 0.5**2 / sum(weights).
    """
    smooth_l1 = tl.SmoothL1Loss()

    # Initialize the layer from the signature of three 2x2 arrays of ones
    # (standing in for input, target and weights).
    placeholders = [np.ones((2, 2)) for _ in range(3)]
    smooth_l1.init(shapes.signature(placeholders))

    # Input and target differ only at position [1, 1], by 2.
    predictions = np.array([[1., 1.], [1., 2.]])
    labels = np.array([[1., 1.], [1., 0.]])
    abs_diff = 2

    # Case 1: the weight at [1, 1] is zero, so the difference is ignored.
    mask = np.array([[1., 1.], [1., 0.]])
    np.testing.assert_allclose(smooth_l1((predictions, labels, mask)), 0.0)

    # Case 2: the weight at [1, 1] is one; two weights are non-zero in total.
    mask = np.array([[1., 0.], [0., 1.]])
    total_weight = 2
    np.testing.assert_allclose(
        smooth_l1((predictions, labels, mask)),
        (abs_diff - 0.5) / total_weight,
    )

    # Case 3: same weights, but the difference at [1, 1] shrinks to 0.5.
    predictions = np.array([[1., 1.], [1., 1.5]])
    labels = np.array([[1., 1.], [1., 1.]])
    abs_diff = 0.5
    np.testing.assert_allclose(
        smooth_l1((predictions, labels, mask)),
        0.5 * abs_diff**2 / total_weight,
    )
def f(values, actions, returns, mask):
    """Return the weighted loss between action-selected values and returns.

    `self` is not a parameter of this function; `self._smoothl1loss` is read
    from the enclosing scope and selects which loss layer is used.

    Args:
      values: 3-D array indexed as [i, j, action]; axis 1 may be longer than
        axis 1 of `returns` and is truncated to match.
      actions: 2-D integer array used to index the last axis of `values`.
      returns: array checked to have the same shape as the selected values.
      mask: array checked to have the same shape as the selected values;
        passed to the loss layer as its third input.

    Returns:
      The result of `tl.SmoothL1Loss().forward(...)` when
      `self._smoothl1loss` is truthy, otherwise of `tl.L2Loss().forward(...)`.
    """
    row_idx, col_idx = np.indices(actions.shape)

    # The length is taken from the shape of returns, and the superfluous
    # trailing slice of values is removed accordingly. An analogous operation
    # is done in value_batches_stream.
    horizon = returns.shape[1]
    trimmed_values = values[:, :horizon, :]

    # For every (i, j), pick the value of the action taken at that position.
    picked = trimmed_values[row_idx, col_idx, actions]
    shapes.assert_same_shape(picked, returns)
    shapes.assert_same_shape(picked, mask)

    loss_layer = tl.SmoothL1Loss() if self._smoothl1loss else tl.L2Loss()
    return loss_layer.forward((picked, returns, mask))
def test_smooth_l1_loss(self):
    """Check SmoothL1Loss on 2x2 inputs passed as a list, without init.

    Three cases are asserted, in order:
      1. The only differing element is given zero weight -> loss is 0.
      2. The differing element (absolute difference 2) is weighted in ->
         loss is expected to be (2 - 0.5) / sum(weights).
      3. The differing element has absolute difference 0.5 -> loss is
         expected to be 0.5 * 0.5**2 / sum(weights).
    """
    smooth_l1 = tl.SmoothL1Loss()

    # Outputs and targets differ only at position [1, 1], by 2.
    outputs = np.array([[1., 1.], [1., 2.]])
    labels = np.array([[1., 1.], [1., 0.]])
    abs_diff = 2

    # Case 1: the weight at [1, 1] is zero, so the difference is ignored.
    mask = np.array([[1., 1.], [1., 0.]])
    result = smooth_l1([outputs, labels, mask])
    np.testing.assert_allclose(result, 0.0)

    # Case 2: the weight at [1, 1] is one; two weights are non-zero in total.
    mask = np.array([[1., 0.], [0., 1.]])
    total_weight = 2
    result = smooth_l1([outputs, labels, mask])
    np.testing.assert_allclose(result, (abs_diff - 0.5) / total_weight)

    # Case 3: same weights, but the difference at [1, 1] shrinks to 0.5.
    outputs = np.array([[1., 1.], [1., 1.5]])
    labels = np.array([[1., 1.], [1., 1.]])
    abs_diff = 0.5
    result = smooth_l1([outputs, labels, mask])
    np.testing.assert_allclose(result, 0.5 * abs_diff**2 / total_weight)