def test_Rop_numpy():
    """Check the sparsemax Rop against a NumPy reference computation."""
    z = np.random.uniform(low=-3, high=3, size=(100, 10))
    w = np.random.normal(size=(10, 10))

    logits = tf.placeholder(tf.float64, name='z')
    weights = tf.constant(w, name='w')
    sparsemax_op = kernel.sparsemax(logits)

    # TensorFlow applies the chain rule forward (left to right), meaning that:
    # $dS(z)*w/d(z) = dS(z)*w/dS(z) * dS(z)/dz = Rop(S)(z, Rop(*)(w, 1))$
    # So, to exercise the sparsemax Rop with a non-trivial incoming gradient,
    # the output is multiplied by a weight matrix. The gradient (v) handed to
    # the Rop then becomes $dS(z)*w/dS(z)$.
    transformed = tf.matmul(sparsemax_op, weights)
    transformed_grad = tf.gradients(transformed, [logits])[0]

    with tf.Session() as sess:
        feed = {logits: z}
        # S(z), needed to build the support set for the reference below.
        probability = sess.run(sparsemax_op, feed_dict=feed)
        tf_grad = sess.run(transformed_grad, feed_dict=feed)

    # Chain rule: gradient of the matmul with respect to S(z).
    upstream = np.dot(np.ones_like(z), w.T)
    support = probability > 0
    # \hat{v}: one scalar per row of z (mean of the upstream gradient over
    # the support of that row).
    v_hat = np.sum(upstream * support, axis=1) / np.sum(support, axis=1)
    # J(z) * v
    expected_grad = support * (upstream - v_hat[:, np.newaxis])

    np.testing.assert_almost_equal(tf_grad, expected_grad)
def test_Rop_estimated():
    """Check the sparsemax-loss Rop against a numerically estimated Rop."""
    z = np.random.uniform(low=-3, high=3, size=(100, 10))
    # The positional argument is `loc`, so this draws one scalar with mean 1.
    w = np.random.normal(1)

    # One-hot labels: a single randomly chosen class per row.
    hot_class = np.random.randint(0, 10, size=100)
    q = np.zeros((100, 10))
    q[np.arange(100), hot_class] = 1

    logits = tf.placeholder(tf.float64, name='z')
    labels = tf.constant(q, name='q')
    weight = tf.constant(w, name='w', dtype=tf.float64)

    sparsemax_op = kernel.sparsemax(logits)
    loss_op = kernel.sparsemax_loss(logits, sparsemax_op, labels)
    weighted_loss = loss_op * weight

    # compute_gradient needs a default session, hence the context manager.
    # https://www.tensorflow.org/versions/r0.8/api_docs/python/test.html
    with tf.Session():
        analytical, numerical = tf.test.compute_gradient(
            logits, z.shape,
            weighted_loss, (100, ),
            x_init_value=z, delta=1e-9
        )

    np.testing.assert_almost_equal(analytical, numerical, decimal=4)
def sparsemax_loss(z, q):
    """Evaluate ``kernel.sparsemax_loss`` for logits ``z`` and labels ``q``.

    Builds float64 placeholders for both inputs, feeds ``z`` and ``q``
    through a fresh session and returns the evaluated loss.
    """
    logits = tf.placeholder(tf.float64, name='z')
    labels = tf.placeholder(tf.float64, name='q')

    probabilities = kernel.sparsemax(logits)
    loss_op = kernel.sparsemax_loss(logits, probabilities, labels)

    with tf.Session() as sess:
        return sess.run(loss_op, feed_dict={logits: z, labels: q})
def test_Rop_numpy():
    """Check the sparsemax-loss Rop against the NumPy form ``-q + S(z)``."""
    z = np.random.uniform(low=-3, high=3, size=(5, 3))

    # One-hot labels: a single randomly chosen class per row.
    hot_class = np.random.randint(0, 3, size=5)
    q = np.zeros((5, 3))
    q[np.arange(5), hot_class] = 1

    logits = tf.placeholder(tf.float64, name='z')
    labels = tf.placeholder(tf.float64, name='q')

    sparsemax_op = kernel.sparsemax(logits)
    loss_op = kernel.sparsemax_loss(logits, sparsemax_op, labels)
    loss_grad = tf.gradients(loss_op, [logits])[0]

    with tf.Session() as sess:
        tf_grad = sess.run(loss_grad, feed_dict={logits: z, labels: q})
        probability = sess.run(sparsemax_op, feed_dict={logits: z})

    # Exact comparison is intentional here (no tolerance).
    np.testing.assert_array_equal(tf_grad, -q + probability)
def test_Rop_estimated():
    """Check the sparsemax Rop against a numerically estimated Rop."""
    z = np.random.uniform(low=-3, high=3, size=(100, 10))
    w = np.random.normal(size=(10, 10))

    logits = tf.placeholder(tf.float64, name='z')
    weights = tf.constant(w, name='w')

    sparsemax_op = kernel.sparsemax(logits)
    # Multiply by a weight matrix so the incoming gradient is non-trivial.
    transformed = tf.matmul(sparsemax_op, weights)

    # compute_gradient needs a default session, hence the context manager.
    # https://www.tensorflow.org/versions/r0.8/api_docs/python/test.html
    with tf.Session():
        analytical, numerical = tf.test.compute_gradient(
            logits, z.shape,
            transformed, z.shape,
            x_init_value=z, delta=1e-9
        )

    np.testing.assert_almost_equal(analytical, numerical, decimal=4)
def test_Rop_numpy():
    """Check the weighted sparsemax-loss Rop against a NumPy reference."""
    z = np.random.uniform(low=-3, high=3, size=(100, 10))
    w = np.random.normal(size=(100, 1))

    # One-hot labels: a single randomly chosen class per row.
    hot_class = np.random.randint(0, 10, size=100)
    q = np.zeros((100, 10))
    q[np.arange(100), hot_class] = 1

    logits = tf.placeholder(tf.float64, name='z')
    labels = tf.constant(q, name='q')
    weights = tf.constant(w, name='w')

    sparsemax_op = kernel.sparsemax(logits)
    loss_op = kernel.sparsemax_loss(logits, sparsemax_op, labels)
    # Scale each row's loss by its own weight so that the gradient arriving
    # at the loss op is not simply all ones.
    weighted_loss = tf.expand_dims(loss_op, 1) * weights
    weighted_loss_grad = tf.gradients(weighted_loss, [logits])[0]

    with tf.Session() as sess:
        feed = {logits: z}
        tf_grad = sess.run(weighted_loss_grad, feed_dict=feed)
        probability = sess.run(sparsemax_op, feed_dict=feed)

    # Chain rule: the upstream gradient of the elementwise product is w.
    upstream = np.ones_like(w) * w
    expected_grad = upstream * (-q + probability)

    # Exact comparison is intentional here (no tolerance).
    np.testing.assert_array_equal(tf_grad, expected_grad)
def sparsemax(z):
    """Evaluate ``kernel.sparsemax`` on ``z`` and return the result.

    A float64 placeholder is created for the logits and ``z`` is fed
    through it in a fresh session.
    """
    logits = tf.placeholder(tf.float64, name='z')
    # Named differently from the function to avoid shadowing it locally.
    sparsemax_op = kernel.sparsemax(logits)

    with tf.Session() as sess:
        return sess.run(sparsemax_op, feed_dict={logits: z})