def test_Rop_numpy(): """check sparsemax Rop, aginst numpy Rop""" z = np.random.uniform(low=-3, high=3, size=(100, 10)) w = np.random.normal(size=(10, 10)) logits = tf.placeholder(tf.float64, name='z') weights = tf.constant(w, name='w') sparsemax = sparsemax_tf_ops.sparsemax_op(logits) # tensorflow uses the chainrule forward (left to right), meaning that: # $dS(z)*w/d(z) = dS(z)*w/dS(z) * dS(z)/dz = Rop(S)(z, Rop(*)(w, 1))$ # Thus to test the Rop for sparsemax correctly a weight matrix is # multiplied. This causes the grad (v) in the Rop to be $dS(z)*w/dS(z)$. sparsemax_transform = tf.matmul(sparsemax, weights) sparsemax_transform_grad = tf.gradients(sparsemax_transform, [logits])[0] with tf.Session() as sess: # chain rule grad = np.dot(np.ones_like(z), w.T) # Construct S(z) properbility = sparsemax.eval({logits: z}) support = properbility > 0 # Calculate \hat{v}, which will be a vector (scalar for each z) v_hat = np.sum(grad * support, axis=1) / np.sum(support, axis=1) # Calculates J(z) * v numpy_grad = support * (grad - v_hat[:, np.newaxis]) np.testing.assert_almost_equal( sparsemax_transform_grad.eval({logits: z}), numpy_grad )
def test_Rop_estimated(): """check sparsemax-loss Rop, aginst estimated Rop""" z = np.random.uniform(low=-3, high=3, size=(100, 10)) w = np.random.normal(1) q = np.zeros((100, 10)) q[np.arange(0, 100), np.random.randint(0, 10, size=100)] = 1 logits = tf.placeholder(tf.float64, name='z') labels = tf.constant(q, name='q') weight = tf.constant(w, name='w', dtype=tf.float64) sparsemax = ops.sparsemax_op(logits) loss = ops.sparsemax_loss_op(logits, sparsemax, labels) loss_transform = loss * weight with tf.Session() as sess: # https://www.tensorflow.org/versions/r0.8/api_docs/python/test.html analytical, numerical = tf.test.compute_gradient( logits, z.shape, loss_transform, (100, ), x_init_value=z, delta=1e-9 ) np.testing.assert_almost_equal( analytical, numerical, decimal=4 )
def test_Rop_numpy(): """check sparsemax-loss Rop, aginst numpy Rop""" z = np.random.uniform(low=-3, high=3, size=(100, 10)) w = np.random.normal(size=(100, 1)) q = np.zeros((100, 10)) q[np.arange(0, 100), np.random.randint(0, 10, size=100)] = 1 logits = tf.placeholder(tf.float64, name='z') labels = tf.constant(q, name='q') weights = tf.constant(w, name='w') sparsemax = ops.sparsemax_op(logits) loss = ops.sparsemax_loss_op(logits, sparsemax, labels) loss_transform = tf.expand_dims(loss, 1) * weights loss_transform_grad = tf.gradients(loss_transform, [logits])[0] with tf.Session() as sess: # chain rule grad = np.ones_like(w) * w np.testing.assert_array_equal( loss_transform_grad.eval({logits: z}), grad * (-q + sparsemax.eval({logits: z})) )
def sparsemax_loss(z, q):
    logits = tf.placeholder(tf.float64, name='z')
    labels = tf.placeholder(tf.float64, name='q')
    sparsemax = ops.sparsemax_op(logits)
    loss = ops.sparsemax_loss_op(logits, sparsemax, labels)

    with tf.Session() as sess:
        return loss.eval({logits: z, labels: q})
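
# A hedged numpy sketch of the value sparsemax_loss() evaluates, assuming
# the loss from Martins & Astudillo (2016) rewritten in terms of the
# sparsemax output p = S(z):
#   L(z, q) = -q^T z + 1/2 * sum_j p_j * (2 z_j - p_j) + 1/2 * ||q||^2
# The helper name is hypothetical and not part of the ops module.
def sparsemax_loss_numpy(z, q, p):
    """Reference sparsemax loss per row, given p = sparsemax(z)."""
    return (
        -np.sum(q * z, axis=1)
        + 0.5 * np.sum(p * (2 * z - p), axis=1)
        + 0.5 * np.sum(q * q, axis=1)
    )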
def test_Rop_estimated(): """check sparsemax Rop, aginst estimated Rop""" z = np.random.uniform(low=-3, high=3, size=(100, 10)) w = np.random.normal(size=(10, 10)) logits = tf.placeholder(tf.float64, name='z') weights = tf.constant(w, name='w') sparsemax = sparsemax_tf_ops.sparsemax_op(logits) sparsemax_transform = tf.matmul(sparsemax, weights) with tf.Session() as sess: # https://www.tensorflow.org/versions/r0.8/api_docs/python/test.html analytical, numerical = tf.test.compute_gradient( logits, z.shape, sparsemax_transform, z.shape, x_init_value=z, delta=1e-9 ) np.testing.assert_almost_equal( analytical, numerical, decimal=4 )
def sparsemax(z):
    logits = tf.placeholder(tf.float64, name='z')
    sparsemax = sparsemax_tf_ops.sparsemax_op(logits)

    with tf.Session() as sess:
        return sparsemax.eval({logits: z})
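
# A minimal numpy sketch of the sparsemax forward pass itself (the
# sorting-based algorithm from Martins & Astudillo, 2016), useful as an
# independent reference for the TensorFlow op evaluated above. The name is
# hypothetical and not part of the ops module.
def sparsemax_numpy(z):
    """Rowwise sparsemax: project each row of z onto the probability simplex."""
    z_sorted = np.sort(z, axis=1)[:, ::-1]            # sort each row descending
    k = np.arange(1, z.shape[1] + 1)                   # 1, ..., K
    z_cumsum = np.cumsum(z_sorted, axis=1)
    # largest k with 1 + k * z_(k) > cumulative sum of the top-k entries
    support_size = np.sum(1 + k * z_sorted > z_cumsum, axis=1)
    # threshold tau chosen so the kept entries sum to one
    tau = (z_cumsum[np.arange(z.shape[0]), support_size - 1] - 1) / support_size
    return np.maximum(z - tau[:, np.newaxis], 0)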