    def test_op_2(self):
        rnd = np.random.RandomState(0)
        with tf.Graph().as_default():
            logits = tf.constant(rnd.uniform(0.0, 1.0, [2, 2]),
                                 dtype=tf.float32)
            logits_v = tf.constant(rnd.uniform(0.0, 1.0, [2, 2]),
                                   dtype=tf.float32)
            r = tf.Variable(0.0, dtype=tf.float32)
            logits = logits_v * r + logits
            t = tf.constant([1, 0])
            t = tf.one_hot(t, 2, dtype=tf.float32)
            # loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            #     logits=logits, labels=y)
            y = tf.nn.softmax(logits)
            loss = t * tf.log(y + 1e-5)
            loss = tf.reduce_sum(loss)

            # Collect the Softmax-related nodes in the graph (currently unused;
            # kept for inspection/debugging).
            g = tf.get_default_graph()
            node_list = g.as_graph_def().node
            inspect = set(["Softmax", "SoftmaxGrad"])
            node_list = filter(lambda x: x.op in inspect, node_list)
            grad_fw = forward_gradients(loss, r, gate_gradients=True)
            grad_bk = tf.gradients(loss, r)[0]
            with self.test_session() as sess:
                sess.run(tf.global_variables_initializer())
                grad_bk_val = sess.run(grad_bk)
                grad_fw_val = sess.run(grad_fw)
                np.testing.assert_allclose(grad_fw_val, grad_bk_val, rtol=5)
    def test_convnet(self):
        with tf.Graph().as_default():
            # Define model.
            r = tf.Variable(1.0)
            x = tf.constant(np.random.uniform(-1.0, 1.0, [1, 5, 5, 2]),
                            dtype=tf.float32)
            w = tf.constant(np.random.uniform(-1.0, 1.0, [2, 2, 2, 3]),
                            dtype=tf.float32)
            h = tf.nn.conv2d(r + x, r * w, [1, 1, 1, 1], "SAME")
            h = tf.nn.max_pool(h, [1, 3, 3, 1], [1, 2, 2, 1], "SAME")
            h = tf.nn.relu(h)
            # First branch.
            w2 = tf.constant(np.random.uniform(-1.0, 1.0, [27, 1]),
                             dtype=tf.float32)
            h2 = tf.matmul(tf.reshape(h, [1, -1]), w2)
            y2 = tf.nn.tanh(h2)
            y2 = tf.reduce_sum(y2)
            # We can take a second branch.
            w3 = tf.constant(np.random.uniform(-1.0, 1.0, [27, 1]),
                             dtype=tf.float32)
            h3 = tf.matmul(tf.reshape(h, [1, -1]), w3)
            y3 = tf.nn.sigmoid(h3)
            y3 = tf.reduce_sum(y3)
            # Take gradients of a list of y wrt. scalar r.
            # Returns [grad_y2_r, grad_y3_r].
            grad_fw = forward_gradients([y2, y3], r, gate_gradients=True)
            # Reverse mode implementation from tensorflow.
            grad_bk = [tf.gradients(y2, r)[0], tf.gradients(y3, r)[0]]
            with self.test_session() as sess:
                sess.run(tf.global_variables_initializer())
                grad_fw_val = sess.run(grad_fw)
                grad_bk_val = sess.run(grad_bk)
                np.testing.assert_allclose(grad_fw_val, grad_bk_val, rtol=5)
    def make_unit_graph(self, x, y, rnd=None, dtype=tf.float32):
        """Makes a computation graph that computes (J^T r)^T v and r^T J v."""
        if rnd is None:
            rnd = np.random.RandomState(0)
        x_shape = [int(ss) for ss in x.get_shape()]
        v = self.get_random_tensor(x_shape, rnd=rnd)
        y_shape = [int(ss) for ss in y.get_shape()]
        r = self.get_random_tensor(y_shape, rnd=rnd)
        jt_r = tf.gradients(y, [x], r, gate_gradients=True)
        jt_r_t_v = self.inner_prod(jt_r, [v])
        j_v = forward_gradients(y, [x], [v], gate_gradients=True)
        r_t_j_v = tf.reduce_sum(r * j_v)
        return jt_r_t_v, r_t_j_v
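A hypothetical example of how this helper might be exercised (a sketch only, not part of the original tests; it assumes the class's get_random_tensor and inner_prod helpers defined elsewhere in this file, and the method name test_unit_tanh is made up):

    def test_unit_tanh(self):
        # Hypothetical usage sketch of make_unit_graph (illustrative only).
        with tf.Graph().as_default():
            x = tf.constant(np.random.uniform(-1.0, 1.0, [3, 4]), dtype=tf.float32)
            y = tf.tanh(tf.matmul(x, tf.ones([4, 2], dtype=tf.float32)))
            jt_r_t_v, r_t_j_v = self.make_unit_graph(x, y)
            with self.test_session() as sess:
                # Both scalars evaluate r^T J v, once via reverse mode and once
                # via forward mode, so they should agree up to float error.
                bk_val, fw_val = sess.run([jt_r_t_v, r_t_j_v])
                np.testing.assert_allclose(bk_val, fw_val, rtol=1e-5)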
    def test_manual(self):
        with tf.Graph().as_default(), tf.device("/cpu:0"):
            with self.test_session() as sess:
                x_val = np.random.uniform(0, 1)
                x = tf.constant(x_val)
                y = tf.tanh(x)
                dy_dx = forward_gradients(y, x, gate_gradients=True)
                dy_dx_tf = sess.run(dy_dx)
                eps = 1e-5
                x_val = x_val - eps
                y_val_1 = np.tanh(x_val)
                x_val = x_val + 2 * eps
                y_val_2 = np.tanh(x_val)
                dy_dx_fd = (y_val_2 - y_val_1) / (2 * eps)
                np.testing.assert_allclose(dy_dx_tf, dy_dx_fd, rtol=1e-5)
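The same central-difference idea extends to a directional derivative along a vector v. The sketch below is hypothetical (the method name check_directional is made up); it assumes forward_gradients accepts list arguments for the inputs and tangents, as in make_unit_graph above:

    def check_directional(self, eps=1e-4):
        # Hypothetical sketch: forward_gradients(y, [x], [v]) should approximate
        # (f(x + eps * v) - f(x - eps * v)) / (2 * eps).
        with tf.Graph().as_default(), tf.device("/cpu:0"):
            with self.test_session() as sess:
                rnd = np.random.RandomState(0)
                x_val = rnd.uniform(-1.0, 1.0, [3])
                v_val = rnd.uniform(-1.0, 1.0, [3])
                x = tf.constant(x_val)
                v = tf.constant(v_val)
                y = tf.reduce_sum(tf.tanh(x))
                dy_dv = sess.run(forward_gradients(y, [x], [v], gate_gradients=True))
                fd = (np.sum(np.tanh(x_val + eps * v_val)) -
                      np.sum(np.tanh(x_val - eps * v_val))) / (2 * eps)
                np.testing.assert_allclose(np.squeeze(dy_dv), fd, rtol=1e-5)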
    def test_grad_graph(self):
        with tf.Graph().as_default():

            # Dummy variable.
            r = tf.Variable(1.0)

            # Input.
            x = tf.constant(np.random.uniform(-1.0, 1.0, [1, 5, 5, 2]),
                            dtype=tf.float32,
                            name="x")

            # First convolution.
            v = tf.constant(np.random.uniform(-1.0, 1.0, [2, 2, 2, 3]),
                            dtype=tf.float32,
                            name="v")
            w = tf.constant(np.random.uniform(-1.0, 1.0, [2, 2, 2, 3]),
                            dtype=tf.float32,
                            name="w")
            wv = w + r * v
            h = tf.nn.conv2d(x, wv, [1, 1, 1, 1], "SAME")
            h = tf.nn.max_pool(h, [1, 3, 3, 1], [1, 2, 2, 1], "SAME")
            h = tf.nn.relu(h)

            # Second convolution.
            v_ = tf.constant(np.random.uniform(-1.0, 1.0, [2, 2, 3, 3]),
                             dtype=tf.float32,
                             name="v_")
            w_ = tf.constant(np.random.uniform(-1.0, 1.0, [2, 2, 3, 3]),
                             dtype=tf.float32,
                             name="w_")
            w_v = w_ + r * v_
            h = tf.nn.conv2d(h, w_v, [1, 1, 1, 1], "SAME")

            # Fully connected.
            w2 = tf.constant(np.random.uniform(-1.0, 1.0, [27, 1]),
                             dtype=tf.float32,
                             name="w2")
            h2 = tf.matmul(tf.reshape(h, [1, -1]), w2)
            y2 = tf.nn.sigmoid(h2)
            y2 = tf.reduce_sum(y2)
            grad_bk = tf.gradients(y2, [w, w_], gate_gradients=True)
            grad_fw = forward_gradients(grad_bk, r, gate_gradients=True)
            with self.test_session() as sess:
                sess.run(tf.global_variables_initializer())
                sess.run(grad_fw)
Example #6
def fisher_vec_fw(ys, xs, vs):
    """Implements Fisher vector product using backward and forward AD.

  Args:
    ys: Loss function or output variables.
    xs: Weights, list of tensors.
    vs: List of tensors to multiply, for each weight tensor.

  Returns:
    J'Jv: Fisher vector product.
  """
    # Validate the input
    if type(xs) == list:
        if len(vs) != len(xs):
            raise ValueError("xs and vs must have the same length.")

    jv = forward_gradients(ys, xs, vs, gate_gradients=True)
    jjv = tf.gradients(ys, xs, jv, gate_gradients=True)
    return jjv
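A hypothetical sanity check for fisher_vec_fw (not from the original file): for a linear map y = A x the Jacobian is A, so the result should equal A^T A v. It assumes fisher_vec_fw and forward_gradients are importable from this module, uses the TF1 graph API like the rest of the file, and all variable names are illustrative.

# Hypothetical sanity check for fisher_vec_fw (illustrative only).
import numpy as np
import tensorflow as tf

with tf.Graph().as_default():
    a_val = np.random.RandomState(0).uniform(-1.0, 1.0, [3, 2]).astype(np.float32)
    v_val = np.array([[0.5], [2.0]], dtype=np.float32)
    a = tf.constant(a_val)
    x = tf.constant(np.array([[1.0], [-1.0]], dtype=np.float32))
    y = tf.matmul(a, x)  # y = A x, so J = dy/dx = A.
    jjv = fisher_vec_fw(y, [x], [tf.constant(v_val)])  # Should equal A^T A v.
    with tf.Session() as sess:
        np.testing.assert_allclose(np.squeeze(sess.run(jjv)),
                                   np.squeeze(a_val.T.dot(a_val).dot(v_val)),
                                   rtol=1e-4)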
Example #7
def hessian_vec_fw(ys, xs, vs, grads=None):
    """Implements Hessian vector product using forward on backward AD.

  Args:
    ys: Loss function.
    xs: Weights, list of tensors.
    vs: List of tensors to multiply, for each weight tensor.
    grads: Optional precomputed gradients of ys wrt. xs; if None, they are
      computed with tf.gradients.

  Returns:
    Hv: Hessian vector product, same size, same shape as xs.
  """
    # Validate the input
    if type(xs) == list:
        if len(vs) != len(xs):
            raise ValueError("xs and vs must have the same length.")

    if grads is None:
        grads = tf.gradients(ys, xs, gate_gradients=True)
    return forward_gradients(grads, xs, vs, gate_gradients=True)
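A hypothetical numerical check for hessian_vec_fw (not part of the source): for loss = sum(x^3) the Hessian is diag(6 x), so at x = (1, 2) with v = (1, 1) the product should be (6, 12). The snippet assumes hessian_vec_fw is in scope.

# Hypothetical check for hessian_vec_fw (illustrative only).
import numpy as np
import tensorflow as tf

with tf.Graph().as_default():
    x = tf.constant([1.0, 2.0], dtype=tf.float32)
    loss = tf.reduce_sum(x * x * x)  # Hessian of the loss is diag(6 * x).
    v = tf.constant([1.0, 1.0], dtype=tf.float32)
    hv = hessian_vec_fw(loss, [x], [v])  # Expect [6.0, 12.0].
    with tf.Session() as sess:
        np.testing.assert_allclose(np.squeeze(sess.run(hv)), [6.0, 12.0], rtol=1e-5)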
Example #8
def fisher_vec_z(ys, xs, vs):
    """Implements JJ'v, where v is on the output space.

  Args:
    ys: Loss function or output variables.
    xs: Weights, list of tensors.
    vs: List of tensors to multiply, living on the output space (one for each
      tensor in ys).

  Returns:
    JJ'v: Fisher vector product on the output space.
  """
    # Validate the input
    if type(ys) == list:
        if len(vs) != len(ys):
            raise ValueError("ys and vs must have the same length.")

    jt_v = tf.gradients(ys, xs, vs, gate_gradients=True)  # J'v, via reverse mode.
    jjt_v = forward_gradients(ys, xs, jt_v, gate_gradients=True)  # J(J'v), via forward mode.
    return jjt_v
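A hypothetical check mirroring the fisher_vec_fw one above, except that here the multiplier lives on the output space: for y = A x the result should be A A^T v. Again, fisher_vec_z is assumed to be in scope and the names are illustrative.

# Hypothetical check for fisher_vec_z (illustrative only).
import numpy as np
import tensorflow as tf

with tf.Graph().as_default():
    a_val = np.random.RandomState(0).uniform(-1.0, 1.0, [3, 2]).astype(np.float32)
    v_val = np.random.RandomState(1).uniform(-1.0, 1.0, [3, 1]).astype(np.float32)
    a = tf.constant(a_val)
    x = tf.constant(np.array([[1.0], [-1.0]], dtype=np.float32))
    y = tf.matmul(a, x)  # J = A; v has the shape of y.
    jjt_v = fisher_vec_z(y, [x], tf.constant(v_val))  # Should equal A A^T v.
    with tf.Session() as sess:
        np.testing.assert_allclose(np.squeeze(sess.run(jjt_v)),
                                   np.squeeze(a_val.dot(a_val.T).dot(v_val)),
                                   rtol=1e-4)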
Example #9
def gauss_newton_vec_z(ys, zs, xs, vs):
    """Implements HJJ'v, where v is on the output space.

  Args:
    ys: Loss function or output variables.
    zs: Before output layer (input to softmax).
    xs: Weights, list of tensors.
    vs: List of tensors to multiply, living on the output space (one for each
      tensor in zs).

  Returns:
    HJJ'v: Gauss-Newton vector product on the output space.
  """
    # Validate the input
    if type(zs) == list:
        if len(vs) != len(zs):
            raise ValueError("zs and vs must have the same length.")

    grads_z = tf.gradients(ys, zs, gate_gradients=True)  # dL/dz.
    jt_v = tf.gradients(zs, xs, vs, gate_gradients=True)  # J'v, via reverse mode.
    hjjt_v = forward_gradients(grads_z, xs, jt_v, gate_gradients=True)  # HJ(J'v).
    return hjjt_v
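A hypothetical call pattern for gauss_newton_vec_z (not from the source). Following the file's own convention (see test_op_2), the softmax cross-entropy is written out with tf.nn.softmax and tf.log so that its second derivative is available; the multiplier v_z lives on the logit space, and gauss_newton_vec_z is assumed to be in scope.

# Hypothetical call pattern for gauss_newton_vec_z (illustrative only).
import numpy as np
import tensorflow as tf

with tf.Graph().as_default():
    x = tf.constant(np.random.uniform(-1.0, 1.0, [4, 3]), dtype=tf.float32)
    w = tf.constant(np.random.uniform(-1.0, 1.0, [3, 5]), dtype=tf.float32)
    labels = tf.one_hot([0, 1, 2, 3], 5, dtype=tf.float32)
    logits = tf.matmul(x, w)  # zs: pre-softmax outputs.
    loss = -tf.reduce_sum(labels * tf.log(tf.nn.softmax(logits) + 1e-5))
    v_z = tf.ones_like(logits)  # Multiplier on the logit (output) space.
    hjjt_v = gauss_newton_vec_z(loss, logits, [w], v_z)
    with tf.Session() as sess:
        print(sess.run(hjjt_v))  # Product with the same shape as the logits.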
Example #10
def gauss_newton_vec(ys, zs, xs, vs):
    """Implements Gauss-Newton vector product.

  Args:
    ys: Loss function.
    zs: Before output layer (input to softmax).
    xs: Weights, list of tensors.
    vs: List of perturbation vectors, one for each weight tensor.

  Returns:
    A tuple (J'HJv, HJv): the Gauss-Newton vector product and the intermediate
      HJv product.
  """
    # Validate the input
    if type(xs) == list:
        if len(vs) != len(xs):
            raise ValueError("xs and vs must have the same length.")

    grads_z = tf.gradients(ys, zs, gate_gradients=True)
    hjv = forward_gradients(grads_z, xs, vs, gate_gradients=True)
    jhjv = tf.gradients(zs, xs, hjv, gate_gradients=True)
    return jhjv, hjv
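And a matching hypothetical call pattern for gauss_newton_vec, where the perturbation lives on the weight space and both returned products can be used; the same assumptions as the previous sketch apply.

# Hypothetical call pattern for gauss_newton_vec (illustrative only).
import numpy as np
import tensorflow as tf

with tf.Graph().as_default():
    x = tf.constant(np.random.uniform(-1.0, 1.0, [4, 3]), dtype=tf.float32)
    w = tf.constant(np.random.uniform(-1.0, 1.0, [3, 5]), dtype=tf.float32)
    labels = tf.one_hot([0, 1, 2, 3], 5, dtype=tf.float32)
    logits = tf.matmul(x, w)  # zs: pre-softmax outputs.
    loss = -tf.reduce_sum(labels * tf.log(tf.nn.softmax(logits) + 1e-5))
    v_w = tf.ones_like(w)  # Perturbation on the weight space.
    jhjv, hjv = gauss_newton_vec(loss, logits, [w], [v_w])
    with tf.Session() as sess:
        # J'HJv has the shape of w; HJv has the shape of the logits.
        jhjv_val, hjv_val = sess.run([jhjv, hjv])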
    def test_forward_mode_cnn(self):
        """Test v^T (J v) = (J^T v) ^T v"""
        rnd = np.random.RandomState(0)
        # float64 on CPU would be preferable for finite-difference checking, but
        # tf.nn.conv2d and tf.nn.max_pool do not support float64.
        dtype = tf.float32
        # with tf.Graph().as_default(), tf.device("/cpu:0"):
        with tf.Graph().as_default():
            # Input.
            x = tf.constant(rnd.uniform(-1.0, 1.0, [2, 5, 5, 2]),
                            dtype=dtype,
                            name="x")

            # First convolution.
            v = tf.constant(rnd.uniform(-1.0, 1.0, [2, 2, 2, 3]),
                            dtype=dtype,
                            name="v")
            w = tf.constant(rnd.uniform(-1.0, 1.0, [2, 2, 2, 3]),
                            dtype=dtype,
                            name="w")
            h = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME")
            h = tf.nn.max_pool(h, [1, 3, 3, 1], [1, 2, 2, 1], "SAME")
            h = tf.nn.relu(h)

            # Second convolution.
            v_ = tf.constant(rnd.uniform(-0.1, 0.1, [2, 2, 3, 3]),
                             dtype=dtype,
                             name="v_")
            w_ = tf.constant(rnd.uniform(-1.0, 1.0, [2, 2, 3, 3]),
                             dtype=dtype,
                             name="w_")
            h = tf.nn.conv2d(h, w_, [1, 1, 1, 1], "SAME")
            h = tf.nn.sigmoid(h)

            # Fully connected.
            dim = 27
            v2 = tf.constant(rnd.uniform(-0.1, 0.1, [dim, 2]),
                             dtype=dtype,
                             name="v2")
            w2 = tf.constant(rnd.uniform(-1.0, 1.0, [dim, 2]),
                             dtype=dtype,
                             name="w2")
            h = tf.reshape(h, [-1, dim])
            y = tf.matmul(h, w2)
            r = tf.constant(rnd.uniform(-1.0, 1.0, [2, 2]),
                            dtype=dtype,
                            name="r")

            w_list = [w, w_, w2]
            v_list = [v, v_, v2]

            # Taking inner product of two list of tensors.
            inner_prod = lambda xlist, ylist: tf.reduce_sum(
                tf.stack([tf.reduce_sum(x * y) for x, y in zip(xlist, ylist)]))

            # J^T r
            jt_r = tf.gradients(y, w_list, r, gate_gradients=True)
            # (J^T r)^T v
            jt_r_t_v = inner_prod(jt_r, v_list)

            # J v
            j_v = forward_gradients(y, w_list, v_list, gate_gradients=True)
            # r^T J v
            r_t_j_v = tf.reduce_sum(r * j_v)

            with self.test_session() as sess:
                sess.run(tf.global_variables_initializer())
                bk_val, fw_val = sess.run([jt_r_t_v, r_t_j_v])
                np.testing.assert_allclose(bk_val, fw_val, rtol=1e-5)
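For reference, the identity this test (and make_unit_graph above) relies on, written in the same notation as the comments: (J^T r)^T v = r^T (J v) = sum_ij r_i J_ij v_j, so the reverse-mode quantity on the left and the forward-mode quantity on the right must agree up to floating-point error.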