Example #1
import numpy as np
import tensorflow as tf

from rfho.utils import hvp  # as used in Example 2 below
# load_iris, LinearModel and vectorize_model are assumed to come from the
# rfho package (dataset/model helpers); exact module paths may differ.


def test_hv_with_builtin():
    iris = load_iris()
    x = tf.placeholder(tf.float32, name='x')
    y = tf.placeholder(tf.float32, name='y')
    model = LinearModel(x, 4, 3)
    net_w, net_out = vectorize_model(model.var_list, model.inp[-1])

    v = tf.constant(np.ones(net_w.tensor.get_shape()),
                    dtype=tf.float32)  # vector of ones of right shape

    ce_builtin = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=net_out, labels=y)
    )  # TensorFlow's built-in fused op for cross-entropy loss with a softmax output

    ce_standard = tf.reduce_mean(
        -tf.reduce_sum(y * tf.log(tf.nn.softmax(net_out)),
                       axis=[1])  # cross-entropy written out explicitly
    )

    hvp_builtin = hvp(ce_builtin, net_w.tensor, v)
    # With older TensorFlow versions (r0.11) this evaluated to 0, and later
    # versions raised an error; as of r1.2 it works correctly.
    hessian_builtin = tf.hessians(ce_builtin, net_w.tensor)[0]

    hvp_standard = hvp(ce_standard, net_w.tensor, v)
    hessian_standard = tf.hessians(ce_standard, net_w.tensor)[0]

    def training_supplier():
        return {x: iris.train.data, y: iris.train.target}

    ts = tf.train.GradientDescentOptimizer(.1).minimize(
        ce_standard, var_list=model.var_list)

    with tf.Session().as_default() as ss:
        tf.global_variables_initializer().run()

        print('builtin, standard:',
              ss.run([ce_builtin, ce_standard], feed_dict=training_supplier()))

        for _ in range(2000):
            ts.run(feed_dict=training_supplier())

        print('builtin',
              ss.run([hvp_builtin, hessian_builtin],
                     feed_dict=training_supplier()))  # was wrongly 0 on older TF versions

        print(
            'standard',
            ss.run([hvp_standard, hessian_standard],
                   feed_dict=training_supplier()))
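
For reference, every example on this page calls the same hvp helper from rfho.utils. Below is a minimal sketch of how such a helper is typically written in TF1 via the double-backprop (Pearlmutter) trick; this is an illustration under an assumed name (hvp_sketch), not necessarily rfho's exact implementation:

import tensorflow as tf

def hvp_sketch(loss, w, v):
    g = tf.gradients(loss, w)[0]                 # g = d loss / d w
    gv = tf.reduce_sum(g * tf.stop_gradient(v))  # scalar g . v, with v held constant
    return tf.gradients(gv, w)[0]                # d (g . v) / d w = H . v

The key point is that H . v costs two gradient passes and never materializes the Hessian, which is what makes these tests feasible for vectorized models.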
Example #2
    def _test_hv(self, param_optimizer, debug_jac=False, iterations=100):
        tf.set_random_seed(0)
        np.random.seed(0)
        iris, x, y, model, model_w, model_y, error, accuracy = iris_logistic_regression(
            param_optimizer.get_augmentation_multiplier())

        eta = tf.Variable(.001, name='eta')
        dyn = param_optimizer.create(model_w,
                                     eta,
                                     loss=error,
                                     _debug_jac_z=debug_jac)

        # Optional regularizers (disabled):
        # rho = tf.Variable([.1, .01], name='rho')
        # tr_error = error + rho[0]*tf.reduce_sum(model_w.tensor**2) \
        #                  + rho[1]*tf.abs(tf.reduce_sum(model_w.tensor))

        tr_sup = lambda s=None: {x: iris.train.data, y: iris.train.target}

        from rfho.utils import hvp

        z = tf.ones(model_w.get_shape())  # probe vector for the Hessian-vector product
        hv = hvp(error, model_w.tensor, z)

        with tf.Session().as_default() as ss:
            tf.global_variables_initializer().run()
            for t in range(iterations):
                ss.run(dyn.assign_ops, feed_dict=tr_sup())
            return hv.eval(feed_dict=tr_sup())
Example #3
def _jac_z(z):
    # Hessian of `integral` w.r.t. w, multiplied by z
    return ZMergedMatrix(
        hvp(integral, w,  # previously: MergedVariable.get_tensor(w)
            z.tensor))
Example #4
            def jac_z(z):
                r, u = z.var_list(Vl_Mode.TENSOR)

                assert loss is not None, 'Should specify loss to use jac_z'

                hessian_r_product = hvp(loss=loss, w=w_base, v=r)

                print('hessian_r_product', hessian_r_product)

                res = [
                    r - lr * mu * u - lr * hessian_r_product,  # Jacobian row of the w-update, applied to (r, u)
                    hessian_r_product + mu * u                 # Jacobian row of the m-update, applied to (r, u)
                ]

                print('res', res)

                return ZMergedMatrix(res)
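
The two entries of res are the blocks of the Jacobian of a heavy-ball update m' = grad f(w) + mu*m, w' = w - lr*m', applied to (r, u). Here is a small NumPy sanity check on a quadratic loss, where the Hessian is explicit (all names are illustrative):

import numpy as np

rng = np.random.default_rng(0)
d, lr, mu = 5, 0.1, 0.9
A = rng.standard_normal((d, d))
A = A @ A.T                          # SPD Hessian of f(w) = 0.5 * w @ A @ w
r, u = rng.standard_normal(d), rng.standard_normal(d)

# Blockwise Jacobian-vector product, as in `res` above
jz_w = r - lr * mu * u - lr * (A @ r)
jz_m = A @ r + mu * u

# Compare against the explicit Jacobian of (w', m') w.r.t. (w, m)
J = np.block([[np.eye(d) - lr * A, -lr * mu * np.eye(d)],
              [A,                  mu * np.eye(d)]])
assert np.allclose(J @ np.concatenate([r, u]),
                   np.concatenate([jz_w, jz_m]))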
Example #5
            def _jac_z(z):
                if _debug_jac_z:  # builds the full Jacobian explicitly; likely far too slow for large systems
                    d = dynamics.get_shape().as_list()[0]
                    d2 = d // 2
                    jac_1_1 = tf.stack([
                        tf.gradients(w_base_k[i], w_base)[0] for i in range(d2)
                    ])
                    jac_2_1 = tf.stack(
                        [tf.gradients(m_k[i], w_base)[0] for i in range(d2)])
                    # jac_1 = tf.concat([jac_1_1, jac_2_1], axis=0)

                    jac_1_2 = tf.stack(
                        [tf.gradients(w_base_k[i], m)[0] for i in range(d2)])
                    jac_2_2 = tf.stack(
                        [tf.gradients(m_k[i], m)[0] for i in range(d2)])
                    # jac_2 = tf.concat([jac_1_2, jac_2_2], axis=0)

                    # jac = tf.concat([jac_1, jac_2], axis=1, name='Jacobian')

                    # mul = tf.matmul(jac, z.tensor)
                    #
                    # return ZMergedMatrix([
                    #     mul[:d2, :],
                    #     mul[d2, :]
                    # ])
                    r, u = z.var_list(VlMode.TENSOR)
                    return ZMergedMatrix([
                        tf.matmul(jac_1_1, r) + tf.matmul(jac_1_2, u),
                        tf.matmul(jac_2_1, r) + tf.matmul(jac_2_2, u)
                    ])
                else:
                    r, u = z.var_list(VlMode.TENSOR)

                    assert loss is not None, 'Should specify loss to use jac_z'

                    hessian_r_product = hvp(loss=loss, w=w_base, v=r)

                    # print('hessian_r_product', hessian_r_product)

                    res = [
                        r - lr * mu * u - lr * hessian_r_product,
                        hessian_r_product + mu * u
                    ]

                    return ZMergedMatrix(res)
Example #6
    def test_hvp(self):
        """
        Test the Hessian-vector product of f(x) = 3*sum(x**3) against the
        analytic Hessian H = 18*diag(x).
        """
        print('test 1')
        d = 20
        x = tf.Variable(tf.random_normal([d]))
        # noinspection PyTypeChecker
        fx = 3 * tf.reduce_sum(x**3)
        vec = tf.Variable(tf.ones([d]))
        res = hvp(fx, x, vec)

        with tf.Session().as_default() as ss:

            ss.run(tf.global_variables_initializer())

            hessian = 18. * np.eye(d) * ss.run(x)
            self.assertLess(
                np.linalg.norm(ss.run(res) - hessian.dot(ss.run(vec))), 1e-5)
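
The analytic Hessian follows directly from f(x) = 3*sum(x_i**3): the gradient entries are 9*x_i**2 and the Hessian is diagonal with entries 18*x_i, so the HVP against a vector of ones is just 18*x. A quick NumPy restatement (illustrative values):

import numpy as np

x = np.array([1.0, -2.0, 0.5])
H = 18.0 * np.diag(x)                # Hessian of f(x) = 3*sum(x**3)
v = np.ones_like(x)
assert np.allclose(H @ v, 18.0 * x)  # diagonal Hessian: HVP is elementwise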
Example #7
    def test_hv_matrix(self):
        """
        Test Hessian-vector products against a matrix: one HVP per column,
        stacked along axis 1.
        """
        print('test 2')
        d = 20
        x = tf.Variable(tf.random_normal([d]))
        # noinspection PyTypeChecker
        fx = 3 * tf.reduce_sum(x**3)
        vec = tf.Variable(tf.ones([d, 2]))
        res = tf.stack(
            [hvp(fx, x, vec[:, k]) for k in range(vec.get_shape().as_list()[1])],
            axis=1)

        with tf.Session().as_default() as ss:

            ss.run(tf.global_variables_initializer())

            hessian = np.eye(d) * ss.run(x) * 18.
            self.assertLess(
                np.linalg.norm(ss.run(res) - hessian.dot(ss.run(vec))), 1e-5)
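
Stacking one HVP per column along axis=1 reproduces the full product H @ V without ever materializing H in the graph; that identity is what the assertion relies on. In NumPy terms (illustrative names):

import numpy as np

d = 20
x = np.linspace(-1.0, 1.0, d)
H = 18.0 * np.diag(x)                # same analytic Hessian as above
V = np.ones((d, 2))
hv_cols = np.stack([H @ V[:, k] for k in range(V.shape[1])], axis=1)
assert np.allclose(hv_cols, H @ V)   # column-wise HVPs == full matrix product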
Example #8
            def _jac_z(z):
                if _debug_jac_z:  # builds the full Jacobian explicitly; likely far too slow for large systems
                    d = dynamics.get_shape().as_list()[0] // 3
                    r, u, s = z.var_list(VlMode.TENSOR)

                    j11 = tf.stack([
                        tf.gradients(w_base_k[i], w_base)[0] for i in range(d)
                    ])
                    j12 = tf.stack(
                        [tf.gradients(w_base_k[i], m)[0] for i in range(d)])
                    j13 = tf.stack(
                        [tf.gradients(w_base_k[i], v)[0] for i in range(d)])
                    j1 = tf.concat([j11, j12, j13], axis=1)
                    jz1 = tf.matmul(j11, r) + tf.matmul(j12, u) + tf.matmul(
                        j13, s)

                    # second block
                    j21 = tf.stack(
                        [tf.gradients(m_k[i], w_base)[0] for i in range(d)])
                    j22 = tf.stack(
                        [tf.gradients(m_k[i], m)[0] for i in range(d)])
                    j23 = tf.stack(
                        [tf.gradients(m_k[i], v)[0] for i in range(d)])
                    j2 = tf.concat([j21, j22, j23], axis=1)
                    jz2 = tf.matmul(j21, r) + tf.matmul(j22, u) + tf.matmul(
                        j23, s)

                    # third block
                    j31 = tf.stack(
                        [tf.gradients(v_k[i], w_base)[0] for i in range(d)])
                    j32 = tf.stack(
                        [tf.gradients(v_k[i], m)[0] for i in range(d)])
                    j33 = tf.stack(
                        [tf.gradients(v_k[i], v)[0] for i in range(d)])
                    j3 = tf.concat([j31, j32, j33], axis=1)
                    jz3 = tf.matmul(j31, r) + tf.matmul(j32, u) + tf.matmul(
                        j33, s)

                    tf.concat([j1, j2, j3], axis=0, name='Jacobian')  # built only so the full Jacobian appears in the graph

                    return ZMergedMatrix([jz1, jz2, jz3])

                else:
                    assert loss is not None, 'Should specify loss to use jac_z'

                    r, u, s = z.var_list(VlMode.TENSOR)

                    with tf.name_scope('Jac_Z'):

                        hessian_r_product = hvp(loss=loss,
                                                w=w_base,
                                                v=r,
                                                name='hessian_r_product')
                        # hessian_r_product = hvp(loss=loss, w=w.tensor, v=z.tensor, name='hessian_r_product')[:d, :d]

                        j_11_r_tilde = l_diag_mul(pre_j_11_out,
                                                  hessian_r_product,
                                                  name='j_11_r_tilde')
                        j_11_r = tf.identity(j_11_r_tilde + r, 'j_11_r')

                        j_12_u_hat = tf.identity(-lr_k * beta1 / v_tilde_k,
                                                 name='j_12_u_hat')
                        j_12_u = l_diag_mul(j_12_u_hat, u, name='j_12_u')

                        j_13_s_hat = tf.identity(lr_k * beta2 * m_k /
                                                 (2 * v_k_eps_32),
                                                 name='j_13_s_hat')
                        j_13_s = l_diag_mul(j_13_s_hat, s, name='j_13_s')

                        jac_z_1 = tf.identity(j_11_r + j_12_u + j_13_s,
                                              name='jac_z_1')
                        # end first block

                        j_21_r = tf.identity((1. - beta1) * hessian_r_product,
                                             name='j_21_r')
                        j_22_u = tf.identity(beta1 * u, name='j_22_u')
                        # j_23_s = tf.zeros_like(s)  # would be...

                        jac_z_2 = tf.identity(j_21_r + j_22_u, name='jac_z_2')
                        # end second block

                        j_31_r = l_diag_mul(pre_j_31_out,
                                            hessian_r_product,
                                            name='j_31_r')
                        # j_32_u = tf.zeros_like(u)  # would be
                        j_33_s = tf.identity(beta2 * s, name='j_33_s')
                        jac_z_3 = tf.identity(j_31_r + j_33_s, name='jac_z_3')

                        res = [jac_z_1, jac_z_2, jac_z_3]
                        # print('res', res)

                        return ZMergedMatrix(res)
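
These blocks appear to implement the Jacobian of an Adam-style update (m' = beta1*m + (1-beta1)*grad, v' = beta2*v + (1-beta2)*grad**2, plus the w-update) applied to (r, u, s); the scaling factors pre_j_11_out, pre_j_31_out, v_tilde_k and v_k_eps_32 are defined outside this snippet and left as-is. The second block is the easiest to verify by hand: d m'/dw . r + d m'/dm . u = (1-beta1)*H*r + beta1*u, which is exactly jac_z_2 = j_21_r + j_22_u. A NumPy check on a quadratic (illustrative names):

import numpy as np

rng = np.random.default_rng(0)
d, beta1 = 4, 0.9
A = rng.standard_normal((d, d))
A = A @ A.T                          # Hessian of f(w) = 0.5 * w @ A @ w
r, u = rng.standard_normal(d), rng.standard_normal(d)

J21 = (1.0 - beta1) * A              # d m'/d w
J22 = beta1 * np.eye(d)              # d m'/d m
assert np.allclose(J21 @ r + J22 @ u,
                   (1.0 - beta1) * (A @ r) + beta1 * u)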