Example 1
  def testMeasurement(self):
    opt = YellowFinOptimizer(zero_debias=False)
    w = tf.Variable(np.ones([n_dim, ]),
                    dtype=tf.float32,
                    name="w",
                    trainable=True)
    b = tf.Variable(np.ones([1, ], dtype=np.float32),
                    dtype=tf.float32,
                    name="b",
                    trainable=True)
    x = tf.constant(np.ones([n_dim,], dtype=np.float32),
                    dtype=tf.float32)
    loss = tf.multiply(w, x) + b  # builds the variables; gradients are fed manually below
    tvars = tf.trainable_variables()

    w_grad_val = tf.placeholder(tf.float32, shape=(n_dim, ))
    b_grad_val = tf.placeholder(tf.float32, shape=(1, ))
    apply_op = opt.apply_gradients(zip([w_grad_val, b_grad_val], tvars))

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
      sess.run(init_op)
      target_h_max = 0.0
      target_h_min = 0.0
      g_norm_squared_avg = 0.0
      g_norm_avg = 0.0
      g_avg = 0.0
      target_dist = 0.0
      for i in range(n_iter):
        feed_dict = {w_grad_val: (i + 1) * np.ones([n_dim,], dtype=np.float32),
                     b_grad_val: (i + 1) * np.ones([1,], dtype=np.float32)}
        res = sess.run([opt._curv_win,
                        opt._h_max,
                        opt._h_min,
                        opt._grad_var,
                        opt._dist_to_opt_avg,
                        apply_op], feed_dict=feed_dict)

        g_norm_squared_avg = 0.999 * g_norm_squared_avg \
          + 0.001 * np.sum(((i + 1) * np.ones([n_dim + 1,]))**2)
        g_norm_avg = 0.999 * g_norm_avg \
          + 0.001 * np.linalg.norm((i + 1) * np.ones([n_dim + 1,]))
        g_avg = 0.999 * g_avg + 0.001 * (i + 1)

        # h_max/h_min track EMAs of the max/min squared gradient norm in the
        # curvature window; gradients grow each step, so the window minimum is
        # the entry from 20 steps back, hence max(1, i + 2 - 20).
        target_h_max = 0.999 * target_h_max + 0.001 * (i + 1)**2 * (n_dim + 1)
        target_h_min = 0.999 * target_h_min + \
                       0.001 * max(1, i + 2 - 20)**2 * (n_dim + 1)
        # gradient variance estimate: E[||g||^2] - ||E[g]||^2
        target_var = g_norm_squared_avg - g_avg**2 * (n_dim + 1)
        # distance-to-optimum proxy: EMA of avg-norm over avg-squared-norm
        target_dist = 0.999 * target_dist + \
                      0.001 * g_norm_avg / g_norm_squared_avg

        # print("iter", i, "h max", res[1], target_h_max, "h min", res[2],
        #       target_h_min, "var", res[3], target_var, "dist", res[4],
        #       target_dist)
        assert np.abs(target_h_max - res[1]) < np.abs(target_h_max) * 1e-3
        assert np.abs(target_h_min - res[2]) < np.abs(target_h_min) * 1e-3
        assert np.abs(target_var - res[3]) < np.abs(res[3]) * 1e-3
        assert np.abs(target_dist - res[4]) < np.abs(res[4]) * 1e-3
    print("[Test-INFO] Sync measurement test passed!")
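
These snippets reference names defined elsewhere in their test module. A minimal preamble that would make Example 1 runnable, with illustrative values where the original is not shown (the import path and the n_dim/n_iter values here are assumptions):

import numpy as np
import tensorflow as tf  # TF 1.x API: tf.placeholder, tf.Session, tf.assign

from yellowfin import YellowFinOptimizer  # hypothetical import path

n_dim = 100  # illustrative problem dimension; the real test sets its own
n_iter = 50  # illustrative iteration count
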
Example 2
  def testMeasurement(self):
    opt = YellowFinOptimizer(zero_debias=False)
    w = tf.Variable(np.ones([n_dim,]),
                    dtype=tf.float32,
                    name="w",
                    trainable=True)
    b = tf.Variable(np.ones([1,], dtype=np.float32),
                    dtype=tf.float32,
                    name="b",
                    trainable=True)
    x = tf.constant(np.ones([n_dim,], dtype=np.float32),
                    dtype=tf.float32)
    _ = tf.multiply(w, x) + b  # loss
    tvars = tf.trainable_variables()

    w_grad_val = tf.placeholder(tf.float32, shape=(n_dim,))
    b_grad_val = tf.placeholder(tf.float32, shape=(1,))
    apply_op = opt.apply_gradients(zip([w_grad_val, b_grad_val], tvars))

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
      sess.run(init_op)
      target_h_max = 0.0
      target_h_min = 0.0
      g_norm_squared_avg = 0.0
      g_norm_avg = 0.0
      g_avg = 0.0
      target_dist = 0.0
      for i in range(n_iter):
        feed_dict = {w_grad_val: (i + 1) * np.ones([n_dim,], dtype=np.float32),
                     b_grad_val: (i + 1) * np.ones([1,], dtype=np.float32)}
        res = sess.run([opt._curv_win,
                        opt._h_max,
                        opt._h_min,
                        opt._grad_var,
                        opt._dist_to_opt_avg,
                        apply_op], feed_dict=feed_dict)

        g_norm_squared_avg = (
            0.999 * g_norm_squared_avg +
            0.001 * np.sum(((i + 1) * np.ones([n_dim + 1,]))**2))
        g_norm_avg = (0.999 * g_norm_avg +
                      0.001 * np.linalg.norm((i + 1)*np.ones([n_dim + 1,])))
        g_avg = 0.999 * g_avg + 0.001 * (i + 1)

        target_h_max = 0.999 * target_h_max + 0.001 * (i + 1)**2*(n_dim + 1)
        target_h_min = (0.999 * target_h_min +
                        0.001 * max(1, i + 2 - 20)**2 * (n_dim + 1))
        target_var = g_norm_squared_avg - g_avg**2 * (n_dim + 1)
        target_dist = (0.999 * target_dist +
                       0.001 * g_norm_avg / g_norm_squared_avg)

        assert np.abs(target_h_max - res[1]) < np.abs(target_h_max) * 1e-3
        assert np.abs(target_h_min - res[2]) < np.abs(target_h_min) * 1e-3
        assert np.abs(target_var - res[3]) < np.abs(res[3]) * 1e-3
        assert np.abs(target_dist - res[4]) < np.abs(res[4]) * 1e-3
Example 3
    def testLrMu(self):
        opt = YellowFinOptimizer(learning_rate=0.5,
                                 momentum=0.5,
                                 zero_debias=False)
        w = tf.Variable(np.ones([n_dim,]),
                        dtype=tf.float32,
                        name="w",
                        trainable=True)
        b = tf.Variable(np.ones([1,], dtype=np.float32),
                        dtype=tf.float32,
                        name="b",
                        trainable=True)
        x = tf.constant(np.ones([n_dim,], dtype=np.float32), dtype=tf.float32)
        _ = tf.multiply(w, x) + b  # loss
        tvars = tf.trainable_variables()

        w_grad_val = tf.Variable(np.zeros([n_dim,]),
                                 dtype=tf.float32,
                                 trainable=False)
        b_grad_val = tf.Variable(np.zeros([1,]),
                                 dtype=tf.float32,
                                 trainable=False)
        apply_op = opt.apply_gradients(zip([w_grad_val, b_grad_val], tvars))

        init_op = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init_op)
            target_h_max = 0.0
            target_h_min = 0.0
            g_norm_squared_avg = 0.0
            g_norm_avg = 0.0
            g_avg = 0.0
            target_dist = 0.0
            target_lr = 0.5
            target_mu = 0.5
            for i in range(n_iter):

                sess.run(tf.assign(w_grad_val,
                                   (i + 1) * np.ones([n_dim,],
                                                     dtype=np.float32)))
                sess.run(tf.assign(b_grad_val,
                                   (i + 1) * np.ones([1,],
                                                     dtype=np.float32)))

                res = sess.run([
                    opt._curv_win, opt._h_max, opt._h_min, opt._grad_var,
                    opt._dist_to_opt_avg, opt._lr_var, opt._mu_var, apply_op
                ])

                # re-read lr and mu after the step: fetching a variable in the
                # same sess.run as the op that updates it is order-undefined
                res[5] = opt._lr_var.eval()
                res[6] = opt._mu_var.eval()

                g_norm_squared_avg = (
                    0.999 * g_norm_squared_avg +
                    0.001 * np.sum(((i + 1) * np.ones([n_dim + 1,]))**2))
                g_norm_avg = (0.999 * g_norm_avg + 0.001 * np.linalg.norm(
                    (i + 1) * np.ones([n_dim + 1,])))
                g_avg = 0.999 * g_avg + 0.001 * (i + 1)

                target_h_max = (0.999 * target_h_max +
                                0.001 * (i + 1)**2 * (n_dim + 1))
                target_h_min = (0.999 * target_h_min +
                                0.001 * max(1, i + 2 - 20)**2 * (n_dim + 1))
                target_var = g_norm_squared_avg - g_avg**2 * (n_dim + 1)
                target_dist = (0.999 * target_dist +
                               0.001 * g_norm_avg / g_norm_squared_avg)

                if i > 0:
                    lr, mu = self.tune_everything(target_dist**2, target_var,
                                                  1, target_h_min,
                                                  target_h_max)
                    target_lr = 0.999 * target_lr + 0.001 * lr
                    target_mu = 0.999 * target_mu + 0.001 * mu

                assert np.abs(target_h_max - res[1]) < np.abs(target_h_max) * 1e-3
                assert np.abs(target_h_min - res[2]) < np.abs(target_h_min) * 1e-3
                assert np.abs(target_var - res[3]) < np.abs(res[3]) * 1e-3
                assert np.abs(target_dist - res[4]) < np.abs(res[4]) * 1e-3
                assert (target_lr == 0.0 or
                        (np.abs(target_lr - res[5]) < np.abs(res[5]) * 1e-3))
                assert (target_mu == 0.0 or
                        (np.abs(target_mu - res[6]) < np.abs(res[6]) * 5e-3))
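
Both testLrMu variants call a tune_everything / tuneEverything helper that the snippets do not show. A sketch of what it plausibly computes, following the SingleStep tuning rule from the YellowFin paper (the helper actually defined in the test class may differ):

    def tune_everything(self, x0_squared, c, t, g_min, g_max):
        # SingleStep: take the root in (0, 1) of a cubic in sqrt(mu) to get
        # the momentum, then the learning rate that matches it.
        # x0_squared ~ squared distance to the optimum, c ~ gradient variance,
        # g_min/g_max ~ curvature extremes; t is unused in this sketch.
        const_fact = x0_squared * g_min**2 / 2.0 / c
        coef = [-1.0, 3.0, -(3.0 + const_fact), 1.0]
        roots = np.roots(coef)
        roots = roots[np.logical_and(np.real(roots) > 0.0,
                                     np.real(roots) < 1.0)]
        root = np.real(roots[np.argmin(np.abs(np.imag(roots)))])
        dr = g_max / g_min  # curvature dynamic range
        mu = max(((np.sqrt(dr) - 1) / (np.sqrt(dr) + 1))**2, root**2)
        lr = (1 - np.sqrt(mu))**2 / g_min
        return lr, mu
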
Example 4
  def testLrMu(self):
    opt = YellowFinOptimizer(learning_rate=0.5, momentum=0.5, zero_debias=False)
    w = tf.Variable(np.ones([n_dim,]),
                    dtype=tf.float32,
                    name="w",
                    trainable=True)
    b = tf.Variable(np.ones([1,],
                            dtype=np.float32),
                    dtype=tf.float32,
                    name="b",
                    trainable=True)
    x = tf.constant(np.ones([n_dim,], dtype=np.float32), dtype=tf.float32)
    _ = tf.multiply(w, x) + b  # loss
    tvars = tf.trainable_variables()

    w_grad_val = tf.Variable(np.zeros([n_dim,]),
                             dtype=tf.float32,
                             trainable=False)
    b_grad_val = tf.Variable(np.zeros([1,]),
                             dtype=tf.float32,
                             trainable=False)
    apply_op = opt.apply_gradients(zip([w_grad_val, b_grad_val], tvars))

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
      sess.run(init_op)
      target_h_max = 0.0
      target_h_min = 0.0
      g_norm_squared_avg = 0.0
      g_norm_avg = 0.0
      g_avg = 0.0
      target_dist = 0.0
      target_lr = 0.5
      target_mu = 0.5
      for i in range(n_iter):

        sess.run(tf.assign(w_grad_val, (i + 1) * np.ones([n_dim,],
                                                         dtype=np.float32)))
        sess.run(tf.assign(b_grad_val, (i + 1) * np.ones([1,],
                                                         dtype=np.float32)))

        res = sess.run([opt._curv_win,
                        opt._h_max,
                        opt._h_min,
                        opt._grad_var,
                        opt._dist_to_opt_avg,
                        opt._lr_var,
                        opt._mu_var,
                        apply_op])

        res[5] = opt._lr_var.eval()
        res[6] = opt._mu_var.eval()

        g_norm_squared_avg = (
            0.999 * g_norm_squared_avg +
            0.001 * np.sum(((i + 1) * np.ones([n_dim + 1,]))**2))
        g_norm_avg = (0.999 * g_norm_avg +
                      0.001 * np.linalg.norm((i + 1)*np.ones([n_dim + 1,])))
        g_avg = 0.999 * g_avg + 0.001 * (i + 1)

        target_h_max = 0.999 * target_h_max + 0.001 * (i + 1)**2 * (n_dim + 1)
        target_h_min = (0.999 * target_h_min +
                        0.001 * max(1, i + 2 - 20)**2 * (n_dim + 1))
        target_var = g_norm_squared_avg - g_avg**2 * (n_dim + 1)
        target_dist = (0.999 * target_dist +
                       0.001 * g_norm_avg / g_norm_squared_avg)

        if i > 0:
          lr, mu = self.tuneEverything(target_dist**2,
                                       target_var,
                                       1,
                                       target_h_min,
                                       target_h_max)
          target_lr = 0.999 * target_lr + 0.001 * lr
          target_mu = 0.999 * target_mu + 0.001 * mu

        assert np.abs(target_h_max - res[1]) < np.abs(target_h_max) * 1e-3
        assert np.abs(target_h_min - res[2]) < np.abs(target_h_min) * 1e-3
        assert np.abs(target_var - res[3]) < np.abs(res[3]) * 1e-3
        assert np.abs(target_dist - res[4]) < np.abs(res[4]) * 1e-3
        assert (target_lr == 0.0 or
                (np.abs(target_lr - res[5]) < np.abs(res[5]) * 1e-3))
        assert (target_mu == 0.0 or
                (np.abs(target_mu - res[6]) < np.abs(res[6]) * 5e-3))
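
All four tests build their ops in the TF default graph, so running more than one of them in a single process needs a fresh graph per test. A minimal scaffold, assuming the standard unittest runner (the class name here is hypothetical):

import unittest

import numpy as np
import tensorflow as tf


class YellowFinTest(unittest.TestCase):

  def setUp(self):
    # each test recreates w, b and the optimizer state, so start clean
    tf.reset_default_graph()

  # the testMeasurement / testLrMu bodies from the examples above go here


if __name__ == "__main__":
  unittest.main()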