Example #1
  def test_step(self):
    """Tests grafting of Adam and SGD steps.

    Derivation of one step of Adam and SGD:
    Gradient value is [2,4].
    Adam Derivation:
    Lr_1 = 0.5*(1-0.6)^0.5/(1-0.5) = 0.63245553203 (value does not matter; only the direction of the Adam step is used)
    m_1 = 0.5*G = [1,2]
    v_1 = 0.4*G^2 = [1.6,6.4]
    AdamStep = Lr_1*m_1/(sqrt(v_1)+eps) = [0.5, 0.5]
    Normalized AdamStep (per-variable direction) = [1.0, 1.0]
    SGDStep = 0.3*G = [0.6, 1.2], per-variable norm = [0.6, 1.2]
    TotalStep = 0.9 * [0.6, 1.2] * [1.0, 1.0] = [0.54, 1.08]
    NewVar = [2, 3] - [0.54, 1.08] = [1.46, 1.92]
    """
    opt1 = tf.train.GradientDescentOptimizer(0.3)
    opt2 = tf.train.AdamOptimizer(0.5, beta1=0.5, beta2=0.6)
    opt = adagraft.AdaGraftOptimizer(0.9, opt1, opt2)
    with self.cached_session() as sess:
      var0 = tf.Variable(2.0, name="var0")
      var1 = tf.Variable(3.0, name="var1")
      loss = (var0 - 1) * (var0 - 1) + (var1 - 1) * (var1 - 1)
      o = opt.minimize(loss)
      self.evaluate(tf.global_variables_initializer())

      correct_values = [[1.058, 1.46, 1.92], [0.22387284, 1.2116001, 1.4232]]

      for i in range(2):
        sess.run(o)
        step_values = sess.run([loss, var0, var1])
        print(step_values)
        self.assertAllClose(correct_values[i], step_values)
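
The arithmetic in the docstring above can be checked outside TensorFlow. The following is a minimal NumPy sketch of the per-variable grafting rule this test verifies, not the library implementation; the epsilon is assumed to be Adam's default of 1e-8.

import numpy as np

# Gradient of (var0 - 1)^2 + (var1 - 1)^2 at var0=2, var1=3.
grad = np.array([2.0, 4.0])
lr_adam, beta1, beta2, eps = 0.5, 0.5, 0.6, 1e-8

# One bias-corrected Adam step, used only for its direction.
m = (1 - beta1) * grad                           # [1.0, 2.0]
v = (1 - beta2) * grad**2                        # [1.6, 6.4]
lr_t = lr_adam * np.sqrt(1 - beta2) / (1 - beta1)
adam_step = lr_t * m / (np.sqrt(v) + eps)        # [0.5, 0.5]

# One SGD step, used only for its magnitude (each variable is a scalar,
# so the per-variable norm is just the absolute step size).
sgd_step = 0.3 * grad                            # [0.6, 1.2]

# Grafted step: SGD magnitude, Adam direction, scaled by 0.9.
total_step = 0.9 * np.abs(sgd_step) * adam_step / np.abs(adam_step)  # [0.54, 1.08]
new_var = np.array([2.0, 3.0]) - total_step      # [1.46, 1.92]
print(new_var)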
Example #2
  def GetOptimizer(self, lr):
    """Builds an AdaGraft optimizer from the configured sub-optimizers."""
    params = self.params

    # Use the magnitude learning rate for the direction optimizer unless a
    # separate direction learning rate was configured.
    if params.direction_optimizer_lr is None:
      dir_lr = lr
    else:
      dir_lr = params.direction_optimizer_lr

    magnitude_tf_optimizer = params.magnitude_optimizer.GetOptimizer(lr=lr)
    direction_tf_optimizer = params.direction_optimizer.GetOptimizer(lr=dir_lr)

    return adagraft.AdaGraftOptimizer(
        1.0,
        magnitude_tf_optimizer,
        direction_tf_optimizer,
        use_global_norm=params.use_global_norm,
        diagnostic=params.diagnostic)
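
With scale 1.0, the grafted update keeps the full per-variable (or global, if use_global_norm is set) magnitude of the magnitude optimizer's step while following the direction optimizer's step direction. A hedged sketch of a concrete instantiation is below; the optimizer choices and learning rates are illustrative only, and the import path is an assumption.

import tensorflow.compat.v1 as tf
from lingvo.core import adagraft  # assumed module path

magnitude_opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.9)
direction_opt = tf.train.AdamOptimizer(learning_rate=0.001)

# Keep the Momentum step's size, pointed along the Adam step's direction.
grafted_opt = adagraft.AdaGraftOptimizer(
    1.0, magnitude_opt, direction_opt, use_global_norm=False)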
Example #3
  def test_identity(self):
    # AdaGraft(1, opt, opt) should do the same thing as opt.
    opt1 = tf.train.AdamOptimizer(0.5, beta1=0.5, beta2=0.5)
    opt2 = tf.train.AdamOptimizer(0.5, beta1=0.5, beta2=0.5)
    opt3 = tf.train.AdamOptimizer(0.5, beta1=0.5, beta2=0.5)
    opt = adagraft.AdaGraftOptimizer(1.0, opt1, opt2)
    with self.cached_session() as sess:
      var0 = tf.Variable(2.0, name="var0")
      var1 = tf.Variable(3.0, name="var1")
      loss = (var0 - 1) * (var0 - 1) + (var1 - 1) * (var1 - 1)
      o = opt.minimize(loss)
      oo = opt3.minimize(loss)
      self.evaluate(tf.global_variables_initializer())
      sess.run(o)
      l1 = sess.run([loss, var0, var1])
      print(l1)
      sess.run([tf.assign(var0, 2.0), tf.assign(var1, 3.0)])
      sess.run(oo)
      l2 = sess.run([loss, var0, var1])
      print(l2)
      self.assertAllClose(l1, l2)
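
The identity follows directly from the grafting rule: when the magnitude step M and the direction step D coincide, rescaling D to the norm of M gives back M, so AdaGraft(1.0, opt, opt) reproduces opt exactly. A small NumPy sketch of that argument (not library code):

import numpy as np

step = np.array([0.5, 0.5])  # any step where M == D
grafted = 1.0 * np.linalg.norm(step) * step / np.linalg.norm(step)
assert np.allclose(grafted, step)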