def update(i, g, state):
    """Advance the optimizer state by one step.

    Adds the element-wise square of ``g`` to the running sum ``g_sq``,
    scales ``g`` by the inverse square root of that sum (0 wherever the sum
    is not positive), blends the result into ``m`` using the free name
    ``momentum``, and moves ``x`` by ``step_size(i) * m``.

    NOTE(review): this looks like an Adagrad-with-momentum step -- confirm
    against the enclosing optimizer definition.

    Args:
        i: Step index; only forwarded to the free callable ``step_size``.
        g: Array combined element-wise with the state (presumably the
            gradient at ``x`` -- verify against the caller).
        state: Triple ``(x, g_sq, m)``.

    Returns:
        The new triple ``(x, g_sq, m)``.
    """
    x, g_sq, m = state
    # Rebind instead of using ``+=``: with mutable arrays (e.g. NumPy) the
    # augmented form would update the caller's ``state[1]`` in place and
    # silently corrupt any previous state the caller still holds.
    g_sq = g_sq + np.square(g)
    # Guard the zero entries so they contribute 0 instead of inf.
    g_sq_inv_sqrt = np.where(g_sq > 0, 1. / np.sqrt(g_sq), 0.0)
    m = (1. - momentum) * (g * g_sq_inv_sqrt) + momentum * m
    x = x - step_size(i) * m
    return x, g_sq, m
def test(self):
    """Square a ``tf.Variable`` with ``np.square`` and feed the result to ``assign_add``."""
    variable = tf.Variable(2.0)
    squared = np.square(variable)
    self.assertIsInstance(squared, np.ndarray)
    self.assertAllClose(4.0, squared)

    # Issue the increment first, then read the variable back under a
    # control dependency on it: 2.0 + 4.0 == 6.0.
    increment = variable.assign_add(squared)
    with tf.control_dependencies([increment]):
        after_increment = variable.read_value()
    self.assertAllClose(6.0, after_increment)
def update(i, g, state):
    """Apply one bias-corrected moment update and return the new state.

    Uses the free names ``b1``, ``b2``, ``eps`` and ``step_size``, none of
    which are defined in this block.

    NOTE(review): the formulas match an Adam step -- confirm against the
    enclosing optimizer definition.

    Args:
        i: Zero-based step index; used for the bias correction exponent
            ``i + 1`` and forwarded to ``step_size``.
        g: Array combined element-wise with the state (presumably the
            gradient at ``x`` -- verify against the caller).
        state: Triple ``(x, m, v)``.

    Returns:
        The new triple ``(x, m, v)``; the moments are returned uncorrected.
    """
    x, first_moment, second_moment = state

    # Exponential moving averages of g and g**2.
    first_moment = (1 - b1) * g + b1 * first_moment
    second_moment = (1 - b2) * np.square(g) + b2 * second_moment

    # Bias correction.
    step_count = i + 1
    first_hat = first_moment / (1 - b1 ** step_count)
    second_hat = second_moment / (1 - b2 ** step_count)

    numerator = step_size(i) * first_hat
    denominator = np.sqrt(second_hat) + eps
    x = x - numerator / denominator
    return x, first_moment, second_moment
def update(i, g, state):
    """Apply one step using per-axis accumulators and return the new state.

    Relies on the free names ``broadcast_into``, ``splice``, ``momentum``
    and ``step_size``, none of which are defined in this block.

    NOTE(review): this looks like an SM3-style update where each entry of
    ``vs`` is an accumulator for one axis of ``x`` -- confirm against the
    helpers and the enclosing optimizer definition.

    Args:
        i: Step index; only forwarded to ``step_size``.
        g: Array combined element-wise with the state (presumably the
            gradient at ``x`` -- verify against the caller).
        state: Triple ``(x, m, vs)`` where ``vs`` is a sequence.

    Returns:
        The new triple ``(x, m, vs)`` with ``vs`` rebuilt as a list holding
        one entry per axis of the updated ``x``.
    """
    x, m, vs = state

    # The comprehension index is deliberately not called ``i`` so that it
    # cannot be confused with the step index used further down.
    expanded = [broadcast_into(g.ndim, acc, axis) for axis, acc in enumerate(vs)]
    accum = functools.reduce(np.minimum, expanded) + np.square(g)

    # Entries where the accumulator is not positive contribute 0.
    accum_inv_sqrt = np.where(accum > 0, 1. / np.sqrt(accum), 0)
    preconditioned = g * accum_inv_sqrt
    m = (1. - momentum) * preconditioned + momentum * m
    x = x - step_size(i) * m

    # For each axis j, reduce with max over the axes that splice() returns
    # for (range(rank), j, []).
    rank = x.ndim
    new_vs = []
    for j in range(rank):
        new_vs.append(accum.max(splice(range(rank), j, [])))
    return x, m, new_vs
def testTensorTFNPOp(self):
    """Check that ``np.square`` applied to a ``tf.constant`` returns an ``np.ndarray``."""
    tensor = tf.constant(10.0)
    squared = np.square(tensor)
    self.assertIsInstance(squared, np.ndarray)
    self.assertEqual(100.0, squared)
def update(i, g, state):
    """Apply one step driven by a running mean of squared ``g`` plus momentum.

    Relies on the free names ``gamma``, ``momentum``, ``eps`` and
    ``step_size``, none of which are defined in this block.

    NOTE(review): the formulas match RMSProp with momentum -- confirm
    against the enclosing optimizer definition.

    Args:
        i: Step index; only forwarded to ``step_size``.
        g: Array combined element-wise with the state (presumably the
            gradient at ``x`` -- verify against the caller).
        state: Triple ``(x, avg_sq_grad, mom)``.

    Returns:
        The new triple ``(x, avg_sq_grad, mom)``.
    """
    x, running_sq, velocity = state

    # Exponential moving average of g**2, weighted by gamma.
    running_sq = running_sq * gamma + np.square(g) * (1. - gamma)

    # eps is added inside the square root.
    scaled_step = step_size(i) * g / np.sqrt(running_sq + eps)
    velocity = momentum * velocity + scaled_step
    return x - velocity, running_sq, velocity
def update(i, g, state):
    """Apply one step scaled by a running mean of squared ``g``.

    Relies on the free names ``gamma``, ``eps`` and ``step_size``, none of
    which are defined in this block.

    NOTE(review): the formulas match a plain RMSProp step -- confirm
    against the enclosing optimizer definition.

    Args:
        i: Step index; only forwarded to ``step_size``.
        g: Array combined element-wise with the state (presumably the
            gradient at ``x`` -- verify against the caller).
        state: Pair ``(x, avg_sq_grad)``.

    Returns:
        The new pair ``(x, avg_sq_grad)``.
    """
    x, running_sq = state

    # Exponential moving average of g**2, weighted by gamma.
    running_sq = running_sq * gamma + np.square(g) * (1. - gamma)

    # eps is added inside the square root.
    rms = np.sqrt(running_sq + eps)
    x = x - step_size(i) * g / rms
    return x, running_sq