def update(i, g, state):
  # AdaGrad-style update with momentum: g_sq accumulates the squared
  # gradients per parameter; step_size and momentum come from the
  # enclosing optimizer constructor.
  x, g_sq, m = state
  g_sq += np.square(g)
  g_sq_inv_sqrt = np.where(g_sq > 0, 1. / np.sqrt(g_sq), 0.0)
  m = (1. - momentum) * (g * g_sq_inv_sqrt) + momentum * m
  x = x - step_size(i) * m
  return x, g_sq, m
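These update closures follow the constructor pattern of jax.example_libraries.optimizers: an outer function captures step_size and the other hyperparameters and returns the optimizer's state functions. A self-contained sketch of that pattern around the update above, in plain NumPy on a toy quadratic (the constructor name and hyperparameter values are illustrative, not from the source):

import numpy as np

def adagrad_momentum(step_size, momentum=0.9):
  def init(x0):
    return x0, np.zeros_like(x0), np.zeros_like(x0)

  def update(i, g, state):
    x, g_sq, m = state
    g_sq = g_sq + np.square(g)  # functional update instead of +=
    g_sq_inv_sqrt = np.where(g_sq > 0, 1. / np.sqrt(g_sq), 0.0)
    m = (1. - momentum) * (g * g_sq_inv_sqrt) + momentum * m
    x = x - step_size(i) * m
    return x, g_sq, m

  return init, update

init, update = adagrad_momentum(step_size=lambda i: 0.5)
state = init(np.array([5.0, -3.0]))
for i in range(200):
  g = 2.0 * state[0]  # gradient of f(x) = ||x||^2
  state = update(i, g, state)
# state[0] has moved from [5., -3.] toward the minimizer at the origin.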
def update(i, g, state):
  # Adam: exponential moving averages of the gradient and its square,
  # with bias correction for the zero initialization.
  x, m, v = state
  m = (1 - b1) * g + b1 * m  # First moment estimate.
  v = (1 - b2) * np.square(g) + b2 * v  # Second moment estimate.
  mhat = m / (1 - b1 ** (i + 1))  # Bias correction.
  vhat = v / (1 - b2 ** (i + 1))
  x = x - step_size(i) * mhat / (np.sqrt(vhat) + eps)
  return x, m, v
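The bias correction exists because m and v start at zero, so the early moving averages underestimate the true moments. A one-step illustration of the mechanism:

b1 = 0.9
g = 1.0
m = (1 - b1) * g          # 0.1: biased toward zero after the first step
mhat = m / (1 - b1 ** 1)  # 1.0: the zero-initialization bias is removed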
def update(i, g, state):
  # SM3: a memory-efficient adaptive method that keeps one accumulator
  # per tensor axis instead of one per parameter.
  x, m, vs = state
  # The comprehension variable shadows the step counter `i` only inside
  # the comprehension; `step_size(i)` below still sees the step count.
  vs = [broadcast_into(g.ndim, v, i) for i, v in enumerate(vs)]
  accum = functools.reduce(np.minimum, vs) + np.square(g)
  accum_inv_sqrt = np.where(accum > 0, 1. / np.sqrt(accum), 0)
  m = (1. - momentum) * (g * accum_inv_sqrt) + momentum * m
  x = x - step_size(i) * m
  # Collapse the full accumulator back to one vector per axis by taking
  # the max over all other axes.
  vs = [accum.max(splice(range(x.ndim), j, [])) for j in range(x.ndim)]
  return x, m, vs
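The SM3 update calls two helpers, splice and broadcast_into, that are defined in its enclosing module rather than in the snippet. Minimal sketches of the behavior the update relies on (these bodies are reconstructions, not the original definitions):

def splice(seq, i, x):
  # Replace element i of seq with the items of x; with x = [] this
  # simply drops index i: splice(range(3), 1, []) -> [0, 2].
  lst = list(seq)
  lst[i:i + 1] = x
  return lst

def broadcast_into(ndim, x, axis):
  # View the 1-D accumulator x so it broadcasts along `axis` of an
  # ndim-dimensional tensor, e.g. shape (k,) -> (1, k, 1) for ndim=3.
  idx = [None] * ndim
  idx[axis] = slice(None)
  return x[tuple(idx)]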
Example #4
def print_summary(name, labels, net_p, lin_p, loss):
    """Print summary information comparing a network with its linearization."""
    print('\nEvaluating Network on {} data.'.format(name))
    print('---------------------------------------')
    print('Network Accuracy = {}'.format(_accuracy(net_p, labels)))
    print('Network Loss = {}'.format(loss(net_p, labels)))
    if lin_p is not None:
        print('Linearization Accuracy = {}'.format(_accuracy(lin_p, labels)))
        print('Linearization Loss = {}'.format(loss(lin_p, labels)))
        print('RMSE of predictions: {}'.format(
            np.sqrt(np.mean((net_p - lin_p)**2))))
    print('---------------------------------------')
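print_summary assumes an _accuracy helper and a loss callable from its surrounding module. A hypothetical harness with stand-ins for both (the data here is random, purely to exercise the function):

import numpy as np

def _accuracy(p, labels):
  # Stand-in: fraction of argmax predictions matching one-hot labels.
  return np.mean(np.argmax(p, axis=1) == np.argmax(labels, axis=1))

def mse_loss(p, labels):
  return np.mean((p - labels) ** 2)

net_p = np.random.randn(8, 10)
lin_p = net_p + 0.01 * np.random.randn(8, 10)  # linearization ~ network
labels = np.eye(10)[np.random.randint(0, 10, size=8)]
print_summary('test', labels, net_p, lin_p, mse_loss)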
Example #5
def build(self, input_shape):
  stddev = np.sqrt(self._units).astype(np.float32)
  initial_value = np.random.randn(
      input_shape[1], self._units).astype(np.float32) / stddev
  # Note that TF NumPy can interoperate with tf.Variable.
  self.w = tf.Variable(initial_value, trainable=True)
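For context, a layer class that could own this build method might look like the following; the class name, constructor, and __call__ are assumptions, and only the build body comes from the snippet above:

import numpy as np
import tensorflow as tf

class Dense(tf.Module):
  def __init__(self, units):
    super().__init__()
    self._units = units
    self.w = None

  def build(self, input_shape):
    stddev = np.sqrt(self._units).astype(np.float32)
    initial_value = np.random.randn(
        input_shape[1], self._units).astype(np.float32) / stddev
    # Note that TF NumPy can interoperate with tf.Variable.
    self.w = tf.Variable(initial_value, trainable=True)

  def __call__(self, x):
    if self.w is None:  # build lazily on first call
      self.build(x.shape)
    return tf.matmul(x, self.w)

layer = Dense(units=4)
y = layer(tf.ones([2, 3]))  # creates w with shape (3, 4)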
from jax.tree_util import tree_flatten
import jax.numpy as np

def l2_norm(tree):
  """Compute the l2 norm of a pytree of arrays. Useful for weight decay."""
  leaves, _ = tree_flatten(tree)
  return np.sqrt(sum(np.vdot(x, x) for x in leaves))
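A quick check of l2_norm on a small pytree, with values chosen so the result is easy to verify by hand:

params = {'w': np.ones((3, 3)), 'b': np.zeros(3)}
print(l2_norm(params))  # sqrt(9 * 1.0 + 0) = 3.0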
def update(i, g, state):
  # RMSProp with momentum: the adaptively rescaled gradient feeds a
  # momentum buffer, which is then applied as the step.
  x, avg_sq_grad, mom = state
  avg_sq_grad = avg_sq_grad * gamma + np.square(g) * (1. - gamma)
  mom = momentum * mom + step_size(i) * g / np.sqrt(avg_sq_grad + eps)
  x = x - mom
  return x, avg_sq_grad, mom
def update(i, g, state):
  # Plain RMSProp: scale the step by an exponential moving average of
  # the squared gradient.
  x, avg_sq_grad = state
  avg_sq_grad = avg_sq_grad * gamma + np.square(g) * (1. - gamma)
  x = x - step_size(i) * g / np.sqrt(avg_sq_grad + eps)
  return x, avg_sq_grad
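Setting momentum to zero in the first variant recovers the second: the momentum buffer degenerates to the raw rescaled step. A small numerical check (hyperparameter values are arbitrary):

import numpy as np

gamma, eps = 0.9, 1e-8
step_size = lambda i: 0.1
momentum = 0.0

x_a = x_b = 1.0
avg_a = avg_b = 0.0
mom = 0.0
for i in range(5):
  g = 2.0 * x_a  # x_a == x_b throughout, so both variants see the same g
  avg_a = avg_a * gamma + np.square(g) * (1. - gamma)
  mom = momentum * mom + step_size(i) * g / np.sqrt(avg_a + eps)
  x_a = x_a - mom
  avg_b = avg_b * gamma + np.square(g) * (1. - gamma)
  x_b = x_b - step_size(i) * g / np.sqrt(avg_b + eps)
assert np.isclose(x_a, x_b)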