def testAverages(self):
        with self.cached_session() as session:
            scale = 2.
            grad = array_ops.ones([3, 4]) * scale
            log_norm = np.log(
                np.sqrt(scale**2 * grad.get_shape().num_elements()))
            grads_and_vars = [(grad, grad)]
            grads_and_vars = optimizers_lib.adaptive_clipping_fn(
                decay=0.5)(grads_and_vars)

            var_dict = {}
            for var in variables.global_variables():
                if var.name.startswith("AdaptiveMaxNorm"):
                    var_dict[var.name.split(":")[0]] = var
            self.assertEqual(2, len(var_dict))
            moving_mean = var_dict["AdaptiveMaxNorm/mean"]
            moving_sq_mean = var_dict["AdaptiveMaxNorm/sq_mean"]
            variables.global_variables_initializer().run()
            mean, sq_mean = session.run([moving_mean, moving_sq_mean])
            self.assertEqual([0], mean)
            self.assertEqual([0], sq_mean)
            for i in range(20):
                mean, sq_mean, _ = session.run(
                    [moving_mean, moving_sq_mean, grads_and_vars[0][0]])
                if i == 0:
                    self.assertLess(mean, 0.9 * log_norm)
                    self.assertLess(sq_mean, 0.9 * log_norm**2)

            self.assertAlmostEqual(float(mean), log_norm, places=4)
            self.assertAlmostEqual(float(sq_mean), log_norm**2, places=4)
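
The convergence this test asserts can be reproduced with a plain exponential moving average of the log gradient norm. Below is a minimal numpy sketch, assuming adaptive_clipping_fn keeps a standard (non-debiased) EMA with the given decay and zero-initialized state; the names are illustrative, not the library's internals.

import numpy as np

decay = 0.5
log_norm = np.log(np.sqrt(2.**2 * 12))  # log-norm of the constant 3x4 gradient above
mean, sq_mean = 0.0, 0.0                # EMA state, zero-initialized as in the test
for _ in range(20):
    mean = decay * mean + (1 - decay) * log_norm
    sq_mean = decay * sq_mean + (1 - decay) * log_norm**2
# With decay=0.5 the residual bias after 20 steps is decay**20 ~ 1e-6,
# so mean ~ log_norm and sq_mean ~ log_norm**2 to roughly 4 decimal places.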
Example No. 2
  def testClip(self):
    with self.test_session() as session:
      spike = 1000.
      multiplier = array_ops.placeholder(dtypes.float32, [], "multiplier")
      step = array_ops.placeholder(dtypes.int32, [], "step")

      grad = array_ops.ones([3, 4]) * multiplier
      grads_and_vars = [(grad, grad)]
      grads_and_vars = optimizers_lib.adaptive_clipping_fn(
          decay=0.9, global_step=step)(grads_and_vars)

      variables.global_variables_initializer().run()

      def run(scale, i):
        return session.run(grads_and_vars[0][0],
                           feed_dict={multiplier: scale,
                                      step: i})

      for i in range(20):
        scale = [1., -2.][i % 2]
        clipped_grad = run(scale, i)
        if i > 3:
          self.assertAllClose(np.ones(clipped_grad.shape) * scale, clipped_grad)

      # assert that the spike will have low influence.
      clipped_grad = run(spike, 20)
      self.assertTrue((clipped_grad < 25.).all())

      # assert that a repeated spike will converge to this new value.
      for i in range(10):
        clipped_grad = run(spike, i + 21)

      self.assertAllClose(np.ones(clipped_grad.shape) * spike, clipped_grad)
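
The spike behaviour exercised above follows from clipping against a threshold derived from moving statistics of log(global_norm). A rough numpy sketch of that rule, given as an assumption about the mechanism rather than the library's exact code (std_factor and eps are illustrative defaults):

import numpy as np

def adaptive_clip(grad, mean, sq_mean, std_factor=2.0, eps=1e-8):
    # mean / sq_mean are moving averages of log(norm) and log(norm)**2.
    variance = max(sq_mean - mean**2, 0.0)
    max_norm = np.exp(mean + std_factor * np.sqrt(variance))
    norm = np.linalg.norm(grad)
    factor = min(max_norm / (norm + eps), 1.0)  # only shrink, never amplify
    return grad * factor

A gradient whose norm sits far above the moving average (the 1000x spike) is scaled down hard, while repeated spikes drag the moving averages, and with them the threshold, toward the new scale.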
Example No. 3
  def testAverages(self):
    with self.test_session() as session:
      scale = 2.
      grad = array_ops.ones([3, 4]) * scale
      log_norm = np.log(np.sqrt(scale**2 * grad.get_shape().num_elements()))
      grads_and_vars = [(grad, grad)]
      grads_and_vars = optimizers_lib.adaptive_clipping_fn(
          decay=0.5)(grads_and_vars)

      var_dict = {}
      for var in variables.global_variables():
        if var.name.startswith("AdaptiveMaxNorm"):
          var_dict[var.name.split(":")[0]] = var
      self.assertEqual(2, len(var_dict))
      moving_mean = var_dict["AdaptiveMaxNorm/mean"]
      moving_sq_mean = var_dict["AdaptiveMaxNorm/sq_mean"]
      variables.global_variables_initializer().run()
      mean, sq_mean = session.run([moving_mean, moving_sq_mean])
      self.assertEqual([0], mean)
      self.assertEqual([0], sq_mean)
      for i in range(20):
        mean, sq_mean, _ = session.run(
            [moving_mean, moving_sq_mean, grads_and_vars[0][0]])
        if i == 0:
          self.assertLess(mean, 0.9 * log_norm)
          self.assertLess(sq_mean, 0.9 * log_norm**2)

      self.assertAlmostEqual(float(mean), log_norm, places=4)
      self.assertAlmostEqual(float(sq_mean), log_norm**2, places=4)
 def testAdaptiveGradientClip(self):
     with self.cached_session() as session:
         x, var, loss, global_step = _setup_model()
         clip_gradients = optimizers_lib.adaptive_clipping_fn()
         train = optimizers_lib.optimize_loss(loss,
                                              global_step,
                                              learning_rate=0.1,
                                              optimizer="SGD",
                                              clip_gradients=clip_gradients)
         variables.global_variables_initializer().run()
         session.run(train, feed_dict={x: 5})
         var_value, global_step_value = session.run([var, global_step])
         self.assertAlmostEqual(var_value, 9.8916, 4)
         self.assertEqual(global_step_value, 1)
         var_count = 0
         for var in variables.global_variables():
             if var.name.startswith("OptimizeLoss/AdaptiveMaxNorm"):
                 var_count += 1
         self.assertEqual(2, var_count)
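
The _setup_model helper is not part of this listing. A hypothetical sketch of the kind of fixture such a test could use, with illustrative names, shapes, and initial values (the asserted 9.8916 depends on the real helper's details):

from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variable_scope

def _setup_model():
  # Scalar input, one trainable variable, an L1-style loss and a global step.
  x = array_ops.placeholder(dtypes.float32, [])
  var = variable_scope.get_variable(
      "test", [], initializer=init_ops.constant_initializer(10))
  loss = math_ops.abs(var * x)
  global_step = variable_scope.get_variable(
      "global_step", [], trainable=False, dtype=dtypes.int64,
      initializer=init_ops.constant_initializer(0, dtype=dtypes.int64))
  return x, var, loss, global_step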
Example No. 6
 def testAdaptiveGradientClip(self):
   with self.test_session() as session:
     x, var, loss, global_step = _setup_model()
     clip_gradients = optimizers_lib.adaptive_clipping_fn()
     train = optimizers_lib.optimize_loss(
         loss,
         global_step,
         learning_rate=0.1,
         optimizer="SGD",
         clip_gradients=clip_gradients)
     variables.global_variables_initializer().run()
     session.run(train, feed_dict={x: 5})
     var_value, global_step_value = session.run([var, global_step])
     self.assertAlmostEqual(var_value, 9.8916, 4)
     self.assertEqual(global_step_value, 1)
     var_count = 0
     for var in variables.global_variables():
       if var.name.startswith("OptimizeLoss/AdaptiveMaxNorm"):
         var_count += 1
     self.assertEqual(2, var_count)
Example No. 7
 def __init__(self, meta_lr, score_fn, **kwargs):
   super(MetaRLAgent, self).__init__(**kwargs)
   if score_fn == 'simple_linear':
     tf.logging.info('Using simple linear score function.')
     self.score_fn = nn_model.SimpleLinearNN()
   elif score_fn == 'linear':
     tf.logging.info('Using linear score function with priors.')
     self.score_fn = nn_model.LinearNN()
   else:
     raise NotImplementedError
   self._init_score_fn()
   self.score_optimizer = contrib_optimizer_v2.AdamOptimizer(
       learning_rate=meta_lr)
   self._meta_train = True
   # Adaptive gradient clipping
   self._score_grad_clipping = optimizers_lib.adaptive_clipping_fn(
       decay=0.9,
       report_summary=self.log_summaries,
       static_max_norm=self.max_grad_norm / 2.0,
       global_step=self.global_step)
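
adaptive_clipping_fn returns a callable that maps a list of (gradient, variable) pairs to a clipped list, so the stored self._score_grad_clipping would typically sit between compute_gradients and apply_gradients. A hypothetical sketch of such a step (the method name is illustrative, not from the original class):

  def _apply_score_gradients(self, loss):
    # Compute gradients for the score function, clip them with the adaptive
    # rule configured in __init__, then let Adam apply the clipped values.
    grads_and_vars = self.score_optimizer.compute_gradients(loss)
    grads_and_vars = self._score_grad_clipping(grads_and_vars)
    return self.score_optimizer.apply_gradients(
        grads_and_vars, global_step=self.global_step)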
Example No. 8
def _make_train_op(loss, hparams):
    """Create train op."""
    def learning_rate_decay_fn(learning_rate, global_step):
        learning_rate = tf.train.exponential_decay(learning_rate, global_step,
                                                   hparams.lr_decay_steps,
                                                   hparams.lr_decay_rate)
        learning_rate = learning_rate * tf.minimum(
            tf.cast(global_step / hparams.lr_warmup_steps, tf.float32),
            tf.constant(1.))
        return learning_rate

    return contrib_layers.optimize_loss(
        loss=loss,
        global_step=tf.train.get_global_step(),
        clip_gradients=optimizers_lib.adaptive_clipping_fn(
            decay=hparams.gradient_clipping_decay,
            report_summary=True,
        ),
        learning_rate=hparams.learning_rate,
        learning_rate_decay_fn=learning_rate_decay_fn,
        optimizer='Adam')
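
A hypothetical call site for _make_train_op, assuming an hparams object that carries the fields read above (tf.contrib.training.HParams is used purely for illustration, and a global step must already exist for tf.train.get_global_step() to find):

hparams = tf.contrib.training.HParams(
    learning_rate=1e-3,
    lr_decay_steps=10000,
    lr_decay_rate=0.96,
    lr_warmup_steps=1000,
    gradient_clipping_decay=0.9)

global_step = tf.train.get_or_create_global_step()
train_op = _make_train_op(loss, hparams)  # loss: scalar tensor built elsewhere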
Example No. 9
def simple_model_fn(features, labels, mode, params):
    """Model function for LN model."""
    features['alphas'] = tf.reshape(features['alphas'],
                                    (params['batch_size'], 1))
    features['neuron_ids'] = tf.reshape(
        features['neuron_ids'], (params['batch_size'], params['window_size']))
    features['w'] = tf.reshape(features['w'],
                               (params['batch_size'], params['window_size']))
    features['global_features'] = tf.reshape(
        features['global_features'],
        (params['batch_size'], params['N_global_features']))
    features['X'] = tf.reshape(features['X'],
                               (params['batch_size'], params['window_size'] +
                                2 * params['window_padding']))

    outputs, normalizers = params['network_fn'](features, mode, params)

    if mode == learn.ModeKeys.TRAIN:
        summarize_layer('labels', labels)

    output = outputs['output']
    summarize_layer('output', output)

    if params['use_normalizer']:
        multipliers = tf.gather(normalizers, features['neuron_ids'])
        output_norm = multipliers * output
    else:
        output_norm = tf.identity(output)

    output_norm_pre = output_norm
    output_norm = output_norm * features['alphas']

    loss = None
    train_op = None

    zero_fraction = tf.identity(tf.reduce_mean(
        tf.nn.zero_fraction(output_norm), keep_dims=True),
                                name="zero_fraction_tracker")

    # Calculate Loss (for both TRAIN and EVAL modes)
    if mode != learn.ModeKeys.INFER:
        loss = tf.losses.mean_squared_error(labels=tf.reshape(labels,
                                                              shape=(-1, )),
                                            predictions=tf.reshape(
                                                output_norm, shape=(-1, )),
                                            weights=tf.reshape(features['w'],
                                                               shape=(-1, )))

    # Configure the Training Op (for TRAIN mode)
    if mode == learn.ModeKeys.TRAIN:
        global_step = tf.contrib.framework.get_global_step()
        learning_rate = tf.train.exponential_decay(params['alpha'],
                                                   global_step,
                                                   params['decay_every'],
                                                   params['decay_multiplier'],
                                                   staircase=True,
                                                   name='learning_rate')
        summary.scalar('learning_rate', learning_rate)
        summary.scalar('sum_weights_train', tf.reduce_sum(labels[:, 1]))
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=global_step,
            learning_rate=learning_rate,
            optimizer="Adam",
            clip_gradients=optimizers_lib.adaptive_clipping_fn())
    elif mode == learn.ModeKeys.EVAL:
        summary.scalar('sum_weights_eval', tf.reduce_sum(labels[:, 1]))

    # Generate Predictions
    predictions = {
        "relu_output":
        tf.identity(output_norm, name='relu_output'),
        "relu_coarse":
        tf.cast(tf.round(tf.reshape(output_norm, (-1, )) * 1e4),
                tf.int32,
                name='relu_coarse'),
    }

    if mode != learn.ModeKeys.INFER:
        eval_metric_ops = {
            "mse":
            tf.metrics.mean_squared_error(labels=tf.reshape(labels, (-1, )),
                                          predictions=tf.reshape(
                                              output_norm, (-1, )),
                                          weights=tf.reshape(features['w'],
                                                             shape=(-1, ))),
        }
    else:
        eval_metric_ops = None

    # Return a ModelFnOps object
    return model_fn_lib.ModelFnOps(mode=mode,
                                   predictions=predictions,
                                   loss=loss,
                                   train_op=train_op,
                                   eval_metric_ops=eval_metric_ops)
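
A hypothetical way to wire simple_model_fn into tf.contrib.learn, with illustrative params values; network_fn must be supplied by the caller and is not defined here:

estimator = learn.Estimator(
    model_fn=simple_model_fn,
    model_dir='/tmp/ln_model',        # illustrative path
    params={
        'batch_size': 32,
        'window_size': 100,
        'window_padding': 10,
        'N_global_features': 16,
        'network_fn': my_network_fn,  # user-supplied network builder (hypothetical)
        'use_normalizer': True,
        'alpha': 1e-3,                # initial learning rate
        'decay_every': 1000,
        'decay_multiplier': 0.9,
    })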