Example #1
  def test_autolambda(self, model_fn):
    model = model_fn()
    model.compile(
        adam.Adam(0.001),
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())

    np_inputs = tf.nest.map_structure(
        lambda x: np.ones((2,) + tuple(x.shape[1:]), 'float32'), model.inputs)
    np_outputs = tf.nest.map_structure(
        lambda x: np.ones((2,) + tuple(x.shape[1:]), 'float32'), model.outputs)
    model.fit(np_inputs, np_outputs, batch_size=2)
    model(np_inputs)  # Test calling the model directly on inputs.

    new_model = keras.Model.from_config(
        model.get_config(),
        custom_objects={
            'LayerWithLayer': LayerWithLayer,
            'MyAdd': MyAdd
        })
    new_model.compile(
        adam.Adam(0.001),
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())
    new_model.fit(np_inputs, np_outputs, batch_size=2)
    new_model(np_inputs)  # Test calling the new model directly on inputs.
    # Assert that metrics are preserved and in the right order.
    self.assertAllEqual(model.metrics_names, new_model.metrics_names)
    # Assert that layer names don't change.
    self.assertAllEqual([layer.name for layer in model.layers],
                        [layer.name for layer in new_model.layers])
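Note: these snippets are excerpts from a larger test suite, so names such as `testing_utils`, `adam`, and the custom layers `LayerWithLayer` and `MyAdd` are defined elsewhere. The sketch below is a hypothetical, minimal set of imports and layer stand-ins that would be compatible with the `custom_objects` mapping above; the real test file defines its own versions.

# Hypothetical stand-ins, not the actual definitions from the test file.
import numpy as np
import tensorflow as tf
from tensorflow import keras  # the original tests use their own keras imports


class MyAdd(keras.layers.Layer):
  """Adds its two inputs elementwise."""

  def call(self, x, y):
    return x + y


class LayerWithLayer(keras.layers.Layer):
  """A layer that owns an inner Dense layer plus a scalar weight."""

  def build(self, input_shape):
    self.bias = self.add_weight(name='bias', dtype='float32')
    self.layer = keras.layers.Dense(10)

  def call(self, inputs):
    return self.layer(inputs * self.bias)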
Example #2
 def testSparseRepeatedIndices(self):
   # TODO(tanzheny, omalleyt): Fix test in eager mode.
   for dtype in [tf.half, tf.float32, tf.float64]:
     with tf.Graph().as_default(), self.cached_session():
       repeated_index_update_var = tf.Variable(
           [[1.0], [2.0]], dtype=dtype)
       aggregated_update_var = tf.Variable(
           [[1.0], [2.0]], dtype=dtype)
       grad_repeated_index = tf.IndexedSlices(
           tf.constant(
               [0.1, 0.1], shape=[2, 1], dtype=dtype),
           tf.constant([1, 1]),
           tf.constant([2, 1]))
       grad_aggregated = tf.IndexedSlices(
           tf.constant(
               [0.2], shape=[1, 1], dtype=dtype),
           tf.constant([1]),
           tf.constant([2, 1]))
       repeated_update = adam.Adam().apply_gradients(
           [(grad_repeated_index, repeated_index_update_var)])
       aggregated_update = adam.Adam().apply_gradients(
           [(grad_aggregated, aggregated_update_var)])
       self.evaluate(tf.compat.v1.global_variables_initializer())
       self.assertAllClose(aggregated_update_var,
                           self.evaluate(repeated_index_update_var))
       for _ in range(3):
         repeated_update.run()
         aggregated_update.run()
         self.assertAllClose(aggregated_update_var,
                             self.evaluate(repeated_index_update_var))
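Note: the assertion holds because an IndexedSlices gradient with repeated indices (here [0.1, 0.1], both at index 1) is aggregated before the variable and slot updates, so it produces the same Adam step as the pre-summed 0.2 gradient.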
Example #3
  def test_trackable_save_restore(self):
    with self.test_session():
      def _templated():
        v = tf.compat.v1.get_variable(
            "v", shape=[1], initializer=tf.compat.v1.zeros_initializer(),
            use_resource=True)
        v2 = tf.compat.v1.get_variable(
            "v2", shape=[1], initializer=tf.compat.v1.zeros_initializer(),
            use_resource=True)
        manual = _ManualScope()
        return v, v + 1., v2, manual, manual()

      save_template = tf.compat.v1.make_template("s1", _templated)
      v1_save, _, v2_save, manual_scope, manual_scope_v = save_template()
      six.assertCountEqual(
          self,
          [id(v1_save), id(v2_save), id(manual_scope),
           id(manual_scope_v), id(save_template)],
          map(id, trackable_utils.list_objects(save_template)))
      manual_dep, = manual_scope._checkpoint_dependencies
      self.assertEqual("in_manual_scope", manual_dep.name)
      self.assertIs(manual_scope_v, manual_dep.ref)
      optimizer = adam.Adam(0.0)
      save_root = tf.train.Checkpoint(
          my_template=save_template, optimizer=optimizer)
      optimizer.minimize(v1_save.read_value,
                         var_list=[v1_save])
      self.evaluate([v.initializer for v in save_template.variables])
      optimizer_variables = optimizer.variables() + list(
          optimizer._hyper.values())
      self.evaluate([v.initializer for v in optimizer_variables])
      self.evaluate(v1_save.assign([12.]))
      self.evaluate(v2_save.assign([14.]))
      checkpoint_directory = self.get_temp_dir()
      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
      save_path = save_root.save(checkpoint_prefix)

      load_template = tf.compat.v1.make_template("s2", _templated)
      load_optimizer = adam.Adam(0.0)
      load_root = tf.train.Checkpoint(
          my_template=load_template, optimizer=load_optimizer)
      status = load_root.restore(save_path)
      var, var_plus_one, var2, _, _ = load_template()
      load_optimizer.minimize(var.read_value, var_list=[var])
      self.assertLen(load_template._checkpoint_dependencies, 3)
      self.assertEqual("v", load_template._checkpoint_dependencies[0].name)
      self.assertEqual("v2", load_template._checkpoint_dependencies[1].name)
      self.assertEqual("ManualScope",
                       load_template._checkpoint_dependencies[2].name)
      status.assert_consumed().run_restore_ops()
      self.assertAllEqual([12.], self.evaluate(var))
      self.assertAllEqual([13.], self.evaluate(var_plus_one))
      self.assertAllEqual([14.], self.evaluate(var2))
Example #4
  def testConstructAdamWithLR(self):
    opt = adam.Adam(lr=1.0)
    opt_2 = adam.Adam(learning_rate=0.1, lr=1.0)
    opt_3 = adam.Adam(learning_rate=0.1)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
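Note: as the assertions show, the legacy `lr` keyword takes precedence over `learning_rate` when both are passed (opt_2 resolves to 1.0, not 0.1); on its own, `lr` is simply an alias for `learning_rate`.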
Example #5
  def test_stack_preserves_correct_shape(self):
    ## Test stack([x])
    inp = keras.Input(shape=(), dtype='float32')

    out = tf.stack([inp])
    model = keras.Model(
        inputs=inp,
        outputs=out)
    model.compile(
        adam.Adam(0.001),
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())

    x = tf.ones(shape=(4, 4))
    expected = tf.stack([x])
    self.assertAllEqual(expected.shape, (1, 4, 4))

    self.assertAllEqual(model(x).shape, (1, 4, 4))
    self.assertAllEqual(model(x), expected)

    config = model.get_config()
    model = keras.Model.from_config(config)

    self.assertAllEqual(model(x).shape, (1, 4, 4))
    self.assertAllEqual(model(x), expected)

    ## Test stack(x)
    inp = keras.Input(shape=(), dtype='float32')

    out = tf.stack(inp)
    model = keras.Model(
        inputs=inp,
        outputs=out)
    model.compile(
        adam.Adam(0.001),
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())

    x = tf.ones(shape=(4, 4))
    expected = tf.stack(x)
    self.assertAllEqual(expected.shape, (4, 4))

    self.assertAllEqual(model(x).shape, (4, 4))
    self.assertAllEqual(model(x), expected)

    config = model.get_config()
    model = keras.Model.from_config(config)

    self.assertAllEqual(model(x).shape, (4, 4))
    self.assertAllEqual(model(x), expected)
Example #6
  def testSparseWithAmsgrad(self):
    # dtypes.half does not work on gpu + eager.
    for dtype in [tf.float32, tf.float64]:
      with self.cached_session():
        m0 = np.array([[0.0], [0.0]])
        v0 = np.array([[0.0], [0.0]])
        v0hat = np.array([[0.0], [0.0]])
        indices_np = np.array([1])
        indices = tf.constant(indices_np, dtype=tf.int32)
        var0_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)
        repeated_index_update_var = tf.Variable(var0_np, dtype=dtype)
        aggregated_update_var = tf.Variable(var0_np, dtype=dtype)
        grads0_np = np.array([[0.2]], dtype=dtype.as_numpy_dtype)
        grad_repeated_index = tf.IndexedSlices(
            tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]), tf.constant([2, 1]))
        grad_aggregated = tf.IndexedSlices(grads0_np, indices,
                                            tf.constant([2, 1]))
        opt_repeated = adam.Adam(amsgrad=True)
        opt_aggregated = adam.Adam(amsgrad=True)
        if not tf.executing_eagerly():
          repeated_update = opt_repeated.apply_gradients(
              [(grad_repeated_index, repeated_index_update_var)])
          aggregated_update = opt_aggregated.apply_gradients(
              [(grad_aggregated, aggregated_update_var)])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(
            self.evaluate(aggregated_update_var),
            self.evaluate(repeated_index_update_var))
        for t in range(3):
          if not tf.executing_eagerly():
            self.evaluate(repeated_update)
            self.evaluate(aggregated_update)
          else:
            opt_repeated.apply_gradients(
                [(grad_repeated_index, repeated_index_update_var)])
            opt_aggregated.apply_gradients(
                [(grad_aggregated, aggregated_update_var)])

          var0_np, m0, v0, v0hat = adam_sparse_update_numpy_amsgrad(
              var0_np, indices_np, grads0_np, t, m0, v0, v0hat)

          # Validate updated params
          self.assertAllCloseAccordingToType(
              var0_np, self.evaluate(aggregated_update_var))
          self.assertAllCloseAccordingToType(
              self.evaluate(aggregated_update_var),
              self.evaluate(repeated_index_update_var))
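Note: `adam_sparse_update_numpy_amsgrad` is a NumPy reference helper that is not part of this excerpt. Below is a sketch of what such a reference could look like, assuming the standard AMSGrad rule with a bias-corrected step size; the real helper may differ in details such as where epsilon is applied.

def adam_sparse_update_numpy_amsgrad(param, indices, g_t, t, m, v, vhat,
                                     lr=0.001, beta1=0.9, beta2=0.999,
                                     epsilon=1e-7):
  """Sparse NumPy AMSGrad update applied only at `indices` (sketch)."""
  m_t, v_t, vhat_t, param_t = (np.copy(m), np.copy(v), np.copy(vhat),
                               np.copy(param))
  # Bias-corrected step size for step t (0-based).
  lr_t = lr * np.sqrt(1 - beta2**(t + 1)) / (1 - beta1**(t + 1))
  m_t_slice = beta1 * m[indices] + (1 - beta1) * g_t
  v_t_slice = beta2 * v[indices] + (1 - beta2) * g_t * g_t
  m_t[indices] = m_t_slice
  v_t[indices] = v_t_slice
  # AMSGrad keeps the running maximum of the second moment.
  vhat_t[indices] = np.maximum(vhat_t[indices], v_t_slice)
  param_t[indices] = param[indices] - lr_t * m_t_slice / (
      np.sqrt(vhat_t[indices]) + epsilon)
  return param_t, m_t, v_t, vhat_t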
Example #7
  def testAnonymousVarsInInit(self):

    class Model(training.Model):

      def __init__(self):
        super(Model, self).__init__()
        self.w = resource_variable_ops.ResourceVariable(0.0)
        self.b = resource_variable_ops.ResourceVariable(0.0)
        self.vars = [self.w, self.b]

      def call(self, x):
        return x * self.w + self.b

    model = Model()
    optimizer = adam.Adam(learning_rate=0.05)
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    checkpoint = tf.train.Checkpoint(
        model=model, optimizer=optimizer)
    for _ in range(2):
      checkpoint.save(checkpoint_prefix)
      with tf.GradientTape() as tape:
        loss = (tf.constant(1.)
                - model(tf.constant(1.))) ** 2
      grad = tape.gradient(loss, model.vars)
      optimizer.apply_gradients(
          [(g, v) for g, v in zip(grad, model.vars)])
Example #8
 def testAgnosticUsage(self):
   """Graph/eager agnostic usage."""
   # Does create garbage when executing eagerly due to ops.Graph() creation.
   with self.test_session():
     num_training_steps = 10
     checkpoint_directory = self.get_temp_dir()
     optimizer = adam.Adam(0.001)
     def _train_fn(model, input_value):
       with tf.GradientTape() as tape:
         loss = model(input_value)
       variables = model.trainable_variables
       gradients = tape.gradient(loss, variables)
       return optimizer.apply_gradients(zip(gradients, variables))
     for training_continuation in range(3):
       with testing_utils.device(should_use_gpu=True):
         model = MyModel()
         root = tf.train.Checkpoint(
             optimizer=optimizer, model=model)
         manager = tf.train.CheckpointManager(
             root, checkpoint_directory, max_to_keep=1)
         status = root.restore(save_path=manager.latest_checkpoint)
         input_value = tf.constant([[3.]])
         train_fn = functools.partial(_train_fn, model, input_value)
         if not tf.executing_eagerly():
           train_fn = functools.partial(self.evaluate, train_fn())
         status.initialize_or_restore()
         for _ in range(num_training_steps):
           train_fn()
         manager.save()
         self.assertEqual((training_continuation + 1) * num_training_steps,
                          self.evaluate(root.optimizer.iterations))
         self.assertEqual(training_continuation + 1,
                          self.evaluate(root.save_counter))
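Note: `MyModel`, used here and in the later checkpointing examples, is defined elsewhere in the test file. Any small trackable Keras model with trainable variables fits these examples; a hypothetical minimal stand-in:

class MyModel(keras.Model):
  """Hypothetical stand-in: a single Dense layer mapping inputs to one output."""

  def __init__(self):
    super(MyModel, self).__init__()
    self._named_dense = keras.layers.Dense(1, use_bias=True)

  def call(self, values):
    return self._named_dense(values)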
Example #9
  def test_getitem_slice_real_tensor(self):
    if not tf.executing_eagerly():
      self.skipTest('Complex slicing like this fails in v1')
    x = tf.range(10.0)
    slice_stop = keras.Input(shape=(), dtype='int32')

    out = x[:slice_stop[0]]
    model = keras.Model(
        inputs=slice_stop,
        outputs=out)
    model.compile(
        adam.Adam(0.001),
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())
    batch_size = 7
    stop = 6
    args = tf.constant(stop, shape=(batch_size,))
    expected = x[:stop]

    if tf.compat.v1.executing_eagerly_outside_functions():
      self.assertIn('tf.__operators__.getitem', (
          x.name for x in model.layers))
      # TODO(b/161925288): Fix the dispatch triggering then uncomment:
      # self.assertNotIn('tf.strided_slice', (
      #     x.name for x in model.layers))
    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)

    config = model.get_config()
    model = keras.Model.from_config(config)

    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)
Example #10
    def test_validate_callbacks_predefined_callbacks(self):
        supported_predefined_callbacks = [
            callbacks.TensorBoard(),
            callbacks.CSVLogger(filename='./log.csv'),
            callbacks.EarlyStopping(),
            callbacks.ModelCheckpoint(filepath='./checkpoint'),
            callbacks.TerminateOnNaN(),
            callbacks.ProgbarLogger(),
            callbacks.History(),
            callbacks.RemoteMonitor()
        ]

        distributed_training_utils_v1.validate_callbacks(
            supported_predefined_callbacks, adam.Adam())

        unsupported_predefined_callbacks = [
            callbacks.ReduceLROnPlateau(),
            callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001)
        ]

        for callback in unsupported_predefined_callbacks:
            with self.assertRaisesRegex(
                    ValueError, 'You must specify a Keras Optimizer V2'):
                distributed_training_utils_v1.validate_callbacks(
                    [callback], tf.compat.v1.train.AdamOptimizer())
Example #11
  def testOptimizerWithCallableVarList(self):
    train_samples = 20
    input_dim = 1
    num_classes = 2
    (x, y), _ = testing_utils.get_test_data(
        train_samples=train_samples,
        test_samples=10,
        input_shape=(input_dim,),
        num_classes=num_classes)
    y = np_utils.to_categorical(y)

    num_hidden = 1
    model = testing_utils.get_small_sequential_mlp(
        num_hidden=num_hidden, num_classes=num_classes)
    opt = adam.Adam()

    loss = lambda: losses.mean_squared_error(model(x), y)
    var_list = lambda: model.trainable_weights

    with self.assertRaisesRegex(
        ValueError, 'Weights for model .* have not yet been created'):
      var_list()
    train_op = opt.minimize(loss, var_list)
    if not tf.executing_eagerly():
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.assertEqual(
          [[0.]], self.evaluate(opt.get_slot(var_list()[0], 'm')))
      self.evaluate(train_op)
    self.assertNotEqual(
        [[0.]], self.evaluate(opt.get_slot(var_list()[0], 'm')))
    self.assertLen(var_list(), 4)
Example #12
  def test_getitem_slice_with_stop_only(self):
    if not tf.executing_eagerly():
      self.skipTest('Complex slicing like this fails in v1')
    inp = keras.Input(shape=(8,))
    slice_stop = keras.Input(shape=(), dtype='int32')

    out = inp[:slice_stop[0]]
    model = keras.Model(
        inputs=[inp, slice_stop],
        outputs=out)
    model.compile(
        adam.Adam(0.001),
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())
    batch_size = 7
    stop = 6
    x = tf.stack([
        tf.range(8) for _ in range(batch_size)])
    args = [x, tf.constant(stop, shape=(batch_size,))]
    expected = x[:stop]

    if keras_tensor.keras_tensors_enabled():
      self.assertIn('tf.__operators__.getitem', (
          x.name for x in model.layers))
      self.assertNotIn('tf.strided_slice', (
          x.name for x in model.layers))
    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)

    # Make sure it can be successfully saved and loaded
    config = model.get_config()
    model = keras.Model.from_config(config)

    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)
Example #13
  def test_getitem_index_real_tensor(self):
    if not tf.executing_eagerly():
      self.skipTest('Complex slicing like this fails in v1')
    x = tf.range(10.0)
    slice_stop = keras.Input(shape=(), dtype='int32')

    out = x[slice_stop[0]]
    model = keras.Model(
        inputs=slice_stop,
        outputs=out)
    model.compile(
        adam.Adam(0.001),
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())
    batch_size = 7
    index = 6
    args = tf.constant(index, shape=(batch_size,))
    expected = x[index]

    if keras_tensor.keras_tensors_enabled():
      self.assertIn('tf.__operators__.getitem', (
          x.name for x in model.layers))
      # TODO(b/161925288): Fix the bug then uncomment:
      # self.assertNotIn('tf.strided_slice', (
      #     x.name for x in model.layers))
    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)

    # Make sure it can be successfully saved and loaded
    config = model.get_config()
    model = keras.Model.from_config(config)

    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)
Example #14
 def testSlotsUniqueEager(self):
   v1 = tf.Variable(1.)
   v2 = tf.Variable(1.)
   opt = adam.Adam(1.)
   opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
   # There should be the iterations variable plus two slot variables (m and v)
   # for each of v1 and v2, giving five unique optimizer variables.
   self.assertLen(set(v.ref() for v in opt.variables()), 5)
   self.assertEqual(
       self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations))
Example #15
 def testSetWeightsFromV1AdamWithoutMinimize(self):
   keras_v1_adam = optimizer_v1.Adam()
   keras_v2_adam = adam.Adam()
   keras_v2_adam.set_weights(keras_v1_adam.get_weights())
   keras_v1_iteration = keras_v1_adam.iterations
   keras_v2_iteration = keras_v2_adam.iterations
   self.evaluate(tf.compat.v1.global_variables_initializer())
   self.assertEqual(
       self.evaluate(keras_v1_iteration), self.evaluate(keras_v2_iteration))
Example #16
    def test_getitem_complex_slicing(self):
        if not tf.executing_eagerly():
            self.skipTest('Complex slicing like this fails in v1')
        inp = keras.Input(shape=(4, 3, 8))
        first_dim = keras.Input(shape=(), dtype='int32')
        slice_start = keras.Input(shape=(), dtype='int32')
        slice_stop = keras.Input(shape=(), dtype='int32')
        slice_stride = keras.Input(shape=(), dtype='int32')

        out = inp[..., first_dim[0],
                  slice_start[0]:slice_stop[0]:slice_stride[0]]
        model = keras.Model(
            inputs=[inp, first_dim, slice_start, slice_stop, slice_stride],
            outputs=out)
        model.compile(adam.Adam(0.001),
                      'mse',
                      run_eagerly=testing_utils.should_run_eagerly())
        batch_size = 7
        start = 1
        stop = 6
        step = 2
        x = tf.stack([
            tf.stack(
                [tf.stack([tf.range(8) for _ in range(3)]) for _ in range(4)])
            for _ in range(batch_size)
        ])
        args = [
            x,
            tf.constant(0, shape=(batch_size, )),
            tf.constant(start, shape=(batch_size, )),
            tf.constant(stop, shape=(batch_size, )),
            tf.constant(step, shape=(batch_size, ))
        ]
        # Slice the innermost dim and grab only one index from the
        # second-to-innermost dim, removing that dim from the shape.
        expected = tf.stack([
            tf.stack([tf.range(8)[start:stop:step] for _ in range(4)])
            for _ in range(batch_size)
        ])

        if keras_tensor.keras_tensors_enabled():
            self.assertIn('tf.__operators__.getitem',
                          (x.name for x in model.layers))
            self.assertNotIn('tf.strided_slice',
                             (x.name for x in model.layers))
        self.assertAllEqual(model(args), expected)
        self.assertAllEqual(model.predict(args, batch_size=batch_size),
                            expected)

        # Make sure it can be successfully saved and loaded
        config = model.get_config()
        model = keras.Model.from_config(config)

        self.assertAllEqual(model(args), expected)
        self.assertAllEqual(model.predict(args, batch_size=batch_size),
                            expected)
Example #17
  def testBasicWithConstantDecay(self):
    var = tf.Variable([1.0, 2.0], dtype=tf.float32)
    loss = lambda: 3 * var
    opt = adam.Adam(learning_rate=1.0)

    @tf.function
    def fn():
      opt.minimize(loss, [var])
      return var

    self.assertAllClose([0., 1.], fn(), atol=1e-4)
    self.assertAllClose([-1, 0.], fn(), atol=1e-4)
Example #18
 def testOptimizerSetIterations(self):
   global_step = tf.compat.v1.train.get_or_create_global_step()
   opt = adam.Adam(learning_rate=1.0)
   opt.iterations = global_step
   var = tf.Variable([1.0, 2.0], dtype=tf.float32)
   self.evaluate(tf.compat.v1.global_variables_initializer())
   init_step_value = self.evaluate(global_step)
   loss = lambda: 3 * var
   opt_op = opt.minimize(loss, [var])
   self.evaluate(tf.compat.v1.global_variables_initializer())
   self.evaluate(opt_op)
   new_step_value = self.evaluate(global_step)
   self.assertEqual(new_step_value, init_step_value + 1)
Example #19
    def testKerasOptimizerWithUnequalInput(self, distribution):
        with distribution.scope():
            var = tf.Variable(2.0,
                              name='var',
                              aggregation=tf.compat.v1.VariableAggregation.SUM)
            optimizer = adam.Adam(learning_rate=0.01, beta_1=0.2, beta_2=0.2)
            all_vars = []

            def model_fn():
                def loss_fn():
                    replica_id = _replica_id()
                    return tf.cast(replica_id + 1,
                                   dtype=tf.float32) * 0.5 * var

                train_op = optimizer.minimize(loss_fn, var_list=[var])

                return train_op, optimizer

            def train_fn():
                train_op, optimizer = distribution.extended.call_for_each_replica(
                    model_fn)
                if not all_vars:
                    all_vars.append(var)
                    all_vars.append(optimizer.get_slot(var, 'm'))
                    all_vars.append(optimizer.get_slot(var, 'v'))
                return distribution.group(train_op)

            if not tf.executing_eagerly():
                with self.cached_session() as sess:
                    train_fn = sess.make_callable(train_fn())
            self.evaluate(tf.compat.v1.global_variables_initializer())

            # first step.
            train_fn()
            # var(1) = var(0) - lr * m(1) * sqrt(1 - beta2) / sqrt(v(1)) / (1 - beta1)
            #        = 2.0 - 0.01 * 1.2 * sqrt(0.8) / sqrt(1.8) / 0.8
            self.assertAllClose(1.99, self.evaluate(all_vars[0]))
            # m(1) = beta1 * m(0) + (1-beta1) * grad = 0.2 * 0 + 0.8 * (1 + 2) / 2
            self.assertAllClose(1.2, self.evaluate(all_vars[1]))
            # v(1) = beta2 * v(0) + (1-beta2) * grad^2 = 0.2 * 0 + 0.8 * 2.25
            self.assertAllClose(1.8, self.evaluate(all_vars[2]))

            # second step.
            train_fn()
            # var(2) = var(1) - lr * m(2) * sqrt(1 - beta2^2) / sqrt(v(2)) / (1 - beta1^2)
            #        = 1.99 - 0.01 = 1.98
            self.assertAllClose(1.98, self.evaluate(all_vars[0]))
            # m(2) = beta1 * m(1) + (1-beta1) * grad = 0.2 * 1.2 + 0.8 * 1.5
            self.assertAllClose(1.44, self.evaluate(all_vars[1]))
            # v(2) = beta2 * v(1) + (1-beta2) * grad^2 = 0.2 * 1.8 + 0.8 * 2.25
            self.assertAllClose(2.16, self.evaluate(all_vars[2]))
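Note: `_replica_id` is a helper not shown in this excerpt; it returns the current replica's id so that each replica contributes a different loss. A sketch, assuming it runs inside a replica context:

def _replica_id():
  """Return the id of the current replica as a tensor (sketch)."""
  rep_id = tf.distribute.get_replica_context().replica_id_in_sync_group
  if not isinstance(rep_id, tf.Tensor):
    rep_id = tf.constant(rep_id)
  return rep_id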
Example #20
 def testSparseDevicePlacement(self):
   # TODO(tanzheny, omalleyt): Fix test in eager mode.
   for index_dtype in [tf.int32, tf.int64]:
     with tf.Graph().as_default(), self.cached_session(
         force_gpu=tf.test.is_gpu_available()):
       # If a GPU is available, tests that all optimizer ops can be placed on
       # it (i.e. they have GPU kernels).
       var = tf.Variable([[1.0], [2.0]])
       indices = tf.constant([0, 1], dtype=index_dtype)
       g_sum = lambda: tf.reduce_sum(tf.gather(var, indices))  # pylint: disable=cell-var-from-loop
       optimizer = adam.Adam(3.0)
       minimize_op = optimizer.minimize(g_sum, var_list=[var])
       self.evaluate(tf.compat.v1.global_variables_initializer())
       minimize_op.run()
Example #21
  def doTestBasic(self, use_callable_params=False):
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = lambda: 0.001
        beta1 = lambda: 0.9
        beta2 = lambda: 0.999
        epsilon = lambda: 1e-8
        if not use_callable_params:
          learning_rate = learning_rate()
          beta1 = beta1()
          beta2 = beta2()
          epsilon = epsilon()

        opt = adam.Adam(learning_rate=learning_rate)
        if not tf.executing_eagerly():
          update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          if not tf.executing_eagerly():
            self.evaluate(update)
          else:
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
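Note: this example (and Examples #24 and #26 below) validates against `adam_update_numpy` and reads the bias-correction accumulators via `get_beta_accumulators`; neither helper is included in the excerpt. The sketches below follow the standard Adam update with a bias-corrected learning rate; the real helpers may differ slightly.

def adam_update_numpy(param, g_t, t, m, v, lr=0.001, beta1=0.9, beta2=0.999,
                      epsilon=1e-7):
  """Dense NumPy Adam update for step t (0-based); returns (param, m, v)."""
  lr_t = lr * np.sqrt(1 - beta2**(t + 1)) / (1 - beta1**(t + 1))
  m_t = beta1 * m + (1 - beta1) * g_t
  v_t = beta2 * v + (1 - beta2) * g_t * g_t
  param_t = param - lr_t * m_t / (np.sqrt(v_t) + epsilon)
  return param_t, m_t, v_t


def get_beta_accumulators(opt, dtype):
  """Return (beta_1**step, beta_2**step) for the optimizer's current step."""
  local_step = tf.cast(opt.iterations + 1, dtype)
  # Uses the private OptimizerV2 hyperparameter accessor.
  beta_1_power = tf.pow(tf.cast(opt._get_hyper('beta_1'), dtype), local_step)
  beta_2_power = tf.pow(tf.cast(opt._get_hyper('beta_2'), dtype), local_step)
  return beta_1_power, beta_2_power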
Example #22
        def state():
            with distribution.scope():
                v = tf.Variable(tf.random.normal([]))
            opt = adam.Adam(0.001)

            @tf.function
            def step():
                def f():
                    with tf.GradientTape() as tape:
                        loss = v + v
                    gradients = tape.gradient(loss, [v])
                    opt.apply_gradients(zip(gradients, [v]))

                distribution.run(f)

            return v, opt, step
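Note: this example only defines the `state()` factory. A hypothetical driver, assuming `distribution` is a tf.distribute.Strategy (e.g. MirroredStrategy) visible to `state`:

distribution = tf.distribute.MirroredStrategy()
v, opt, step = state()
step()  # one distributed step: gradient of (v + v) w.r.t. v, applied with Adam
print(v.numpy(), opt.iterations.numpy())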
Example #23
 def testManySavesGraph(self):
     """Saves after the first should not modify the graph."""
     with context.graph_mode():
         graph = tf.Graph()
         with graph.as_default(), self.session(graph):
             checkpoint_directory = self.get_temp_dir()
             checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
             obj = tf.train.Checkpoint()
             obj.var = tf.Variable(0., name="v")
             obj.opt = adam.Adam(0.1)
             variables = [obj.var]
             gradients = [1.]
             obj.opt.apply_gradients(zip(gradients, variables))
             self.evaluate(trackable_utils.gather_initializers(obj))
             obj.save(checkpoint_prefix)
             graph.finalize()
             obj.save(checkpoint_prefix)
Example #24
  def testBasicWithLearningRateInverseTimeDecay(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        decay = 0.5
        lr_schedule = learning_rate_schedule.InverseTimeDecay(
            learning_rate, decay_steps=1.0, decay_rate=decay)
        beta_1 = 0.9
        beta_2 = 0.999
        epsilon = 1e-7

        opt = adam.Adam(
            learning_rate=lr_schedule,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          self.evaluate(update)

          lr_np = learning_rate / (1 + decay * t)

          var0_np, m0, v0 = adam_update_numpy(
              var0_np, grads0_np, t, m0, v0, lr=lr_np)
          var1_np, m1, v1 = adam_update_numpy(
              var1_np, grads1_np, t, m1, v1, lr=lr_np)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
Example #25
 def testVarName(self):
   with tf.compat.v1.get_default_graph().as_default():
     var = tf.Variable([1., 2.], name='var')
     loss = var + 1.
     opt = adam.Adam()
     opt.get_updates(loss, [var])
     opt_vars = opt.variables()
     self.assertLen(opt_vars, 3)
     self.assertEqual('Adam/iter:0', opt_vars[0].name)
     self.assertEqual('Adam/var/m:0', opt_vars[1].name)
     var_2 = tf.Variable([1., 2.], name='var_2')
     loss = var_2 + 1.
     with backend.name_scope('outter'):
       opt.get_updates(loss, [var_2])
     opt_vars = opt.variables()
     self.assertLen(opt_vars, 5)
     self.assertEqual('outter/Adam/var_2/m:0', opt_vars[3].name)
Example #26
  def testSparse(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.0, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.0, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0_np_indices = np.array([0, 2], dtype=np.int32)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np[grads0_np_indices]),
            tf.constant(grads0_np_indices), tf.constant([3]))
        grads1_np_indices = np.array([0, 2], dtype=np.int32)
        grads1 = tf.IndexedSlices(
            tf.constant(grads1_np[grads1_np_indices]),
            tf.constant(grads1_np_indices), tf.constant([3]))
        opt = adam.Adam()
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1))

        beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
        # Run 3 steps of Adam
        for t in range(3):
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          update.run()

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
Example #27
    def testUsageGraph(self):
        """Expected usage when graph building."""
        with context.graph_mode():
            num_training_steps = 10
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            for training_continuation in range(3):
                with tf.Graph().as_default():
                    model = MyModel()
                    optimizer = adam.Adam(0.001)
                    root = tf.compat.v1.train.Checkpoint(optimizer=optimizer,
                                                         model=model)
                    input_value = tf.constant([[3.]])
                    with tf.GradientTape() as tape:
                        loss = model(input_value)
                    variables = model.trainable_variables
                    gradients = tape.gradient(loss, variables)
                    train_op = optimizer.apply_gradients(
                        zip(gradients, variables))

                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_directory)
                    with self.session(
                            graph=tf.compat.v1.get_default_graph()) as session:
                        status = root.restore(save_path=checkpoint_path)
                        status.initialize_or_restore(session=session)
                        if checkpoint_path is None:
                            self.assertEqual(0, training_continuation)
                            with self.assertRaises(AssertionError):
                                status.assert_consumed()
                            with self.assertRaises(AssertionError):
                                status.assert_existing_objects_matched()
                        else:
                            status.assert_consumed()
                            status.assert_existing_objects_matched()
                        for _ in range(num_training_steps):
                            session.run(train_op)
                        root.save(file_prefix=checkpoint_prefix,
                                  session=session)
                        self.assertEqual(
                            (training_continuation + 1) * num_training_steps,
                            session.run(root.optimizer.iterations))
                        self.assertEqual(training_continuation + 1,
                                         session.run(root.save_counter))
Example #28
    def testWithDefun(self):
        with self.test_session():
            num_training_steps = 2
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            for training_continuation in range(3):
                with testing_utils.device(should_use_gpu=True):
                    model = MyModel()
                    # Don't actually train so we can test variable values
                    optimizer = adam.Adam(0.)
                    root = tf.train.Checkpoint(optimizer=optimizer,
                                               model=model)
                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_directory)
                    status = root.restore(save_path=checkpoint_path)

                    def train_fn():
                        @tf.function
                        def _call_model(x):
                            return model(x)

                        with tf.GradientTape() as tape:
                            loss = _call_model(tf.constant([[3.]]))
                        gradients = tape.gradient(loss, model.variables)
                        return optimizer.apply_gradients(
                            zip(gradients, model.variables))

                    if not tf.executing_eagerly():
                        train_fn = functools.partial(self.evaluate, train_fn())
                    status.initialize_or_restore()
                    for _ in range(num_training_steps):
                        train_fn()
                    if training_continuation > 0:
                        status.assert_consumed()
                        self.assertAllClose([[42.]],
                                            self.evaluate(model.variables[0]))
                    else:
                        self.evaluate(model.variables[0].assign([[42.]]))
                    root.save(file_prefix=checkpoint_prefix)
                    self.assertEqual(
                        (training_continuation + 1) * num_training_steps,
                        self.evaluate(optimizer.iterations))
                    self.assertEqual(training_continuation + 1,
                                     self.evaluate(root.save_counter))
Example #29
  def testWeights(self):
    with testing_utils.use_gpu():
      opt1 = adam.Adam(learning_rate=1.0)
      var1 = tf.Variable([1.0, 2.0], dtype=tf.float32)
      loss1 = lambda: 3 * var1
      opt_op_1 = opt1.minimize(loss1, [var1])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      config = opt1.get_config()
      opt2 = adam.Adam.from_config(config)
      var2 = tf.Variable([1.0, 2.0], dtype=tf.float32)
      loss2 = lambda: 3 * var2
      opt_op_2 = opt2.minimize(loss2, [var2])
      weights = opt1.get_weights()

      # Assert that after set_weights both variables get updated to the same value.
      self.evaluate(tf.compat.v1.global_variables_initializer())
      opt2.set_weights(weights)
      self.evaluate([opt_op_1, opt_op_2])
      self.assertAllClose(self.evaluate(var1), self.evaluate(var2))
      self.assertEqual(1, self.evaluate(opt1.iterations))
      self.assertEqual(1, self.evaluate(opt2.iterations))

      var3 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
      var4 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32)
      loss3 = lambda: 3 * var3 + 5 * var4
      opt_op_3 = opt1.minimize(loss3, [var3, var4])

      # Assert that set_weights raises a ValueError since the weight list does not match.
      self.evaluate(tf.compat.v1.global_variables_initializer())
      weights = opt1.get_weights()
      with self.assertRaisesRegex(ValueError, 'but the optimizer was'):
        opt2.set_weights(weights)

      # Assert that after set_weights the variables get updated to the same value.
      var5 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
      var6 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32)
      loss4 = lambda: 3 * var5 + 5 * var6
      opt_op_4 = opt2.minimize(loss4, [var5, var6])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      opt2.set_weights(weights)
      self.evaluate([opt_op_3, opt_op_4])
      self.assertAllClose(
          self.evaluate([var3, var4]), self.evaluate([var5, var6]))
Example #30
  def testNumericEquivalenceForAmsgrad(self):
    if tf.executing_eagerly():
      self.skipTest(
          'v1 optimizer does not run in eager mode')
    np.random.seed(1331)
    with testing_utils.use_gpu():
      train_samples = 20
      input_dim = 3
      num_classes = 2
      (x, y), _ = testing_utils.get_test_data(
          train_samples=train_samples,
          test_samples=10,
          input_shape=(input_dim,),
          num_classes=num_classes)
      y = np_utils.to_categorical(y)

      num_hidden = 5
      model_k_v1 = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2 = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2.set_weights(model_k_v1.get_weights())

      opt_k_v1 = optimizer_v1.Adam(amsgrad=True)
      opt_k_v2 = adam.Adam(amsgrad=True)

      model_k_v1.compile(
          opt_k_v1,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly())
      model_k_v2.compile(
          opt_k_v2,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly())

      hist_k_v1 = model_k_v1.fit(x, y, batch_size=5, epochs=10, shuffle=False)
      hist_k_v2 = model_k_v2.fit(x, y, batch_size=5, epochs=10, shuffle=False)

      self.assertAllClose(model_k_v1.get_weights(), model_k_v2.get_weights())
      self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights())
      self.assertAllClose(hist_k_v1.history['loss'], hist_k_v2.history['loss'])