    def test_autolambda(self, model_fn):
        model = model_fn()
        model.compile(adam.Adam(0.001),
                      'mse',
                      run_eagerly=test_utils.should_run_eagerly())

        np_inputs = tf.nest.map_structure(
            lambda x: np.ones((2, ) + tuple(x.shape[1:]), 'float32'),
            model.inputs)
        np_outputs = tf.nest.map_structure(
            lambda x: np.ones((2, ) + tuple(x.shape[1:]), 'float32'),
            model.outputs)
        model.fit(np_inputs, np_outputs, batch_size=2)
        model(np_inputs)  # Test calling the model directly on inputs.

        new_model = keras.Model.from_config(model.get_config(),
                                            custom_objects={
                                                'LayerWithLayer':
                                                LayerWithLayer,
                                                'MyAdd': MyAdd
                                            })
        new_model.compile(adam.Adam(0.001),
                          'mse',
                          run_eagerly=test_utils.should_run_eagerly())
        new_model.fit(np_inputs, np_outputs, batch_size=2)
        new_model(np_inputs)  # Test calling the new model directly on inputs.
        # Assert that metrics are preserved and in the right order.
        self.assertAllEqual(model.metrics_names, new_model.metrics_names)
        # Assert that layer names don't change.
        self.assertAllEqual([layer.name for layer in model.layers],
                            [layer.name for layer in new_model.layers])
Example No. 2
 def testSparseRepeatedIndices(self):
   # TODO(tanzheny, omalleyt): Fix test in eager mode.
   for dtype in [tf.half, tf.float32, tf.float64]:
     with tf.Graph().as_default(), self.cached_session():
       repeated_index_update_var = tf.Variable(
           [[1.0], [2.0]], dtype=dtype)
       aggregated_update_var = tf.Variable(
           [[1.0], [2.0]], dtype=dtype)
       grad_repeated_index = tf.IndexedSlices(
           tf.constant(
               [0.1, 0.1], shape=[2, 1], dtype=dtype),
           tf.constant([1, 1]),
           tf.constant([2, 1]))
       grad_aggregated = tf.IndexedSlices(
           tf.constant(
               [0.2], shape=[1, 1], dtype=dtype),
           tf.constant([1]),
           tf.constant([2, 1]))
       repeated_update = adam.Adam().apply_gradients(
           [(grad_repeated_index, repeated_index_update_var)])
       aggregated_update = adam.Adam().apply_gradients(
           [(grad_aggregated, aggregated_update_var)])
       self.evaluate(tf.compat.v1.global_variables_initializer())
       self.assertAllClose(aggregated_update_var,
                           self.evaluate(repeated_index_update_var))
       for _ in range(3):
         repeated_update.run()
         aggregated_update.run()
         self.assertAllClose(aggregated_update_var,
                             self.evaluate(repeated_index_update_var))
Example No. 3
    def test_trackable_save_restore(self):
        with self.test_session():

            def _templated():
                v = tf.compat.v1.get_variable(
                    "v",
                    shape=[1],
                    initializer=tf.compat.v1.zeros_initializer(),
                    use_resource=True)
                v2 = tf.compat.v1.get_variable(
                    "v2",
                    shape=[1],
                    initializer=tf.compat.v1.zeros_initializer(),
                    use_resource=True)
                manual = _ManualScope()
                return v, v + 1., v2, manual, manual()

            save_template = tf.compat.v1.make_template("s1", _templated)
            v1_save, _, v2_save, manual_scope, manual_scope_v = save_template()
            self.assertEqual(
                set([
                    id(v1_save),
                    id(v2_save),
                    id(manual_scope),
                    id(manual_scope_v),
                    id(save_template)
                ]), set(map(id, trackable_utils.list_objects(save_template))))
            self.assertDictEqual({"in_manual_scope": manual_scope_v},
                                 manual_scope._trackable_children())
            optimizer = adam.Adam(0.0)
            save_root = tf.train.Checkpoint(my_template=save_template,
                                            optimizer=optimizer)
            optimizer.minimize(v1_save.read_value, var_list=[v1_save])
            self.evaluate([v.initializer for v in save_template.variables])
            optimizer_variables = optimizer.variables() + list(
                optimizer._hyper.values())
            self.evaluate([v.initializer for v in optimizer_variables])
            self.evaluate(v1_save.assign([12.]))
            self.evaluate(v2_save.assign([14.]))
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            save_path = save_root.save(checkpoint_prefix)

            load_template = tf.compat.v1.make_template("s2", _templated)
            load_optimizer = adam.Adam(0.0)
            load_root = tf.train.Checkpoint(my_template=load_template,
                                            optimizer=load_optimizer)
            status = load_root.restore(save_path)
            var, var_plus_one, var2, _, _ = load_template()
            load_optimizer.minimize(var.read_value, var_list=[var])

            children = load_template._trackable_children()
            self.assertEqual({"v", "v2", "ManualScope"}, children.keys())
            status.assert_consumed().run_restore_ops()
            self.assertAllEqual([12.], self.evaluate(var))
            self.assertAllEqual([13.], self.evaluate(var_plus_one))
            self.assertAllEqual([14.], self.evaluate(var2))
Example No. 4
  def testConstructAdamWithLR(self):
    opt = adam.Adam(lr=1.0)
    opt_2 = adam.Adam(learning_rate=0.1, lr=1.0)
    opt_3 = adam.Adam(learning_rate=0.1)
    self.assertIsInstance(opt.lr, tf.Variable)
    self.assertIsInstance(opt_2.lr, tf.Variable)
    self.assertIsInstance(opt_3.lr, tf.Variable)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(opt.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_2.lr), (1.0))
    self.assertAllClose(self.evaluate(opt_3.lr), (0.1))
Example No. 5
  def testSparseWithAmsgrad(self):
    # dtypes.half does not work on gpu + eager.
    for dtype in [tf.float32, tf.float64]:
      with self.cached_session():
        m0 = np.array([[0.0], [0.0]])
        v0 = np.array([[0.0], [0.0]])
        v0hat = np.array([[0.0], [0.0]])
        indices_np = np.array([1])
        indices = tf.constant(indices_np, dtype=tf.int32)
        var0_np = np.array([[1.0], [2.0]], dtype=dtype.as_numpy_dtype)
        repeated_index_update_var = tf.Variable(var0_np, dtype=dtype)
        aggregated_update_var = tf.Variable(var0_np, dtype=dtype)
        grads0_np = np.array([[0.2]], dtype=dtype.as_numpy_dtype)
        grad_repeated_index = tf.IndexedSlices(
            tf.constant([0.1, 0.1], shape=[2, 1], dtype=dtype),
            tf.constant([1, 1]), tf.constant([2, 1]))
        grad_aggregated = tf.IndexedSlices(grads0_np, indices,
                                            tf.constant([2, 1]))
        opt_repeated = adam.Adam(amsgrad=True)
        opt_aggregated = adam.Adam(amsgrad=True)
        if not tf.executing_eagerly():
          repeated_update = opt_repeated.apply_gradients(
              [(grad_repeated_index, repeated_index_update_var)])
          aggregated_update = opt_aggregated.apply_gradients(
              [(grad_aggregated, aggregated_update_var)])
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(
            self.evaluate(aggregated_update_var),
            self.evaluate(repeated_index_update_var))
        for t in range(3):
          if not tf.executing_eagerly():
            self.evaluate(repeated_update)
            self.evaluate(aggregated_update)
          else:
            opt_repeated.apply_gradients(
                [(grad_repeated_index, repeated_index_update_var)])
            opt_aggregated.apply_gradients(
                [(grad_aggregated, aggregated_update_var)])

          var0_np, m0, v0, v0hat = adam_sparse_update_numpy_amsgrad(
              var0_np, indices_np, grads0_np, t, m0, v0, v0hat)

          # Validate updated params
          self.assertAllCloseAccordingToType(
              var0_np, self.evaluate(aggregated_update_var))
          self.assertAllCloseAccordingToType(
              self.evaluate(aggregated_update_var),
              self.evaluate(repeated_index_update_var))
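
The reference values above come from a NumPy helper, adam_sparse_update_numpy_amsgrad, which is not shown in this listing. A minimal sketch of such a helper, assuming the standard bias-corrected AMSGrAD update applied only to the sliced rows (the argument names and the epsilon placement are assumptions, not taken from the original test file):

import numpy as np


# Sketch of an AMSGrad reference update; not the original test helper.
def adam_sparse_update_numpy_amsgrad(param, indices, g_t, t, m, v, vhat,
                                     lr=0.001, beta1=0.9, beta2=0.999,
                                     epsilon=1e-7):
  # Bias correction is folded into the step size; `t` is the 0-based step index.
  lr_t = lr * np.sqrt(1 - beta2**(t + 1)) / (1 - beta1**(t + 1))
  param_t, m_t, v_t, vhat_t = (np.copy(param), np.copy(m), np.copy(v),
                               np.copy(vhat))
  m_slice = beta1 * m[indices] + (1 - beta1) * g_t
  v_slice = beta2 * v[indices] + (1 - beta2) * g_t * g_t
  vhat_slice = np.maximum(vhat[indices], v_slice)
  m_t[indices], v_t[indices], vhat_t[indices] = m_slice, v_slice, vhat_slice
  param_t[indices] = param[indices] - lr_t * m_slice / (np.sqrt(vhat_slice) +
                                                        epsilon)
  return param_t, m_t, v_t, vhat_t
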
  def test_validate_callbacks_predefined_callbacks(self):
    supported_predefined_callbacks = [
        callbacks.TensorBoard(),
        callbacks.CSVLogger(filename='./log.csv'),
        callbacks.EarlyStopping(),
        callbacks.ModelCheckpoint(filepath='./checkpoint'),
        callbacks.TerminateOnNaN(),
        callbacks.ProgbarLogger(),
        callbacks.History(),
        callbacks.RemoteMonitor()
    ]

    distributed_training_utils_v1.validate_callbacks(
        supported_predefined_callbacks, adam.Adam())

    unsupported_predefined_callbacks = [
        callbacks.ReduceLROnPlateau(),
        callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001)
    ]

    for callback in unsupported_predefined_callbacks:
      with self.assertRaisesRegex(ValueError,
                                  'You must specify a Keras Optimizer V2'):
        distributed_training_utils_v1.validate_callbacks(
            [callback], tf.compat.v1.train.AdamOptimizer())
Example No. 7
 def testDeferredRestorationUsageEager(self):
     """An idiomatic eager execution example."""
     num_training_steps = 10
     checkpoint_directory = self.get_temp_dir()
     for training_continuation in range(3):
         with self.test_scope():
             model = Subclassed()
             optimizer = adam.Adam(0.001)
             root = tf.train.Checkpoint(optimizer=optimizer, model=model)
             manager = tf.train.CheckpointManager(root,
                                                  checkpoint_directory,
                                                  max_to_keep=2)
             root.restore(manager.latest_checkpoint)
             for _ in range(num_training_steps):
                 input_value = tf.constant([[3.0]])
                 with tf.GradientTape() as tape:
                     loss = model(input_value)
                 variables = model.trainable_variables
                 gradients = tape.gradient(loss, variables)
                 optimizer.apply_gradients(zip(gradients, variables))
             manager.save()
             self.assertEqual(
                 (training_continuation + 1) * num_training_steps,
                 root.optimizer.iterations.numpy(),
             )
    def test_getitem_slice_real_tensor(self):
        if not tf.executing_eagerly():
            self.skipTest("Complex slicing like this fails in v1")
        x = tf.range(10.0)
        slice_stop = keras.Input(shape=(), dtype="int32")

        out = x[:slice_stop[0]]
        model = keras.Model(inputs=slice_stop, outputs=out)
        model.compile(adam.Adam(0.001),
                      "mse",
                      run_eagerly=test_utils.should_run_eagerly())
        batch_size = 7
        stop = 6
        args = tf.constant(stop, shape=(batch_size, ))
        expected = x[:stop]

        if tf.compat.v1.executing_eagerly_outside_functions():
            self.assertIn("tf.__operators__.getitem",
                          (x.name for x in model.layers))
            # TODO(b/161925288): Fix the dispatch triggering then uncomment:
            # self.assertNotIn('tf.strided_slice', (
            #     x.name for x in model.layers))
        self.assertAllEqual(model(args), expected)
        self.assertAllEqual(model.predict(args, batch_size=batch_size),
                            expected)

        config = model.get_config()
        model = keras.Model.from_config(config)

        self.assertAllEqual(model(args), expected)
        self.assertAllEqual(model.predict(args, batch_size=batch_size),
                            expected)
    def test_getitem_index_real_tensor(self):
        if not tf.executing_eagerly():
            self.skipTest('Complex slicing like this fails in v1')
        x = tf.range(10.0)
        slice_stop = keras.Input(shape=(), dtype='int32')

        out = x[slice_stop[0]]
        model = keras.Model(inputs=slice_stop, outputs=out)
        model.compile(adam.Adam(0.001),
                      'mse',
                      run_eagerly=test_utils.should_run_eagerly())
        batch_size = 7
        index = 6
        args = tf.constant(index, shape=(batch_size, ))
        expected = x[index]

        if tf.compat.v1.executing_eagerly_outside_functions():
            self.assertIn('tf.__operators__.getitem',
                          (x.name for x in model.layers))
            # TODO(b/161925288): Fix the bug then uncomment:
            # self.assertNotIn('tf.strided_slice', (
            #     x.name for x in model.layers))
        self.assertAllEqual(model(args), expected)
        self.assertAllEqual(model.predict(args, batch_size=batch_size),
                            expected)

        # Make sure it can be successfully saved and loaded
        config = model.get_config()
        model = keras.Model.from_config(config)

        self.assertAllEqual(model(args), expected)
        self.assertAllEqual(model.predict(args, batch_size=batch_size),
                            expected)
Example No. 10
    def test_getitem_slice_with_stop_and_ellipsis_only(self):
        if not tf.executing_eagerly():
            self.skipTest('Complex slicing like this fails in v1')
        inp = keras.Input(shape=(8, ))
        slice_stop = keras.Input(shape=(), dtype='int32')

        out = inp[..., :slice_stop[0]]
        model = keras.Model(inputs=[inp, slice_stop], outputs=out)
        model.compile(adam.Adam(0.001),
                      'mse',
                      run_eagerly=test_utils.should_run_eagerly())
        batch_size = 7
        stop = 6
        x = tf.stack([tf.range(8) for _ in range(batch_size)])
        args = [x, tf.constant(stop, shape=(batch_size, ))]
        expected = tf.stack([tf.range(8)[:stop] for _ in range(batch_size)])

        if tf.compat.v1.executing_eagerly_outside_functions():
            self.assertIn('tf.__operators__.getitem',
                          (x.name for x in model.layers))
            self.assertNotIn('tf.strided_slice',
                             (x.name for x in model.layers))
        self.assertAllEqual(model(args), expected)
        self.assertAllEqual(model.predict(args, batch_size=batch_size),
                            expected)

        # Make sure it can be successfully saved and loaded
        config = model.get_config()
        model = keras.Model.from_config(config)

        self.assertAllEqual(model(args), expected)
        self.assertAllEqual(model.predict(args, batch_size=batch_size),
                            expected)
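
These getitem tests rely on the Keras functional API dispatching tensor slicing to an operator layer whose name contains tf.__operators__.getitem. A standalone sketch that builds the same kind of dynamic slice and prints the resulting layer names (assuming the TF-bundled Keras; exact layer names can vary between versions):

import tensorflow as tf
from tensorflow import keras

# A tiny functional model whose output is a dynamic slice of its input.
inp = keras.Input(shape=(8,))
stop = keras.Input(shape=(), dtype='int32')
out = inp[..., :stop[0]]  # slicing a KerasTensor creates an operator layer
model = keras.Model(inputs=[inp, stop], outputs=out)

# The slicing op should appear among the layer names.
print([layer.name for layer in model.layers])
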
Example No. 11
  def testOptimizerWithCallableVarList(self):
    train_samples = 20
    input_dim = 1
    num_classes = 2
    (x, y), _ = test_utils.get_test_data(
        train_samples=train_samples,
        test_samples=10,
        input_shape=(input_dim,),
        num_classes=num_classes)
    y = np_utils.to_categorical(y)

    num_hidden = 1
    model = test_utils.get_small_sequential_mlp(
        num_hidden=num_hidden, num_classes=num_classes)
    opt = adam.Adam()

    loss = lambda: losses.mean_squared_error(model(x), y)
    var_list = lambda: model.trainable_weights

    with self.assertRaisesRegex(
        ValueError, 'Weights for model .* have not yet been created'):
      var_list()
    train_op = opt.minimize(loss, var_list)
    if not tf.executing_eagerly():
      self.evaluate(tf.compat.v1.global_variables_initializer())
      self.assertEqual(
          [[0.]], self.evaluate(opt.get_slot(var_list()[0], 'm')))
      self.evaluate(train_op)
    self.assertNotEqual(
        [[0.]], self.evaluate(opt.get_slot(var_list()[0], 'm')))
    self.assertLen(var_list(), 4)
Example No. 12
    def test_getitem_complex_slicing(self):
        if not tf.executing_eagerly():
            self.skipTest("Complex slicing like this fails in v1")
        inp = keras.Input(shape=(4, 3, 8))
        first_dim = keras.Input(shape=(), dtype="int32")
        slice_start = keras.Input(shape=(), dtype="int32")
        slice_stop = keras.Input(shape=(), dtype="int32")
        slice_stride = keras.Input(shape=(), dtype="int32")

        out = inp[..., first_dim[0],
                  slice_start[0]:slice_stop[0]:slice_stride[0]]
        model = keras.Model(
            inputs=[inp, first_dim, slice_start, slice_stop, slice_stride],
            outputs=out,
        )
        model.compile(adam.Adam(0.001),
                      "mse",
                      run_eagerly=test_utils.should_run_eagerly())
        batch_size = 7
        start = 1
        stop = 6
        step = 2
        x = tf.stack([
            tf.stack(
                [tf.stack([tf.range(8) for _ in range(3)]) for _ in range(4)])
            for _ in range(batch_size)
        ])
        args = [
            x,
            tf.constant(0, shape=(batch_size, )),
            tf.constant(start, shape=(batch_size, )),
            tf.constant(stop, shape=(batch_size, )),
            tf.constant(step, shape=(batch_size, )),
        ]
        # Slice the innermost dim; only grab one index from the
        # second-to-innermost dim, removing that dim from the shape.
        expected = tf.stack([
            tf.stack([tf.range(8)[start:stop:step] for _ in range(4)])
            for _ in range(batch_size)
        ])

        if tf.compat.v1.executing_eagerly_outside_functions():
            self.assertIn("tf.__operators__.getitem",
                          (x.name for x in model.layers))
            self.assertNotIn("tf.strided_slice",
                             (x.name for x in model.layers))
        self.assertAllEqual(model(args), expected)
        self.assertAllEqual(model.predict(args, batch_size=batch_size),
                            expected)

        # Make sure it can be successfully saved and loaded
        config = model.get_config()
        model = keras.Model.from_config(config)

        self.assertAllEqual(model(args), expected)
        self.assertAllEqual(model.predict(args, batch_size=batch_size),
                            expected)
Example No. 13
 def testSetWeightsFromV1AdamWithoutMinimize(self):
   keras_v1_adam = optimizer_v1.Adam()
   keras_v2_adam = adam.Adam()
   keras_v2_adam.set_weights(keras_v1_adam.get_weights())
   keras_v1_iteration = keras_v1_adam.iterations
   keras_v2_iteration = keras_v2_adam.iterations
   self.evaluate(tf.compat.v1.global_variables_initializer())
   self.assertEqual(
       self.evaluate(keras_v1_iteration), self.evaluate(keras_v2_iteration))
Example No. 14
 def testSlotsUniqueEager(self):
   v1 = tf.Variable(1.)
   v2 = tf.Variable(1.)
   opt = adam.Adam(1.)
   opt.minimize(lambda: v1 + v2, var_list=[v1, v2])
   # There should be the iteration counter plus two unique slot variables
   # (m and v) for each of v1 and v2, i.e. 5 optimizer variables in total.
   self.assertLen(set(v.ref() for v in opt.variables()), 5)
   self.assertEqual(
       self.evaluate(opt.variables()[0]), self.evaluate(opt.iterations))
Example No. 15
    def test_stack_preserves_correct_shape(self):
        ## Test stack([x])
        inp = keras.Input(shape=(), dtype='float32')

        out = tf.stack([inp])
        model = keras.Model(inputs=inp, outputs=out)
        model.compile(adam.Adam(0.001),
                      'mse',
                      run_eagerly=test_utils.should_run_eagerly())

        x = tf.ones(shape=(4, 4))
        expected = tf.stack([x])
        self.assertAllEqual(expected.shape, (1, 4, 4))

        self.assertAllEqual(model(x).shape, (1, 4, 4))
        self.assertAllEqual(model(x), expected)

        config = model.get_config()
        model = keras.Model.from_config(config)

        self.assertAllEqual(model(x).shape, (1, 4, 4))
        self.assertAllEqual(model(x), expected)

        ## Test stack(x)
        inp = keras.Input(shape=(), dtype='float32')

        out = tf.stack(inp)
        model = keras.Model(inputs=inp, outputs=out)
        model.compile(adam.Adam(0.001),
                      'mse',
                      run_eagerly=test_utils.should_run_eagerly())

        x = tf.ones(shape=(4, 4))
        expected = tf.stack(x)
        self.assertAllEqual(expected.shape, (4, 4))

        self.assertAllEqual(model(x).shape, (4, 4))
        self.assertAllEqual(model(x), expected)

        config = model.get_config()
        model = keras.Model.from_config(config)

        self.assertAllEqual(model(x).shape, (4, 4))
        self.assertAllEqual(model(x), expected)
Example No. 16
    def testKerasOptimizerWithUnequalInput(self, distribution):
        with distribution.scope():
            var = tf.Variable(2.0,
                              name="var",
                              aggregation=tf.VariableAggregation.SUM)
            optimizer = adam.Adam(learning_rate=0.01, beta_1=0.2, beta_2=0.2)
            all_vars = []

            def model_fn():
                def loss_fn():
                    replica_id = _replica_id()
                    return tf.cast(replica_id + 1,
                                   dtype=tf.float32) * 0.5 * var

                train_op = optimizer.minimize(loss_fn, var_list=[var])

                return train_op, optimizer

            def train_fn():
                (
                    train_op,
                    optimizer,
                ) = distribution.extended.call_for_each_replica(model_fn)
                if not all_vars:
                    all_vars.append(var)
                    all_vars.append(optimizer.get_slot(var, "m"))
                    all_vars.append(optimizer.get_slot(var, "v"))
                return distribution.group(train_op)

            if not tf.executing_eagerly():
                with self.cached_session() as sess:
                    train_fn = sess.make_callable(train_fn())
            self.evaluate(tf.compat.v1.global_variables_initializer())

            # first step.
            train_fn()
            # var(1) = var(0) - lr * m(1) * sqrt(1 - beta2) / sqrt(v(1)) / (1 -
            # beta1)
            #        = 2.0 - 0.01 * 1.2 * sqrt(0.8) / sqrt(1.8) / 0.8
            self.assertAllClose(1.99, self.evaluate(all_vars[0]))
            # m(1) = beta1 * m(0) + (1-beta1) * grad
            #      = 0.2 * 0 + 0.8 * (1 + 2) / 2 = 1.2
            self.assertAllClose(1.2, self.evaluate(all_vars[1]))
            # v(1) = beta2 * v(0) + (1-beta2) * grad^2 = 0.2 * 0 + 0.8 * 2.25
            self.assertAllClose(1.8, self.evaluate(all_vars[2]))

            # second step.
            train_fn()
            # var(2) ≈ var(0) - 2 * lr = 1.98
            # (each Adam step here moves var by roughly lr)
            self.assertAllClose(1.98, self.evaluate(all_vars[0]))
            # m(2) = beta1 * m(1) + (1-beta1) * grad = 0.2 * 1.2 + 0.8 * 1.5
            self.assertAllClose(1.44, self.evaluate(all_vars[1]))
            # v(2) = beta2 * v(1) + (1-beta2) * grad^2 = 0.2 * 1.8 + 0.8 * 2.25
            self.assertAllClose(2.16, self.evaluate(all_vars[2]))
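
The values asserted above can be reproduced with a few lines of NumPy, following the formulas in the comments (a worked check added here for illustration, not part of the original test):

import numpy as np

lr, beta1, beta2 = 0.01, 0.2, 0.2
grad = (1.0 + 2.0) / 2.0                      # 1.5, as in the comments above

m1 = beta1 * 0.0 + (1 - beta1) * grad         # 1.2
v1 = beta2 * 0.0 + (1 - beta2) * grad ** 2    # 1.8
lr1 = lr * np.sqrt(1 - beta2) / (1 - beta1)   # bias-corrected step size
var1 = 2.0 - lr1 * m1 / np.sqrt(v1)           # ~1.99
m2 = beta1 * m1 + (1 - beta1) * grad          # 1.44
v2 = beta2 * v1 + (1 - beta2) * grad ** 2     # 2.16
print(m1, v1, round(var1, 2), m2, v2)
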
Example No. 17
  def testBasicWithConstantDecay(self):
    var = tf.Variable([1.0, 2.0], dtype=tf.float32)
    loss = lambda: 3 * var
    opt = adam.Adam(learning_rate=1.0)

    @tf.function
    def fn():
      opt.minimize(loss, [var])
      return var

    self.assertAllClose([0., 1.], fn(), atol=1e-4)
    self.assertAllClose([-1, 0.], fn(), atol=1e-4)
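
The expected values work out because, with lr=1.0 and a constant gradient, the bias-corrected Adam step is approximately lr for every entry, so each fn() call moves the variable by about 1.0. A quick NumPy check of the first step (assuming the default beta and epsilon values):

import numpy as np

lr, beta1, beta2, eps, g = 1.0, 0.9, 0.999, 1e-7, 3.0
m = (1 - beta1) * g                           # first-moment estimate after one step
v = (1 - beta2) * g * g                       # second-moment estimate after one step
lr_t = lr * np.sqrt(1 - beta2) / (1 - beta1)  # bias-corrected step size
print(lr_t * m / (np.sqrt(v) + eps))          # ~1.0, so [1., 2.] -> [0., 1.]
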
Example No. 18
 def testOptimizerSetIterations(self):
   global_step = tf.compat.v1.train.get_or_create_global_step()
   opt = adam.Adam(learning_rate=1.0)
   opt.iterations = global_step
   var = tf.Variable([1.0, 2.0], dtype=tf.float32)
   self.evaluate(tf.compat.v1.global_variables_initializer())
   init_step_value = self.evaluate(global_step)
   loss = lambda: 3 * var
   opt_op = opt.minimize(loss, [var])
   self.evaluate(tf.compat.v1.global_variables_initializer())
   self.evaluate(opt_op)
   new_step_value = self.evaluate(global_step)
   self.assertEqual(new_step_value, init_step_value + 1)
Example No. 19
 def testSparseDevicePlacement(self):
   # TODO(tanzheny, omalleyt): Fix test in eager mode.
   for index_dtype in [tf.int32, tf.int64]:
     with tf.Graph().as_default(), self.cached_session(
         force_gpu=tf.test.is_gpu_available()):
       # If a GPU is available, tests that all optimizer ops can be placed on
       # it (i.e. they have GPU kernels).
       var = tf.Variable([[1.0], [2.0]])
       indices = tf.constant([0, 1], dtype=index_dtype)
       g_sum = lambda: tf.reduce_sum(tf.gather(var, indices))  # pylint: disable=cell-var-from-loop
       optimizer = adam.Adam(3.0)
       minimize_op = optimizer.minimize(g_sum, var_list=[var])
       self.evaluate(tf.compat.v1.global_variables_initializer())
       minimize_op.run()
Example No. 20
  def doTestBasic(self, use_callable_params=False):
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = lambda: 0.001
        beta1 = lambda: 0.9
        beta2 = lambda: 0.999
        epsilon = lambda: 1e-8
        if not use_callable_params:
          learning_rate = learning_rate()
          beta1 = beta1()
          beta2 = beta2()
          epsilon = epsilon()

        opt = adam.Adam(learning_rate=learning_rate)
        if not tf.executing_eagerly():
          update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          if not tf.executing_eagerly():
            self.evaluate(update)
          else:
            opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
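
Several snippets here compare against a NumPy reference named adam_update_numpy, which is not included in this listing. A minimal sketch of such a reference, assuming the standard bias-corrected dense Adam update (the argument names and epsilon placement are assumptions):

import numpy as np


# Sketch of a dense Adam reference update; not the original test helper.
def adam_update_numpy(param, g_t, t, m, v,
                      lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-7):
  # `t` is the 0-based step index, so bias correction uses t + 1.
  lr_t = lr * np.sqrt(1 - beta2**(t + 1)) / (1 - beta1**(t + 1))
  m_t = beta1 * m + (1 - beta1) * g_t
  v_t = beta2 * v + (1 - beta2) * g_t * g_t
  param_t = param - lr_t * m_t / (np.sqrt(v_t) + epsilon)
  return param_t, m_t, v_t
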
Example No. 21
        def state():
            with distribution.scope():
                v = tf.Variable(tf.random.normal([]))
            opt = adam.Adam(0.001)

            @tf.function
            def step():
                def f():
                    with tf.GradientTape() as tape:
                        loss = v + v
                    gradients = tape.gradient(loss, [v])
                    opt.apply_gradients(zip(gradients, [v]))

                distribution.run(f)

            return v, opt, step
Example No. 22
    def testWithDefun(self):
        with self.test_session():
            num_training_steps = 2
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            for training_continuation in range(3):
                with test_utils.device(should_use_gpu=True):
                    model = MyModel()
                    # Don't actually train so we can test variable values
                    optimizer = adam.Adam(0.0)
                    root = tf.train.Checkpoint(optimizer=optimizer,
                                               model=model)
                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_directory)
                    status = root.restore(save_path=checkpoint_path)

                    def train_fn():
                        @tf.function
                        def _call_model(x):
                            return model(x)

                        with tf.GradientTape() as tape:
                            loss = _call_model(tf.constant([[3.0]]))
                        gradients = tape.gradient(loss, model.variables)
                        return optimizer.apply_gradients(
                            zip(gradients, model.variables))

                    if not tf.executing_eagerly():
                        train_fn = functools.partial(self.evaluate, train_fn())
                    status.initialize_or_restore()
                    for _ in range(num_training_steps):
                        train_fn()
                    if training_continuation > 0:
                        status.assert_consumed()
                        self.assertAllClose([[42.0]],
                                            self.evaluate(model.variables[0]))
                    else:
                        self.evaluate(model.variables[0].assign([[42.0]]))
                    root.save(file_prefix=checkpoint_prefix)
                    self.assertEqual(
                        (training_continuation + 1) * num_training_steps,
                        self.evaluate(optimizer.iterations),
                    )
                    self.assertEqual(
                        training_continuation + 1,
                        self.evaluate(root.save_counter),
                    )
Example No. 23
    def testUsageGraph(self):
        """Expected usage when graph building."""
        with context.graph_mode():
            num_training_steps = 10
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            for training_continuation in range(3):
                with tf.Graph().as_default():
                    model = MyModel()
                    optimizer = adam.Adam(0.001)
                    root = tf.compat.v1.train.Checkpoint(optimizer=optimizer,
                                                         model=model)
                    input_value = tf.constant([[3.0]])
                    with tf.GradientTape() as tape:
                        loss = model(input_value)
                    variables = model.trainable_variables
                    gradients = tape.gradient(loss, variables)
                    train_op = optimizer.apply_gradients(
                        zip(gradients, variables))

                    checkpoint_path = tf.train.latest_checkpoint(
                        checkpoint_directory)
                    with self.session(
                            graph=tf.compat.v1.get_default_graph()) as session:
                        status = root.restore(save_path=checkpoint_path)
                        status.initialize_or_restore(session=session)
                        if checkpoint_path is None:
                            self.assertEqual(0, training_continuation)
                            with self.assertRaises(AssertionError):
                                status.assert_consumed()
                            with self.assertRaises(AssertionError):
                                status.assert_existing_objects_matched()
                        else:
                            status.assert_consumed()
                            status.assert_existing_objects_matched()
                        for _ in range(num_training_steps):
                            session.run(train_op)
                        root.save(file_prefix=checkpoint_prefix,
                                  session=session)
                        self.assertEqual(
                            (training_continuation + 1) * num_training_steps,
                            session.run(root.optimizer.iterations),
                        )
                        self.assertEqual(
                            training_continuation + 1,
                            session.run(root.save_counter),
                        )
Example No. 24
 def testManySavesGraph(self):
     """Saves after the first should not modify the graph."""
     with context.graph_mode():
         graph = tf.Graph()
         with graph.as_default(), self.session(graph):
             checkpoint_directory = self.get_temp_dir()
             checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
             obj = tf.train.Checkpoint()
             obj.var = tf.Variable(0.0, name="v")
             obj.opt = adam.Adam(0.1)
             variables = [obj.var]
             gradients = [1.0]
             obj.opt.apply_gradients(zip(gradients, variables))
             self.evaluate(trackable_utils.gather_initializers(obj))
             obj.save(checkpoint_prefix)
             graph.finalize()
             obj.save(checkpoint_prefix)
Example No. 25
 def testVarName(self):
   with tf.compat.v1.get_default_graph().as_default():
     var = tf.Variable([1., 2.], name='var')
     loss = var + 1.
     opt = adam.Adam()
     opt.get_updates(loss, [var])
     opt_vars = opt.variables()
     self.assertLen(opt_vars, 3)
     self.assertEqual('Adam/iter:0', opt_vars[0].name)
     self.assertEqual('Adam/var/m:0', opt_vars[1].name)
     var_2 = tf.Variable([1., 2.], name='var_2')
     loss = var_2 + 1.
     with backend.name_scope('outter'):
       opt.get_updates(loss, [var_2])
     opt_vars = opt.variables()
     self.assertLen(opt_vars, 5)
     self.assertEqual('outter/Adam/var_2/m:0', opt_vars[3].name)
Example No. 26
  def testBasicWithLearningRateInverseTimeDecay(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for i, dtype in enumerate([tf.half, tf.float32, tf.float64]):
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np, name="var0_%d" % i)
        var1 = tf.Variable(var1_np, name="var1_%d" % i)
        grads0 = tf.constant(grads0_np)
        grads1 = tf.constant(grads1_np)

        learning_rate = 0.001
        decay = 0.5
        lr_schedule = learning_rate_schedule.InverseTimeDecay(
            learning_rate, decay_steps=1.0, decay_rate=decay)
        beta_1 = 0.9
        beta_2 = 0.999
        epsilon = 1e-7

        opt = adam.Adam(
            learning_rate=lr_schedule,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))

        self.evaluate(tf.compat.v1.global_variables_initializer())
        # Run 3 steps of Adam
        for t in range(3):
          self.evaluate(update)

          lr_np = learning_rate / (1 + decay * t)

          var0_np, m0, v0 = adam_update_numpy(
              var0_np, grads0_np, t, m0, v0, lr=lr_np)
          var1_np, m1, v1 = adam_update_numpy(
              var1_np, grads1_np, t, m1, v1, lr=lr_np)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
Example No. 27
  def testSparse(self):
    # TODO(tanzheny, omalleyt): Fix test in eager mode.
    for dtype in [tf.half, tf.float32, tf.float64]:
      with tf.Graph().as_default(), self.cached_session():
        # Initialize variables for numpy implementation.
        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
        var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
        grads0_np = np.array([0.1, 0.0, 0.1], dtype=dtype.as_numpy_dtype)
        var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype)
        grads1_np = np.array([0.01, 0.0, 0.01], dtype=dtype.as_numpy_dtype)

        var0 = tf.Variable(var0_np)
        var1 = tf.Variable(var1_np)
        grads0_np_indices = np.array([0, 2], dtype=np.int32)
        grads0 = tf.IndexedSlices(
            tf.constant(grads0_np[grads0_np_indices]),
            tf.constant(grads0_np_indices), tf.constant([3]))
        grads1_np_indices = np.array([0, 2], dtype=np.int32)
        grads1 = tf.IndexedSlices(
            tf.constant(grads1_np[grads1_np_indices]),
            tf.constant(grads1_np_indices), tf.constant([3]))
        opt = adam.Adam()
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1))

        beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
        # Run 3 steps of Adam
        for t in range(3):
          self.assertAllCloseAccordingToType(0.9**(t + 1),
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(0.999**(t + 1),
                                             self.evaluate(beta_2_power))
          update.run()

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)

          # Validate updated params
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
Example No. 28
  def testWeights(self):
    with test_utils.use_gpu():
      opt1 = adam.Adam(learning_rate=1.0)
      var1 = tf.Variable([1.0, 2.0], dtype=tf.float32)
      loss1 = lambda: 3 * var1
      opt_op_1 = opt1.minimize(loss1, [var1])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      config = opt1.get_config()
      opt2 = adam.Adam.from_config(config)
      var2 = tf.Variable([1.0, 2.0], dtype=tf.float32)
      loss2 = lambda: 3 * var2
      opt_op_2 = opt2.minimize(loss2, [var2])
      weights = opt1.get_weights()

      # Assert set_weights and both variables get updated to same value.
      self.evaluate(tf.compat.v1.global_variables_initializer())
      opt2.set_weights(weights)
      self.evaluate([opt_op_1, opt_op_2])
      self.assertAllClose(self.evaluate(var1), self.evaluate(var2))
      self.assertEqual(1, self.evaluate(opt1.iterations))
      self.assertEqual(1, self.evaluate(opt2.iterations))

      var3 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
      var4 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32)
      loss3 = lambda: 3 * var3 + 5 * var4
      opt_op_3 = opt1.minimize(loss3, [var3, var4])

      # Assert set_weights with ValueError since weight list does not match.
      self.evaluate(tf.compat.v1.global_variables_initializer())
      weights = opt1.get_weights()
      with self.assertRaisesRegex(ValueError, 'but the optimizer was'):
        opt2.set_weights(weights)

      # Assert set_weights and variables get updated to same value.
      var5 = tf.Variable([1.0, 2.0, 3.0], dtype=tf.float32)
      var6 = tf.Variable([4.0, 5.0, 6.0], dtype=tf.float32)
      loss4 = lambda: 3 * var5 + 5 * var6
      opt_op_4 = opt2.minimize(loss4, [var5, var6])
      self.evaluate(tf.compat.v1.global_variables_initializer())
      opt2.set_weights(weights)
      self.evaluate([opt_op_3, opt_op_4])
      self.assertAllClose(
          self.evaluate([var3, var4]), self.evaluate([var5, var6]))
Example No. 29
  def testNumericEquivalenceForAmsgrad(self):
    if tf.executing_eagerly():
      self.skipTest(
          'v1 optimizer does not run in eager mode')
    np.random.seed(1331)
    with test_utils.use_gpu():
      train_samples = 20
      input_dim = 3
      num_classes = 2
      (x, y), _ = test_utils.get_test_data(
          train_samples=train_samples,
          test_samples=10,
          input_shape=(input_dim,),
          num_classes=num_classes)
      y = np_utils.to_categorical(y)

      num_hidden = 5
      model_k_v1 = test_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2 = test_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2.set_weights(model_k_v1.get_weights())

      opt_k_v1 = optimizer_v1.Adam(amsgrad=True)
      opt_k_v2 = adam.Adam(amsgrad=True)

      model_k_v1.compile(
          opt_k_v1,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=test_utils.should_run_eagerly())
      model_k_v2.compile(
          opt_k_v2,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=test_utils.should_run_eagerly())

      hist_k_v1 = model_k_v1.fit(x, y, batch_size=5, epochs=10, shuffle=False)
      hist_k_v2 = model_k_v2.fit(x, y, batch_size=5, epochs=10, shuffle=False)

      self.assertAllClose(model_k_v1.get_weights(), model_k_v2.get_weights())
      self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights())
      self.assertAllClose(hist_k_v1.history['loss'], hist_k_v2.history['loss'])
Example No. 30
 def _initialized_model(self):
     input_value = tf.constant([[3.0]])
     model = MyModel()
     optimizer = adam.Adam(0.001)
     root_trackable = tf.train.Checkpoint(optimizer=optimizer, model=model)
     with tf.GradientTape() as tape:
         loss = model(input_value)
     variables = model.trainable_variables
     gradients = tape.gradient(loss, variables)
     train_op = optimizer.apply_gradients(zip(gradients, variables))
     self.evaluate(trackable_utils.gather_initializers(root_trackable))
     self.evaluate(train_op)
     # A regular variable, a slot variable, and a non-slot Optimizer variable
     # with known values to check when loading.
     self.evaluate(model._named_dense.bias.assign([1.0]))
     self.evaluate(
         optimizer.get_slot(var=model._named_dense.bias,
                            slot_name="m").assign([2.0]))
     self.evaluate(optimizer.beta_1.assign(3.0))
     return root_trackable