Example #1
 def testSparseRepeatedIndices(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
         with self.cached_session():
             repeated_index_update_var = variables.Variable([[1.0], [2.0]],
                                                            dtype=dtype)
             aggregated_update_var = variables.Variable([[1.0], [2.0]],
                                                        dtype=dtype)
             grad_repeated_index = ops.IndexedSlices(
                 constant_op.constant([0.1, 0.1], shape=[2, 1],
                                      dtype=dtype),
                 constant_op.constant([1, 1]), constant_op.constant([2, 1]))
             grad_aggregated = ops.IndexedSlices(
                 constant_op.constant([0.2], shape=[1, 1], dtype=dtype),
                 constant_op.constant([1]), constant_op.constant([2, 1]))
             repeated_update = adam.AdamOptimizer().apply_gradients([
                 (grad_repeated_index, repeated_index_update_var)
             ])
             aggregated_update = adam.AdamOptimizer().apply_gradients([
                 (grad_aggregated, aggregated_update_var)
             ])
             variables.global_variables_initializer().run()
             self.assertAllClose(aggregated_update_var.eval(),
                                 repeated_index_update_var.eval())
             for _ in range(3):
                 repeated_update.run()
                 aggregated_update.run()
                 self.assertAllClose(aggregated_update_var.eval(),
                                     repeated_index_update_var.eval())
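
A note on what this test checks: Adam's sparse update path sums gradient rows that share an index, so two rows of 0.1 at index 1 are applied exactly like one aggregated row of 0.2 at index 1. A minimal NumPy illustration of that aggregation (purely illustrative, not part of the test above):

import numpy as np

# Duplicate indices accumulate: two rows of 0.1 at index 1 equal one row of 0.2.
repeated = np.zeros((2, 1))
np.add.at(repeated, [1, 1], np.array([[0.1], [0.1]]))
aggregated = np.zeros((2, 1))
np.add.at(aggregated, [1], np.array([[0.2]]))
assert np.allclose(repeated, aggregated)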
Example #2
  def testDeferredSlotRestoration(self):
    checkpoint_directory = self.get_temp_dir()

    root = util.Checkpoint()
    root.var = util.add_variable(
        root, name="var", initializer=0.)
    optimizer = adam.AdamOptimizer(0.1)
    if context.executing_eagerly():
      optimizer.minimize(root.var.read_value)
    else:
      train_op = optimizer.minimize(root.var)
      # Note that `optimizer` has not been added as a dependency of
      # `root`. Create a one-off grouping so that slot variables for `root.var`
      # get initialized too.
      self.evaluate(util.gather_initializers(
          util.Checkpoint(root=root, optimizer=optimizer)))
      self.evaluate(train_op)
    self.evaluate(state_ops.assign(root.var, 12.))
    no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots"))
    root.optimizer = optimizer
    self.evaluate(state_ops.assign(root.var, 13.))
    self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var),
                                   14.))
    slots_path = root.save(os.path.join(checkpoint_directory, "with_slots"))
    new_root = util.Checkpoint()
    # Load the slot-containing checkpoint (deferred), then immediately overwrite
    # the non-slot variable (also deferred).
    slot_status = new_root.restore(slots_path)
    no_slot_status = new_root.restore(no_slots_path)
    with self.assertRaises(AssertionError):
      no_slot_status.assert_consumed()
    new_root.var = util.add_variable(
        new_root, name="var", shape=[])
    no_slot_status.assert_consumed()
    no_slot_status.run_restore_ops()
    self.assertEqual(12., self.evaluate(new_root.var))
    new_root.optimizer = adam.AdamOptimizer(0.1)
    with self.assertRaisesRegexp(AssertionError, "beta1_power"):
      slot_status.assert_consumed()
    self.assertEqual(12., self.evaluate(new_root.var))
    if context.executing_eagerly():
      # Slot variables are only created with restoring initializers when
      # executing eagerly.
      self.assertEqual(14., self.evaluate(
          new_root.optimizer.get_slot(name="m", var=new_root.var)))
    else:
      self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var),
                    None)
    if context.executing_eagerly():
      new_root.optimizer.minimize(new_root.var.read_value)
    else:
      train_op = new_root.optimizer.minimize(new_root.var)
      # The slot variable now exists; restore() didn't create it, but we should
      # now have a restore op for it.
      slot_status.run_restore_ops()
      self.assertEqual(14., self.evaluate(
          new_root.optimizer.get_slot(name="m", var=new_root.var)))
      self.evaluate(train_op)
    slot_status.assert_consumed()
Example #3
  def testAnonymousVarsInInit(self):

    class Model(training.Model):

      def __init__(self):
        super(Model, self).__init__()
        self.w = resource_variable_ops.ResourceVariable(0.0)
        self.b = resource_variable_ops.ResourceVariable(0.0)
        self.vars = [self.w, self.b]

      def call(self, x):
        return x * self.w + self.b

    with context.eager_mode():
      model = Model()
      optimizer = adam.AdamOptimizer(learning_rate=0.05)
      checkpoint_directory = self.get_temp_dir()
      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
      checkpoint = util.Checkpoint(
          model=model, optimizer=optimizer)
      for _ in range(2):
        checkpoint.save(checkpoint_prefix)
        with backprop.GradientTape() as tape:
          loss = (constant_op.constant(1.)
                  - model(constant_op.constant(1.))) ** 2
        grad = tape.gradient(loss, model.vars)
        optimizer.apply_gradients(
            [(g, v) for g, v in zip(grad, model.vars)])
Example #4
 def testAgnosticUsage(self):
   """Graph/eager agnostic usage."""
   # Does create garbage when executing eagerly due to ops.Graph() creation.
   num_training_steps = 10
   checkpoint_directory = self.get_temp_dir()
   checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
   for training_continuation in range(3):
     with ops.Graph().as_default(), self.test_session(
         graph=ops.get_default_graph()), test_util.device(use_gpu=True):
       model = MyModel()
       optimizer = adam.AdamOptimizer(0.001)
       root = util.Checkpoint(
           optimizer=optimizer, model=model,
           global_step=training_util.get_or_create_global_step())
       checkpoint_path = checkpoint_management.latest_checkpoint(
           checkpoint_directory)
       status = root.restore(save_path=checkpoint_path)
       input_value = constant_op.constant([[3.]])
       train_fn = functools.partial(
           optimizer.minimize,
           functools.partial(model, input_value),
           global_step=root.global_step)
       if not context.executing_eagerly():
         train_fn = functools.partial(self.evaluate, train_fn())
       status.initialize_or_restore()
       for _ in range(num_training_steps):
         train_fn()
       root.save(file_prefix=checkpoint_prefix)
       self.assertEqual((training_continuation + 1) * num_training_steps,
                        self.evaluate(root.global_step))
       self.assertEqual(training_continuation + 1,
                        self.evaluate(root.save_counter))
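
MyModel is referenced throughout the remaining examples but is not defined in this listing. Based on the attribute and checkpoint names asserted in Example #19 (model/_named_dense/kernel, model/_named_dense/bias, model/_second/kernel), a minimal stand-in sketch could look like the class below; it is a hypothetical reconstruction that omits the non-Layer dependency (_non_layer/a_variable) the original class also tracks.

import tensorflow as tf

class MyModel(tf.keras.Model):
    # Hypothetical stand-in for running these snippets; not the original test class.

    def __init__(self):
        super(MyModel, self).__init__()
        self._named_dense = tf.keras.layers.Dense(1, use_bias=True)  # kernel + bias
        self._second = tf.keras.layers.Dense(1, use_bias=False)      # kernel only

    def call(self, values):
        return self._second(self._named_dense(values))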
Example #5
 def testUsageGraph(self):
   """Expected usage when graph building."""
   with context.graph_mode():
     num_training_steps = 10
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     for training_continuation in range(3):
       with ops.Graph().as_default():
         model = MyModel()
         optimizer = adam.AdamOptimizer(0.001)
         root = util.CheckpointV1(
             optimizer=optimizer, model=model,
             global_step=training_util.get_or_create_global_step())
         input_value = constant_op.constant([[3.]])
         train_op = optimizer.minimize(
             model(input_value),
             global_step=root.global_step)
         checkpoint_path = checkpoint_management.latest_checkpoint(
             checkpoint_directory)
         with self.session(graph=ops.get_default_graph()) as session:
           status = root.restore(save_path=checkpoint_path)
           status.initialize_or_restore(session=session)
           if checkpoint_path is None:
             self.assertEqual(0, training_continuation)
             with self.assertRaises(AssertionError):
               status.assert_consumed()
           else:
             status.assert_consumed()
           for _ in range(num_training_steps):
             session.run(train_op)
           root.save(file_prefix=checkpoint_prefix, session=session)
           self.assertEqual((training_continuation + 1) * num_training_steps,
                            session.run(root.global_step))
           self.assertEqual(training_continuation + 1,
                            session.run(root.save_counter))
Example #6
  def testMultipleGraphsNonSlotVariables(self):
    with context.graph_mode():
      checkpoint_directory = self.get_temp_dir()
      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
      optimizer = adam.AdamOptimizer(0.001)
      # Construct a model in one graph
      first_graph = ops.Graph()
      first_session = session_lib.Session(graph=first_graph)
      with first_graph.as_default(), first_session.as_default():
        first_variable = resource_variable_ops.ResourceVariable([1.])
        first_root_trackable = util.Checkpoint(
            optimizer=optimizer, variable=first_variable)
        train_op = optimizer.minimize(first_variable.read_value)
        self.evaluate(util.gather_initializers(
            first_root_trackable))
        self.evaluate(train_op)
        self.evaluate(first_variable.assign([1.]))
        self.evaluate(optimizer.get_slot(
            var=first_variable, name="m").assign([2.]))
        beta_1_power, _ = optimizer._get_beta_accumulators()
        self.evaluate(beta_1_power.assign(3.))

      # Save and load in a second graph
      second_graph = ops.Graph()
      with second_graph.as_default(), session_lib.Session(graph=second_graph):
        second_variable = resource_variable_ops.ResourceVariable([1.])
        second_root_trackable = util.Checkpoint(
            optimizer=optimizer, variable=second_variable)
        train_op = optimizer.minimize(second_variable.read_value)
        second_root_trackable.restore(None).initialize_or_restore()
        self.evaluate(train_op)
        self.evaluate(second_variable.assign([4.]))
        self.evaluate(optimizer.get_slot(
            var=second_variable, name="m").assign([5.]))
        beta_1_power, _ = optimizer._get_beta_accumulators()
        self.evaluate(beta_1_power.assign(6.))
        save_path = second_root_trackable.save(checkpoint_prefix)
        self.evaluate(second_variable.assign([7.]))
        self.evaluate(optimizer.get_slot(
            var=second_variable, name="m").assign([8.]))
        beta_1_power, _ = optimizer._get_beta_accumulators()
        self.assertAllEqual(6., self.evaluate(beta_1_power))
        status = second_root_trackable.restore(save_path)
        status.assert_consumed().run_restore_ops()
        self.assertAllEqual([4.], self.evaluate(second_variable))
        self.assertAllEqual([5.], self.evaluate(optimizer.get_slot(
            var=second_variable, name="m")))
        beta_1_power, _ = optimizer._get_beta_accumulators()
        self.assertAllEqual(6., self.evaluate(beta_1_power))

       # Check that the first graph is untouched
      with first_graph.as_default(), first_session.as_default():
        self.assertAllEqual([1.], self.evaluate(first_variable))
        self.assertAllEqual([2.], self.evaluate(optimizer.get_slot(
            var=first_variable, name="m")))
        beta_1_power, _ = optimizer._get_beta_accumulators()
        self.assertAllEqual(3., self.evaluate(beta_1_power))
Example #7
 def testSlotsUniqueEager(self):
     with context.eager_mode():
         v1 = resource_variable_ops.ResourceVariable(1.)
         v2 = resource_variable_ops.ResourceVariable(1.)
         opt = adam.AdamOptimizer(1.)
         opt.minimize(lambda: v1 + v2)
          # Adam keeps an "m" and a "v" slot for each of v1 and v2 (4 slot
          # variables) plus the beta1_power and beta2_power accumulators
          # (2 non-slot variables), so 6 unique variables in total.
          self.assertEqual(6, len(set(opt.variables())))
Example #8
    def doTestSparse(self, use_resource=False):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                if use_resource:
                    var0 = resource_variable_ops.ResourceVariable(var0_np)
                    var1 = resource_variable_ops.ResourceVariable(var1_np)
                else:
                    var0 = variables.Variable(var0_np)
                    var1 = variables.Variable(var1_np)
                grads0_np_indices = np.array([0, 1], dtype=np.int32)
                grads0 = ops.IndexedSlices(
                    constant_op.constant(grads0_np),
                    constant_op.constant(grads0_np_indices),
                    constant_op.constant([2]))
                grads1_np_indices = np.array([0, 1], dtype=np.int32)
                grads1 = ops.IndexedSlices(
                    constant_op.constant(grads1_np),
                    constant_op.constant(grads1_np_indices),
                    constant_op.constant([2]))
                opt = adam.AdamOptimizer()
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                variables.global_variables_initializer().run()

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())

                beta1_power, beta2_power = opt._get_beta_accumulators()

                # Run 3 steps of Adam
                for t in range(1, 4):
                    self.assertAllCloseAccordingToType(0.9**t,
                                                       beta1_power.eval())
                    self.assertAllCloseAccordingToType(0.999**t,
                                                       beta2_power.eval())
                    update.run()

                    var0_np, m0, v0 = adam_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adam_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0.eval())
                    self.assertAllCloseAccordingToType(var1_np, var1.eval())
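
The adam_update_numpy helper used above (and again in Examples #12 and #18) is not reproduced in this listing. A reference sketch of one Adam step with TensorFlow's default hyperparameters, assuming the signature the tests call it with, would be:

import numpy as np

def adam_update_numpy(param, g_t, t, m, v,
                      alpha=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # t is the 1-based step number; m and v are the running first and second moments.
    alpha_t = alpha * np.sqrt(1 - beta2**t) / (1 - beta1**t)
    m_t = beta1 * m + (1 - beta1) * g_t
    v_t = beta2 * v + (1 - beta2) * g_t * g_t
    param_t = param - alpha_t * m_t / (np.sqrt(v_t) + epsilon)
    return param_t, m_t, v_t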
Example #9
 def testSparseDevicePlacement(self):
     for index_dtype in [dtypes.int32, dtypes.int64]:
         with self.cached_session(force_gpu=test.is_gpu_available()):
             # If a GPU is available, tests that all optimizer ops can be placed on
             # it (i.e. they have GPU kernels).
             var = variables.Variable([[1.0], [2.0]])
             indices = constant_op.constant([0, 1], dtype=index_dtype)
             gathered_sum = math_ops.reduce_sum(
                 array_ops.gather(var, indices))
             optimizer = adam.AdamOptimizer(3.0)
             minimize_op = optimizer.minimize(gathered_sum)
             variables.global_variables_initializer().run()
             minimize_op.run()
Example #10
  def test_trackable_save_restore(self):

    def _templated():
      v = variable_scope.get_variable(
          "v", shape=[1], initializer=init_ops.zeros_initializer(),
          use_resource=True)
      v2 = variable_scope.get_variable(
          "v2", shape=[1], initializer=init_ops.zeros_initializer(),
          use_resource=True)
      return v, v + 1., v2

    save_template = template.make_template("s1", _templated)
    v1_save, _, v2_save = save_template()
    optimizer = adam.AdamOptimizer(0.0)
    save_root = util.Checkpoint(
        my_template=save_template, optimizer=optimizer)
    optimizer.minimize(v1_save.read_value)
    self.evaluate([v.initializer for v in optimizer.variables()])
    self.evaluate(v1_save.assign([12.]))
    self.evaluate(v2_save.assign([14.]))
    checkpoint_directory = self.get_temp_dir()
    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
    save_path = save_root.save(checkpoint_prefix)

    load_template = template.make_template("s2", _templated)
    load_optimizer = adam.AdamOptimizer(0.0)
    load_root = util.Checkpoint(
        my_template=load_template, optimizer=load_optimizer)
    status = load_root.restore(save_path)
    var, var_plus_one, var2 = load_template()
    load_optimizer.minimize(var.read_value)
    self.assertEqual(2, len(load_template._checkpoint_dependencies))
    self.assertEqual("v", load_template._checkpoint_dependencies[0].name)
    self.assertEqual("v2", load_template._checkpoint_dependencies[1].name)
    status.assert_consumed().run_restore_ops()
    self.assertAllEqual([12.], self.evaluate(var))
    self.assertAllEqual([13.], self.evaluate(var_plus_one))
    self.assertAllEqual([14.], self.evaluate(var2))
Example #11
 def testManySavesGraph(self):
   """Saves after the first should not modify the graph."""
   with context.graph_mode():
     graph = ops.Graph()
     with graph.as_default(), self.session(graph):
       checkpoint_directory = self.get_temp_dir()
       checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
       obj = util.Checkpoint()
       obj.var = variable_scope.get_variable(name="v", initializer=0.)
       obj.opt = adam.AdamOptimizer(0.1)
       obj.opt.minimize(obj.var.read_value())
       self.evaluate(util.gather_initializers(obj))
       obj.save(checkpoint_prefix)
       before_ops = graph.get_operations()
       obj.save(checkpoint_prefix)
       self.assertEqual(before_ops, graph.get_operations())
Example #12
    def testSharing(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            with self.cached_session():
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                var0 = variables.Variable(var0_np)
                var1 = variables.Variable(var1_np)
                grads0 = constant_op.constant(grads0_np)
                grads1 = constant_op.constant(grads1_np)
                opt = adam.AdamOptimizer()
                update1 = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                update2 = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                variables.global_variables_initializer().run()

                beta1_power, beta2_power = opt._get_beta_accumulators()

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], var0.eval())
                self.assertAllClose([3.0, 4.0], var1.eval())

                # Run 3 steps of intertwined Adam1 and Adam2.
                for t in range(1, 4):
                    self.assertAllCloseAccordingToType(0.9**t,
                                                       beta1_power.eval())
                    self.assertAllCloseAccordingToType(0.999**t,
                                                       beta2_power.eval())
                    if t % 2 == 0:
                        update1.run()
                    else:
                        update2.run()

                    var0_np, m0, v0 = adam_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adam_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np, var0.eval())
                    self.assertAllCloseAccordingToType(var1_np, var1.eval())
Example #13
    def testTwoSessions(self):
        optimizer = adam.AdamOptimizer()
        g = ops.Graph()
        with g.as_default():
            with session.Session():
                var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
                grads0 = constant_op.constant(np.array([0.1, 0.1]))
                optimizer.apply_gradients([(grads0, var0)])

        gg = ops.Graph()
        with gg.as_default():
            with session.Session():
                var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
                grads0 = constant_op.constant(np.array([0.1, 0.1]))

                 # If the optimizer kept any state that was not keyed by graph,
                 # the following line would fail.
                optimizer.apply_gradients([(grads0, var0)])
Example #14
 def testWithDefun(self):
   num_training_steps = 2
   checkpoint_directory = self.get_temp_dir()
   checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
   for training_continuation in range(3):
     with ops.Graph().as_default(), self.test_session(
         graph=ops.get_default_graph()), test_util.device(use_gpu=True):
       model = MyModel()
       # Don't actually train so we can test variable values
       optimizer = adam.AdamOptimizer(0.)
       root = util.Checkpoint(
           optimizer=optimizer, model=model,
           global_step=training_util.get_or_create_global_step())
       checkpoint_path = checkpoint_management.latest_checkpoint(
           checkpoint_directory)
       status = root.restore(save_path=checkpoint_path)
       def train_fn():
         @function.defun
         def _call_model(x):
           return model(x)
         with backprop.GradientTape() as tape:
           loss = _call_model(constant_op.constant([[3.]]))
         gradients = tape.gradient(loss, model.variables)
         return optimizer.apply_gradients(zip(gradients, model.variables),
                                          global_step=root.global_step)
       if not context.executing_eagerly():
         train_fn = functools.partial(
             self.evaluate, train_fn())
       status.initialize_or_restore()
       for _ in range(num_training_steps):
         train_fn()
       if training_continuation > 0:
         status.assert_consumed()
         self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
       else:
         self.evaluate(model.variables[0].assign([[42.]]))
       root.save(file_prefix=checkpoint_prefix)
       self.assertEqual((training_continuation + 1) * num_training_steps,
                        self.evaluate(root.global_step))
       self.assertEqual(training_continuation + 1,
                        self.evaluate(root.save_counter))
Example #15
 def _initialized_model(self):
   input_value = constant_op.constant([[3.]])
   model = MyModel()
   optimizer = adam.AdamOptimizer(0.001)
   optimizer_step = training_util.get_or_create_global_step()
   root_trackable = util.Checkpoint(
       optimizer=optimizer, model=model, optimizer_step=optimizer_step)
   train_op = optimizer.minimize(
       functools.partial(model, input_value),
       global_step=optimizer_step)
   self.evaluate(util.gather_initializers(
       root_trackable))
   self.evaluate(train_op)
   # A regular variable, a slot variable, and a non-slot Optimizer variable
   # with known values to check when loading.
   self.evaluate(model._named_dense.bias.assign([1.]))
   self.evaluate(optimizer.get_slot(
       var=model._named_dense.bias, name="m").assign([2.]))
   beta_1_power, _ = optimizer._get_beta_accumulators()
   self.evaluate(beta_1_power.assign(3.))
   return root_trackable
Example #16
 def testDeferredRestorationUsageEager(self):
   """An idiomatic eager execution example."""
   num_training_steps = 10
   checkpoint_directory = self.get_temp_dir()
   checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
   for training_continuation in range(3):
     model = MyModel()
     optimizer = adam.AdamOptimizer(0.001)
     root = util.Checkpoint(
         optimizer=optimizer, model=model,
         optimizer_step=training_util.get_or_create_global_step())
     root.restore(checkpoint_management.latest_checkpoint(
         checkpoint_directory))
     for _ in range(num_training_steps):
       # TODO(allenl): Use a Dataset and serialize/checkpoint it.
       input_value = constant_op.constant([[3.]])
       optimizer.minimize(
           lambda: model(input_value),  # pylint: disable=cell-var-from-loop
           global_step=root.optimizer_step)
     root.save(file_prefix=checkpoint_prefix)
     self.assertEqual((training_continuation + 1) * num_training_steps,
                      root.optimizer_step.numpy())
Example #17
 def testSaveRestore(self):
   model = MyModel()
   optimizer = adam.AdamOptimizer(0.001)
   root_trackable = util.Checkpoint(
       optimizer=optimizer, model=model)
   input_value = constant_op.constant([[3.]])
   if context.executing_eagerly():
     optimizer.minimize(
         lambda: model(input_value))
   else:
     train_op = optimizer.minimize(model(input_value))
     # TODO(allenl): Make initialization more pleasant when graph building.
     root_trackable.save_counter  # pylint: disable=pointless-statement
     self.evaluate(util.gather_initializers(
         root_trackable))
     self.evaluate(train_op)
   prefix = os.path.join(self.get_temp_dir(), "ckpt")
   self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
   m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
   self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
   save_path = root_trackable.save(file_prefix=prefix)
   self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
   self.evaluate(state_ops.assign(root_trackable.save_counter, 3))
   optimizer_variables = self.evaluate(optimizer.variables())
   self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
   # Immediate restoration
   status = root_trackable.restore(save_path=save_path).assert_consumed()
   status.run_restore_ops()
   self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1]))
   self.assertAllEqual(1, self.evaluate(root_trackable.save_counter))
   self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
   if not context.executing_eagerly():
     return  # Restore-on-create is only supported when executing eagerly
   on_create_model = MyModel()
   on_create_optimizer = adam.AdamOptimizer(
       0.001,
        # Preserve beta_1_power and beta_2_power when applying gradients
       # so we can test that they've been restored correctly.
       beta1=1.0,
       beta2=1.0)
   on_create_root = util.Checkpoint(
       optimizer=on_create_optimizer, model=on_create_model)
   # Deferred restoration
   status = on_create_root.restore(save_path=save_path)
   on_create_model(constant_op.constant([[3.]]))  # create variables
   self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
   self.assertAllEqual([42.],
                       self.evaluate(
                           on_create_model._named_dense.variables[1]))
   on_create_m_bias_slot = on_create_optimizer.get_slot(
       on_create_model._named_dense.variables[1], "m")
   # Optimizer slot variables are created when the original variable is
   # restored.
   self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
   self.assertAllEqual(optimizer_variables[2:],
                       self.evaluate(on_create_optimizer.variables()))
   dummy_var = resource_variable_ops.ResourceVariable([1.])
   on_create_optimizer.minimize(loss=dummy_var.read_value)
   status.assert_consumed()
   beta_1_power, beta_2_power = on_create_optimizer._get_beta_accumulators()
   self.assertAllEqual(optimizer_variables[0], self.evaluate(beta_1_power))
   self.assertAllEqual(optimizer_variables[1], self.evaluate(beta_2_power))
Example #18
    def doTestBasic(self, use_resource=False):
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            with self.session(graph=ops.Graph()):
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                if use_resource:
                    var0 = resource_variable_ops.ResourceVariable(
                        var0_np, name="var0_%d" % i)
                    var1 = resource_variable_ops.ResourceVariable(
                        var1_np, name="var1_%d" % i)
                else:
                    var0 = variables.Variable(var0_np)
                    var1 = variables.Variable(var1_np)
                grads0 = constant_op.constant(grads0_np)
                grads1 = constant_op.constant(grads1_np)

                opt = adam.AdamOptimizer()
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                opt_variables = opt.variables()
                beta1_power, beta2_power = opt._get_beta_accumulators()
                 self.assertIsNotNone(beta1_power)
                 self.assertIsNotNone(beta2_power)
                self.assertIn(beta1_power, opt_variables)
                self.assertIn(beta2_power, opt_variables)

                with ops.Graph().as_default():
                    # Shouldn't return non-slot variables from other graphs.
                    self.assertEqual(0, len(opt.variables()))

                if not context.executing_eagerly():
                    self.evaluate(variables.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                beta1_power, beta2_power = opt._get_beta_accumulators()

                # Run 3 steps of Adam
                for t in range(1, 4):
                    if not context.executing_eagerly():
                        self.evaluate(update)
                    elif t > 1:
                        opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))

                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), self.evaluate(beta1_power))
                    self.assertAllCloseAccordingToType(
                        0.999**(t + 1), self.evaluate(beta2_power))

                    var0_np, m0, v0 = adam_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adam_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
                    if use_resource:
                        self.assertEqual("var0_%d/Adam:0" % (i, ),
                                         opt.get_slot(var=var0, name="m").name)
Example #19
 def testNamingWithOptimizer(self):
   input_value = constant_op.constant([[3.]])
   model = MyModel()
   # A nuisance Model using the same optimizer. Its slot variables should not
   # go in the checkpoint, since it is never depended on.
   other_model = MyModel()
   optimizer = adam.AdamOptimizer(0.001)
   optimizer_step = training_util.get_or_create_global_step()
   root_trackable = util.Checkpoint(
       optimizer=optimizer, model=model, optimizer_step=optimizer_step)
   if context.executing_eagerly():
     optimizer.minimize(
         lambda: model(input_value),
         global_step=optimizer_step)
     optimizer.minimize(
         lambda: other_model(input_value),
         global_step=optimizer_step)
   else:
     train_op = optimizer.minimize(
         model(input_value), global_step=optimizer_step)
     optimizer.minimize(
         other_model(input_value),
         global_step=optimizer_step)
     self.evaluate(util.gather_initializers(
         root_trackable))
     self.evaluate(train_op)
   named_variables, serialized_graph, _ = graph_view.ObjectGraphView(
       root_trackable).serialize_object_graph()
   expected_checkpoint_names = (
       # Created in the root node, so no prefix.
       "optimizer_step",
       "model/_second/kernel",
       "model/_named_dense/kernel",
       "model/_named_dense/bias",
       # non-Layer dependency of the model
       "model/_non_layer/a_variable",
       # The optimizer creates two non-slot variables
       "optimizer/beta1_power",
       "optimizer/beta2_power",
       # Slot variables
       "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
       "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
       "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
       "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
       "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
       "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
   )
   suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
   expected_checkpoint_names = [
       name + suffix for name in expected_checkpoint_names]
   named_variables = {v.name: v for v in named_variables}
   six.assertCountEqual(self, expected_checkpoint_names,
                        named_variables.keys())
   # Check that we've mapped to the right variable objects (not exhaustive)
   self.assertEqual(
       "global_step",
       named_variables["optimizer_step" + suffix].full_name)
   self.assertEqual(
       "my_model/dense_1/kernel",
       named_variables["model/_second/kernel" + suffix].full_name)
   self.assertEqual(
       "my_model/dense/kernel",
       named_variables["model/_named_dense/kernel" + suffix].full_name)
   self.assertEqual(
       "beta1_power",
       named_variables["optimizer/beta1_power" + suffix].full_name)
   self.assertEqual(
       "beta2_power",
       named_variables["optimizer/beta2_power" + suffix].full_name)
   # Spot check the generated protocol buffers.
   self.assertEqual("optimizer",
                    serialized_graph.nodes[0].children[1].local_name)
   optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
       1].node_id]
   self.assertEqual("beta1_power", optimizer_node.children[0].local_name)
   self.assertEqual(
       "beta1_power", serialized_graph.nodes[optimizer_node.children[0]
                                             .node_id].attributes[0].full_name)
   self.assertEqual(
       "my_model/dense/kernel",
       serialized_graph.nodes[optimizer_node.slot_variables[0]
                              .original_variable_node_id]
       .attributes[0].full_name)
   # We strip off the :0 suffix, as variable.name-based saving does.
   self.assertEqual(
       "my_model/dense/kernel/Adam",
       serialized_graph.nodes[optimizer_node.slot_variables[0]
                              .slot_variable_node_id]
       .attributes[0].full_name)
   self.assertEqual(
       "my_model/dense/kernel/Adam:0",
       optimizer.get_slot(
           var=model._named_dense.kernel,
           name="m").name)
   self.assertEqual(
       "model/_named_dense/kernel" + suffix,
       serialized_graph.nodes[
           optimizer_node.slot_variables[0]
           .original_variable_node_id].attributes[0].checkpoint_key)
   self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
   self.assertEqual(
       "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix,
       serialized_graph.nodes[
           optimizer_node.slot_variables[0]
           .slot_variable_node_id].attributes[0].checkpoint_key)
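
The checkpoint keys asserted above (the /.OPTIMIZER_SLOT/optimizer/... and /.ATTRIBUTES/VARIABLE_VALUE entries) can also be inspected directly once a checkpoint has actually been written, e.g. with the prefix returned by root_trackable.save(...) in the earlier examples. A small sketch, assuming such a save_path is available:

import tensorflow as tf

def print_checkpoint_keys(save_path):
    # Prints every (key, shape) pair stored in the checkpoint at save_path.
    for name, shape in tf.train.list_variables(save_path):
        print(name, shape)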