def testDeferredSlotRestoration(self): checkpoint_directory = self.get_temp_dir() root = trackable_utils.Checkpoint() root.var = trackable_utils.add_variable( root, name="var", initializer=0.) optimizer = adam.AdamOptimizer(0.1) if context.executing_eagerly(): optimizer.minimize(root.var.read_value) else: train_op = optimizer.minimize(root.var) # Note that `optimizer` has not been added as a dependency of # `root`. Create a one-off grouping so that slot variables for `root.var` # get initialized too. self.evaluate(trackable_utils.gather_initializers( trackable_utils.Checkpoint(root=root, optimizer=optimizer))) self.evaluate(train_op) self.evaluate(state_ops.assign(root.var, 12.)) no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots")) root.optimizer = optimizer self.evaluate(state_ops.assign(root.var, 13.)) self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var), 14.)) slots_path = root.save(os.path.join(checkpoint_directory, "with_slots")) new_root = trackable_utils.Checkpoint() # Load the slot-containing checkpoint (deferred), then immediately overwrite # the non-slot variable (also deferred). slot_status = new_root.restore(slots_path) no_slot_status = new_root.restore(no_slots_path) with self.assertRaises(AssertionError): no_slot_status.assert_consumed() new_root.var = trackable_utils.add_variable( new_root, name="var", shape=[]) no_slot_status.assert_consumed() no_slot_status.run_restore_ops() self.assertEqual(12., self.evaluate(new_root.var)) new_root.optimizer = adam.AdamOptimizer(0.1) slot_status.assert_existing_objects_matched() with self.assertRaisesRegexp(AssertionError, "beta1_power"): slot_status.assert_consumed() self.assertEqual(12., self.evaluate(new_root.var)) if context.executing_eagerly(): # Slot variables are only created with restoring initializers when # executing eagerly. self.assertEqual(14., self.evaluate( new_root.optimizer.get_slot(name="m", var=new_root.var))) else: self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var), None) if context.executing_eagerly(): new_root.optimizer.minimize(new_root.var.read_value) else: train_op = new_root.optimizer.minimize(new_root.var) # The slot variable now exists; restore() didn't create it, but we should # now have a restore op for it. slot_status.run_restore_ops() self.assertEqual(14., self.evaluate( new_root.optimizer.get_slot(name="m", var=new_root.var))) self.evaluate(train_op) slot_status.assert_consumed()
def testAmbiguousLoad(self): # Not OK to split one checkpoint object into two checkpoint_directory = self.get_temp_dir() save_root = trackable_utils.Checkpoint() save_root.dep_one = tracking.AutoTrackable() save_root.dep_two = tracking.AutoTrackable() dep_three = tracking.AutoTrackable() save_root.dep_one.dep_three = dep_three save_root.dep_two.dep_three = dep_three trackable_utils.add_variable(dep_three, name="var", initializer=0.) self.evaluate(trackable_utils.gather_initializers(save_root)) save_path = save_root.save(os.path.join(checkpoint_directory, "ckpt")) load_root = trackable_utils.Checkpoint() status = load_root.restore(save_path) load_root.dep_one = tracking.AutoTrackable() load_root.dep_two = tracking.AutoTrackable() load_root.dep_one.dep_three = tracking.AutoTrackable() load_root.dep_two.dep_three = tracking.AutoTrackable() trackable_utils.add_variable(load_root.dep_one.dep_three, name="var", initializer=0.) trackable_utils.add_variable(load_root.dep_two.dep_three, name="var", initializer=0.) with self.assertRaises(AssertionError): status.assert_consumed() with self.assertRaises(AssertionError): status.assert_existing_objects_matched()
def testObjectsCombined(self): # Currently fine to load two checkpoint objects into one Python object checkpoint_directory = self.get_temp_dir() save_root = trackable_utils.Checkpoint() save_root.dep_one = tracking.AutoTrackable() save_root.dep_two = tracking.AutoTrackable() trackable_utils.add_variable(save_root.dep_one, name="var1", initializer=32., dtype=dtypes.float64) trackable_utils.add_variable(save_root.dep_two, name="var2", initializer=64., dtype=dtypes.float64) self.evaluate(trackable_utils.gather_initializers(save_root)) save_path = save_root.save(os.path.join(checkpoint_directory, "ckpt")) load_root = trackable_utils.Checkpoint() load_root.dep_one = tracking.AutoTrackable() load_root.dep_two = load_root.dep_one v1 = trackable_utils.add_variable(load_root.dep_one, name="var1", shape=[], dtype=dtypes.float64) v2 = trackable_utils.add_variable(load_root.dep_one, name="var2", shape=[], dtype=dtypes.float64) status = load_root.restore( save_path).assert_consumed().assert_existing_objects_matched() status.run_restore_ops() self.assertEqual(32., self.evaluate(v1)) self.assertEqual(64., self.evaluate(v2))
def testDeferredSlotRestoration(self): checkpoint_directory = self.get_temp_dir() root = trackable_utils.Checkpoint() root.var = trackable_utils.add_variable( root, name="var", initializer=0.) optimizer = adam.AdamOptimizer(0.1) if context.executing_eagerly(): optimizer.minimize(root.var.read_value) else: train_op = optimizer.minimize(root.var) # Note that `optimizer` has not been added as a dependency of # `root`. Create a one-off grouping so that slot variables for `root.var` # get initialized too. self.evaluate(trackable_utils.gather_initializers( trackable_utils.Checkpoint(root=root, optimizer=optimizer))) self.evaluate(train_op) self.evaluate(state_ops.assign(root.var, 12.)) no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots")) root.optimizer = optimizer self.evaluate(state_ops.assign(root.var, 13.)) self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var), 14.)) slots_path = root.save(os.path.join(checkpoint_directory, "with_slots")) new_root = trackable_utils.Checkpoint() # Load the slot-containing checkpoint (deferred), then immediately overwrite # the non-slot variable (also deferred). slot_status = new_root.restore(slots_path) no_slot_status = new_root.restore(no_slots_path) with self.assertRaises(AssertionError): no_slot_status.assert_consumed() new_root.var = trackable_utils.add_variable( new_root, name="var", shape=[]) no_slot_status.assert_consumed() no_slot_status.run_restore_ops() self.assertEqual(12., self.evaluate(new_root.var)) new_root.optimizer = adam.AdamOptimizer(0.1) slot_status.assert_existing_objects_matched() with self.assertRaisesRegex(AssertionError, "beta1_power"): slot_status.assert_consumed() self.assertEqual(12., self.evaluate(new_root.var)) if context.executing_eagerly(): # Slot variables are only created with restoring initializers when # executing eagerly. self.assertEqual(14., self.evaluate( new_root.optimizer.get_slot(name="m", var=new_root.var))) else: self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var), None) if context.executing_eagerly(): new_root.optimizer.minimize(new_root.var.read_value) else: train_op = new_root.optimizer.minimize(new_root.var) # The slot variable now exists; restore() didn't create it, but we should # now have a restore op for it. slot_status.run_restore_ops() self.assertEqual(14., self.evaluate( new_root.optimizer.get_slot(name="m", var=new_root.var))) self.evaluate(train_op) slot_status.assert_consumed()
def testAddVariable(self): obj = NonLayerTrackable() with self.assertRaisesRegex(ValueError, "do not specify shape"): trackable_utils.add_variable( obj, name="shape_specified_twice", shape=[], initializer=1) constant_initializer = trackable_utils.add_variable( obj, name="constant_initializer", initializer=1) with variable_scope.variable_scope("some_variable_scope"): ones_initializer = trackable_utils.add_variable( obj, name="ones_initializer", shape=[2], initializer=init_ops.ones_initializer(dtype=dtypes.float32)) bare_initializer = trackable_utils.add_variable( obj, name="bare_initializer", shape=[2, 2], dtype=dtypes.float64, initializer=init_ops.zeros_initializer) # Even in graph mode, there are no naming conflicts between objects, only # naming conflicts within an object. other_duplicate = resource_variable_ops.ResourceVariable( name="duplicate", initial_value=1.) duplicate = trackable_utils.add_variable( obj, name="duplicate", shape=[]) with self.assertRaisesRegex(ValueError, "'duplicate'.*already declared"): trackable_utils.add_variable(obj, name="duplicate", shape=[]) self.evaluate(trackable_utils.gather_initializers(obj)) self.assertEqual("constant_initializer:0", constant_initializer.name) self.assertEqual(1, self.evaluate(constant_initializer)) self.assertEqual("some_variable_scope/ones_initializer:0", ones_initializer.name) self.assertAllEqual([1, 1], self.evaluate(ones_initializer)) self.assertAllEqual([[0., 0.], [0., 0.]], self.evaluate(bare_initializer)) self.assertEqual("a_variable:0", obj.a_variable.name) self.assertEqual("duplicate:0", other_duplicate.name) if context.executing_eagerly(): # When executing eagerly, there's no uniquification of variable names. The # checkpoint name will be the same. self.assertEqual("duplicate:0", duplicate.name) else: # The .name attribute may be globally influenced, but the checkpoint name # won't be (tested below). self.assertEqual("duplicate_1:0", duplicate.name) named_variables, _, _ = ( graph_view.ObjectGraphView(obj).serialize_object_graph()) expected_checkpoint_names = ( "a_variable/.ATTRIBUTES/VARIABLE_VALUE", "bare_initializer/.ATTRIBUTES/VARIABLE_VALUE", "constant_initializer/.ATTRIBUTES/VARIABLE_VALUE", "duplicate/.ATTRIBUTES/VARIABLE_VALUE", "ones_initializer/.ATTRIBUTES/VARIABLE_VALUE", ) six.assertCountEqual( self, expected_checkpoint_names, [v.name for v in named_variables])
def testMultipleGraphsNonSlotVariables(self): with context.graph_mode(): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") optimizer = adam.AdamOptimizer(0.001) # Construct a model in one graph first_graph = ops.Graph() first_session = session_lib.Session(graph=first_graph) with first_graph.as_default(), first_session.as_default(): first_variable = resource_variable_ops.ResourceVariable([1.]) first_root_trackable = util.Checkpoint( optimizer=optimizer, variable=first_variable) train_op = optimizer.minimize(first_variable.read_value) self.evaluate(util.gather_initializers( first_root_trackable)) self.evaluate(train_op) self.evaluate(first_variable.assign([1.])) self.evaluate(optimizer.get_slot( var=first_variable, name="m").assign([2.])) beta_1_power, _ = optimizer._get_beta_accumulators() self.evaluate(beta_1_power.assign(3.)) # Save and load in a second graph second_graph = ops.Graph() with second_graph.as_default(), session_lib.Session(graph=second_graph): second_variable = resource_variable_ops.ResourceVariable([1.]) second_root_trackable = util.Checkpoint( optimizer=optimizer, variable=second_variable) train_op = optimizer.minimize(second_variable.read_value) second_root_trackable.restore(None).initialize_or_restore() self.evaluate(train_op) self.evaluate(second_variable.assign([4.])) self.evaluate(optimizer.get_slot( var=second_variable, name="m").assign([5.])) beta_1_power, _ = optimizer._get_beta_accumulators() self.evaluate(beta_1_power.assign(6.)) save_path = second_root_trackable.save(checkpoint_prefix) self.evaluate(second_variable.assign([7.])) self.evaluate(optimizer.get_slot( var=second_variable, name="m").assign([8.])) beta_1_power, _ = optimizer._get_beta_accumulators() self.assertAllEqual(6., self.evaluate(beta_1_power)) status = second_root_trackable.restore(save_path) status.assert_consumed().run_restore_ops() self.assertAllEqual([4.], self.evaluate(second_variable)) self.assertAllEqual([5.], self.evaluate(optimizer.get_slot( var=second_variable, name="m"))) beta_1_power, _ = optimizer._get_beta_accumulators() self.assertAllEqual(6., self.evaluate(beta_1_power)) # Check that the first graph is unmolested with first_graph.as_default(), first_session.as_default(): self.assertAllEqual([1.], self.evaluate(first_variable)) self.assertAllEqual([2.], self.evaluate(optimizer.get_slot( var=first_variable, name="m"))) beta_1_power, _ = optimizer._get_beta_accumulators() self.assertAllEqual(3., self.evaluate(beta_1_power))
def testAssertConsumedNoCheckpoint(self): prefix = os.path.join(self.get_temp_dir(), "ckpt") v = variable_scope.get_variable(name="v", initializer=0.) self.evaluate(v.initializer) ckpt = trackable_utils.Checkpoint(v=v) self.evaluate(trackable_utils.gather_initializers(ckpt)) save_path = ckpt.save(file_prefix=prefix) status = ckpt.restore(save_path=save_path) del ckpt status.assert_consumed()
def testAssertConsumedNoCheckpoint(self, enable_async_ckpt): if enable_async_ckpt and not context.executing_eagerly(): self.skipTest( "Skipping this test as async checkpoint does not support graph mode.") prefix = os.path.join(self.get_temp_dir(), "ckpt") v = variable_scope.get_variable(name="v", initializer=0.) self.evaluate(v.initializer) ckpt = trackable_utils.Checkpoint(v=v) self.evaluate(trackable_utils.gather_initializers(ckpt)) ckpt_options = checkpoint_options.CheckpointOptions( experimental_enable_async_checkpoint=enable_async_ckpt) save_path = ckpt.save(file_prefix=prefix, options=ckpt_options) status = ckpt.restore(save_path=save_path) del ckpt status.assert_consumed()
def testCheckpointState(self): # No checkpoints are deleted by default checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") obj = tracking.AutoTrackable() obj.var = variable_scope.get_variable(name="v", initializer=0.) self.evaluate(trackable_utils.gather_initializers(obj)) saver = trackable_utils.Checkpoint(obj=obj) for _ in range(10): saver.save(checkpoint_prefix) expected_filenames = ["checkpoint"] for checkpoint_number in range(1, 11): expected_filenames.append("ckpt-%d.index" % (checkpoint_number, )) self.assertEmpty( set(expected_filenames) - set(os.listdir(checkpoint_directory)))
def testManySavesGraph(self): """Saves after the first should not modify the graph.""" with context.graph_mode(): graph = ops.Graph() with graph.as_default(), self.session(graph): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") obj = util.Checkpoint() obj.var = variable_scope.get_variable(name="v", initializer=0.) obj.opt = adam.AdamOptimizer(0.1) obj.opt.minimize(obj.var.read_value()) self.evaluate(util.gather_initializers(obj)) obj.save(checkpoint_prefix) before_ops = graph.get_operations() obj.save(checkpoint_prefix) self.assertEqual(before_ops, graph.get_operations())
def testManySavesGraph(self): """Saves after the first should not modify the graph.""" with context.graph_mode(): graph = tf.Graph() with graph.as_default(), self.session(graph): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") obj = tf.train.Checkpoint() obj.var = tf.Variable(0., name="v") obj.opt = adam.Adam(0.1) variables = [obj.var] gradients = [1.] obj.opt.apply_gradients(zip(gradients, variables)) self.evaluate(trackable_utils.gather_initializers(obj)) obj.save(checkpoint_prefix) graph.finalize() obj.save(checkpoint_prefix)
def testDependencyLoop(self): # Note: this test creates garbage during eager execution because it # purposefully creates a reference cycle. first = trackable_utils.Checkpoint() second = trackable_utils.Checkpoint() first.second = second second.first = first first.v = trackable_utils.add_variable(first, "v1", initializer=[3., 1., 4.]) second.v = trackable_utils.add_variable(second, "v2", initializer=[1., 1., 2., 3.]) self.evaluate(trackable_utils.gather_initializers(first)) checkpoint_directory = self.get_temp_dir() save_path = first.save(os.path.join(checkpoint_directory, "ckpt")) # Test deferred loading first_load = trackable_utils.Checkpoint() status = first_load.restore(save_path) second_load = tracking.AutoTrackable() first_load.second = second_load second_load.first = first_load with self.assertRaises(AssertionError): status.assert_consumed() first_load.v = trackable_utils.add_variable(first_load, "v1", shape=[3]) second_load.v = trackable_utils.add_variable(second_load, "v2", shape=[4]) status.assert_consumed() status.run_restore_ops() self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v)) self.assertAllEqual([1., 1., 2., 3.], self.evaluate(second_load.v)) # Test loading when variables have already been created self.evaluate(first_load.v.assign([2., 7., 1.])) self.assertAllEqual([2., 7., 1.], self.evaluate(first_load.v)) self.evaluate(second_load.v.assign([2., 7., 1., 8.])) self.assertAllEqual([2., 7., 1., 8.], self.evaluate(second_load.v)) status = first_load.restore(save_path).assert_consumed() status.run_restore_ops() self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v)) self.assertAllEqual([1., 1., 2., 3.], self.evaluate(second_load.v))
def testCheckpointStateChangingVarList(self): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") obj = tracking.AutoTrackable() obj.var = variable_scope.get_variable(name="v", initializer=0.) self.evaluate(trackable_utils.gather_initializers(obj)) checkpoint = trackable_utils.Checkpoint(obj=obj) looped_variables = [] for iteration in range(10): new_variable = resource_variable_ops.ResourceVariable(iteration) self.evaluate(new_variable.initializer) setattr(checkpoint, "var_%d" % iteration, new_variable) checkpoint.save(checkpoint_prefix) looped_variables.append(new_variable) expected_filenames = ["checkpoint"] # We've copied the saver each time, but checkpoint management should still # be consistent. Nothing gets deleted. for checkpoint_number in range(1, 11): expected_filenames.append("ckpt-%d.index" % (checkpoint_number,)) self.assertEmpty( set(expected_filenames) - set(os.listdir(checkpoint_directory))) self.assertEqual( checkpoint_prefix + "-10", checkpoint_management.latest_checkpoint(checkpoint_directory)) # The checkpoint list only contains the most recent checkpoint, but they're # all on disk. This means we won't eventually run into proto size limits. self.assertEqual( [checkpoint_prefix + "-10"], (checkpoint_management.get_checkpoint_state(checkpoint_directory) .all_model_checkpoint_paths)) for v in looped_variables: self.evaluate(v.assign(314)) checkpoint.restore(checkpoint_prefix + "-6").run_restore_ops() self.assertEqual(314, self.evaluate(checkpoint.var_9)) self.assertEqual(314, self.evaluate(checkpoint.var_8)) self.assertEqual(314, self.evaluate(checkpoint.var_6)) self.assertEqual(5, self.evaluate(checkpoint.var_5)) self.assertEqual(1, self.evaluate(checkpoint.var_1)) self.assertEqual(0, self.evaluate(checkpoint.var_0)) checkpoint.restore(checkpoint_prefix + "-10").run_restore_ops() self.assertEqual(9, self.evaluate(checkpoint.var_9)) self.assertEqual(8, self.evaluate(checkpoint.var_8)) self.assertEqual(1, self.evaluate(checkpoint.var_1)) self.assertEqual(0, self.evaluate(checkpoint.var_0))
def testPassingCheckpointOptions(self): localhost = "/job:localhost/device:CPU:0" options = checkpoint_options.CheckpointOptions( experimental_io_device=localhost) prefix = os.path.join(self.get_temp_dir(), "ckpt") v = variable_scope.get_variable(name="v", initializer=0.) self.evaluate(v.initializer) ckpt = trackable_utils.Checkpoint(v=v) self.evaluate(trackable_utils.gather_initializers(ckpt)) save_path = ckpt.save(file_prefix=prefix, options=options) status = ckpt.restore(save_path=save_path, options=options) del ckpt status.assert_consumed() # In graph mode, verify that the save and restore ops were set to run on # localhost. if not context.executing_eagerly(): for op in ops.get_default_graph().get_operations(): if op.type in ("SaveV2", "RestoreV2"): self.assertEqual(localhost, op.device)
def _initialized_model(self): input_value = tf.constant([[3.]]) model = MyModel() optimizer = adam.Adam(0.001) root_trackable = tf.train.Checkpoint(optimizer=optimizer, model=model) with tf.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) train_op = optimizer.apply_gradients(zip(gradients, variables)) self.evaluate(trackable_utils.gather_initializers(root_trackable)) self.evaluate(train_op) # A regular variable, a slot variable, and a non-slot Optimizer variable # with known values to check when loading. self.evaluate(model._named_dense.bias.assign([1.])) self.evaluate( optimizer.get_slot(var=model._named_dense.bias, slot_name="m").assign([2.])) self.evaluate(optimizer.beta_1.assign(3.)) return root_trackable
def _initialized_model(self): input_value = constant_op.constant([[3.]]) model = MyModel() optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_trackable = trackable_utils.Checkpoint( optimizer=optimizer, model=model, optimizer_step=optimizer_step) train_op = optimizer.minimize(functools.partial(model, input_value), global_step=optimizer_step) self.evaluate(trackable_utils.gather_initializers(root_trackable)) self.evaluate(train_op) # A regular variable, a slot variable, and a non-slot Optimizer variable # with known values to check when loading. self.evaluate(model._named_dense.bias.assign([1.])) self.evaluate( optimizer.get_slot(var=model._named_dense.bias, name="m").assign([2.])) beta1_power, _ = optimizer._get_beta_accumulators() self.evaluate(beta1_power.assign(3.)) return root_trackable
def _initialized_model(self): input_value = constant_op.constant([[3.]]) model = MyModel() optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_trackable = util.Checkpoint( optimizer=optimizer, model=model, optimizer_step=optimizer_step) train_op = optimizer.minimize( functools.partial(model, input_value), global_step=optimizer_step) self.evaluate(util.gather_initializers( root_trackable)) self.evaluate(train_op) # A regular variable, a slot variable, and a non-slot Optimizer variable # with known values to check when loading. self.evaluate(model._named_dense.bias.assign([1.])) self.evaluate(optimizer.get_slot( var=model._named_dense.bias, name="m").assign([2.])) beta_1_power, _ = optimizer._get_beta_accumulators() self.evaluate(beta_1_power.assign(3.)) return root_trackable
def testNamingWithOptimizer(self): input_value = constant_op.constant([[3.]]) model = MyModel() # A nuisance Model using the same optimizer. Its slot variables should not # go in the checkpoint, since it is never depended on. other_model = MyModel() optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_trackable = util.Checkpoint( optimizer=optimizer, model=model, optimizer_step=optimizer_step) if context.executing_eagerly(): optimizer.minimize( lambda: model(input_value), global_step=optimizer_step) optimizer.minimize( lambda: other_model(input_value), global_step=optimizer_step) else: train_op = optimizer.minimize( model(input_value), global_step=optimizer_step) optimizer.minimize( other_model(input_value), global_step=optimizer_step) self.evaluate(util.gather_initializers( root_trackable)) self.evaluate(train_op) named_variables, serialized_graph, _ = graph_view.ObjectGraphView( root_trackable).serialize_object_graph() expected_checkpoint_names = ( # Created in the root node, so no prefix. "optimizer_step", "model/_second/kernel", "model/_named_dense/kernel", "model/_named_dense/bias", # non-Layer dependency of the model "model/_non_layer/a_variable", # The optimizer creates two non-slot variables "optimizer/beta1_power", "optimizer/beta2_power", # Slot variables "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", ) suffix = "/.ATTRIBUTES/VARIABLE_VALUE" expected_checkpoint_names = [ name + suffix for name in expected_checkpoint_names] # The optimizer and Dense layers also save get_config() JSON expected_checkpoint_names.extend([ "model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON", "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON" ]) named_variables = {v.name: v for v in named_variables} six.assertCountEqual(self, expected_checkpoint_names, named_variables.keys()) # Check that we've mapped to the right variable objects (not exhaustive) self.assertEqual( "global_step", named_variables["optimizer_step" + suffix].full_name) self.assertEqual( "my_model/dense_1/kernel", named_variables["model/_second/kernel" + suffix].full_name) self.assertEqual( "my_model/dense/kernel", named_variables["model/_named_dense/kernel" + suffix].full_name) self.assertEqual( "beta1_power", named_variables["optimizer/beta1_power" + suffix].full_name) self.assertEqual( "beta2_power", named_variables["optimizer/beta2_power" + suffix].full_name) # Spot check the generated protocol buffers. self.assertEqual("optimizer", serialized_graph.nodes[0].children[1].local_name) optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[ 1].node_id] self.assertEqual("beta1_power", optimizer_node.children[0].local_name) self.assertEqual( "beta1_power", serialized_graph.nodes[optimizer_node.children[0] .node_id].attributes[0].full_name) self.assertEqual( "my_model/dense/kernel", serialized_graph.nodes[optimizer_node.slot_variables[0] .original_variable_node_id] .attributes[0].full_name) # We strip off the :0 suffix, as variable.name-based saving does. self.assertEqual( "my_model/dense/kernel/Adam", serialized_graph.nodes[optimizer_node.slot_variables[0] .slot_variable_node_id] .attributes[0].full_name) self.assertEqual( "my_model/dense/kernel/Adam:0", optimizer.get_slot( var=model._named_dense.kernel, name="m").name) self.assertEqual( "model/_named_dense/kernel" + suffix, serialized_graph.nodes[ optimizer_node.slot_variables[0] .original_variable_node_id].attributes[0].checkpoint_key) self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) self.assertEqual( "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, serialized_graph.nodes[ optimizer_node.slot_variables[0] .slot_variable_node_id].attributes[0].checkpoint_key)
def testNamingWithOptimizer(self): input_value = tf.constant([[3.]]) model = MyModel() # A nuisance Model using the same optimizer. Its slot variables should not # go in the checkpoint, since it is never depended on. other_model = MyModel() optimizer = tf.compat.v1.train.AdamOptimizer(0.001) optimizer_step = tf.compat.v1.train.get_or_create_global_step() root_trackable = tf.train.Checkpoint(optimizer=optimizer, model=model, optimizer_step=optimizer_step) if tf.executing_eagerly(): optimizer.minimize(lambda: model(input_value), global_step=optimizer_step) optimizer.minimize(lambda: other_model(input_value), global_step=optimizer_step) else: train_op = optimizer.minimize(model(input_value), global_step=optimizer_step) optimizer.minimize(other_model(input_value), global_step=optimizer_step) self.evaluate(trackable_utils.gather_initializers(root_trackable)) self.evaluate(train_op) named_variables, serialized_graph, _ = tf.__internal__.tracking.ObjectGraphView( root_trackable).serialize_object_graph() expected_checkpoint_names = ( # Created in the root node, so no prefix. "optimizer_step", "model/_second/kernel", "model/_named_dense/kernel", "model/_named_dense/bias", # non-Layer dependency of the model "model/_non_layer/a_variable", # The optimizer creates two non-slot variables "optimizer/beta1_power", "optimizer/beta2_power", # Slot variables "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", ) suffix = "/.ATTRIBUTES/VARIABLE_VALUE" expected_checkpoint_names = [ name + suffix for name in expected_checkpoint_names ] named_variables = {v.name: v for v in named_variables} self.assertEqual(len(expected_checkpoint_names), len(named_variables.keys())) # Check that we've created the right full_names of objects (not exhaustive) expected_names = { "optimizer_step" + suffix: "global_step", "model/_second/kernel" + suffix: "my_model/dense_1/kernel", "model/_named_dense/kernel" + suffix: "my_model/dense/kernel", "optimizer/beta1_power" + suffix: "beta1_power", "optimizer/beta2_power" + suffix: "beta2_power", } for nodes in serialized_graph.nodes: for attribute in nodes.attributes: expected_name = expected_names.pop(attribute.checkpoint_key, None) if expected_name is not None: self.assertEqual(expected_name, attribute.full_name) self.assertEmpty(expected_names) # Spot check the generated protocol buffers. self.assertEqual("optimizer", serialized_graph.nodes[0].children[1].local_name) optimizer_node = serialized_graph.nodes[ serialized_graph.nodes[0].children[1].node_id] self.assertEqual("beta1_power", optimizer_node.children[0].local_name) self.assertEqual( "beta1_power", serialized_graph.nodes[ optimizer_node.children[0].node_id].attributes[0].full_name) self.assertEqual( "my_model/dense/kernel", serialized_graph.nodes[optimizer_node.slot_variables[ 0].original_variable_node_id].attributes[0].full_name) # We strip off the :0 suffix, as variable.name-based saving does. self.assertEqual( "my_model/dense/kernel/Adam", serialized_graph.nodes[optimizer_node.slot_variables[ 0].slot_variable_node_id].attributes[0].full_name) self.assertEqual( "my_model/dense/kernel/Adam:0", optimizer.get_slot(var=model._named_dense.kernel, name="m").name) self.assertEqual( "model/_named_dense/kernel" + suffix, serialized_graph.nodes[optimizer_node.slot_variables[ 0].original_variable_node_id].attributes[0].checkpoint_key) self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) self.assertEqual( "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, serialized_graph.nodes[optimizer_node.slot_variables[ 0].slot_variable_node_id].attributes[0].checkpoint_key)
def testSaveRestore(self): with self.test_session(): model = MyModel() optimizer = adam.Adam(0.001) root_trackable = tf.train.Checkpoint(optimizer=optimizer, model=model) input_value = tf.constant([[3.]]) with tf.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) train_op = optimizer.apply_gradients(zip(gradients, variables)) self.assertFalse(root_trackable.save_counter.trainable) self.evaluate(trackable_utils.gather_initializers(root_trackable)) self.evaluate(train_op) prefix = os.path.join(self.get_temp_dir(), "ckpt") self.evaluate( tf.compat.v1.assign(model._named_dense.variables[1], [42.])) m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") self.evaluate(tf.compat.v1.assign(m_bias_slot, [1.5])) save_path = root_trackable.save(file_prefix=prefix) self.evaluate( tf.compat.v1.assign(model._named_dense.variables[1], [43.])) self.evaluate(tf.compat.v1.assign(root_trackable.save_counter, 3)) optimizer_variables = self.evaluate( sorted(optimizer.variables(), key=lambda v: v.name)) self.evaluate(tf.compat.v1.assign(m_bias_slot, [-2.])) # Immediate restoration status = root_trackable.restore( save_path=save_path).assert_consumed() status.run_restore_ops() self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) if not tf.executing_eagerly(): return # Restore-on-create is only supported when executing eagerly on_create_model = MyModel() on_create_optimizer = adam.Adam(0.001) on_create_root = tf.train.Checkpoint(optimizer=on_create_optimizer, model=on_create_model) # Deferred restoration status = on_create_root.restore(save_path=save_path) status.assert_nontrivial_match() status.assert_existing_objects_matched() with self.assertRaises(AssertionError): status.assert_consumed() on_create_model(tf.constant([[3.]])) # create variables self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) self.assertAllEqual([42.], self.evaluate( on_create_model._named_dense.variables[1])) on_create_m_bias_slot = on_create_optimizer.get_slot( on_create_model._named_dense.variables[1], "m") status.assert_existing_objects_matched() if not tf.executing_eagerly(): with self.assertRaises(AssertionError): status.assert_consumed() # Optimizer slot variables are created when the original variable is # restored. self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) dummy_var = tf.Variable([1.]) on_create_optimizer.minimize(loss=dummy_var.read_value, var_list=[dummy_var]) status.assert_existing_objects_matched() status.assert_consumed() self.assertAllEqual( optimizer_variables, # Creation order is different, so .variables() needs to be re-sorted. self.evaluate( sorted(optimizer.variables(), key=lambda v: v.name)))
def testSaveRestore(self): model = MyModel() optimizer = adam.AdamOptimizer(0.001) root_trackable = util.Checkpoint( optimizer=optimizer, model=model) input_value = constant_op.constant([[3.]]) if context.executing_eagerly(): optimizer.minimize( lambda: model(input_value)) else: train_op = optimizer.minimize(model(input_value)) # TODO(allenl): Make initialization more pleasant when graph building. root_trackable.save_counter # pylint: disable=pointless-statement self.evaluate(util.gather_initializers( root_trackable)) self.evaluate(train_op) prefix = os.path.join(self.get_temp_dir(), "ckpt") self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.])) m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") self.evaluate(state_ops.assign(m_bias_slot, [1.5])) save_path = root_trackable.save(file_prefix=prefix) self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.])) self.evaluate(state_ops.assign(root_trackable.save_counter, 3)) optimizer_variables = self.evaluate(optimizer.variables()) self.evaluate(state_ops.assign(m_bias_slot, [-2.])) # Immediate restoration status = root_trackable.restore(save_path=save_path).assert_consumed() status.run_restore_ops() self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) if not context.executing_eagerly(): return # Restore-on-create is only supported when executing eagerly on_create_model = MyModel() on_create_optimizer = adam.AdamOptimizer( 0.001, # Preserve beta_1_power and beta_2_power when appying gradients # so we can test that they've been restored correctly. beta1=1.0, beta2=1.0) on_create_root = util.Checkpoint( optimizer=on_create_optimizer, model=on_create_model) # Deferred restoration status = on_create_root.restore(save_path=save_path) on_create_model(constant_op.constant([[3.]])) # create variables self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) self.assertAllEqual([42.], self.evaluate( on_create_model._named_dense.variables[1])) on_create_m_bias_slot = on_create_optimizer.get_slot( on_create_model._named_dense.variables[1], "m") # Optimizer slot variables are created when the original variable is # restored. self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) self.assertAllEqual(optimizer_variables[2:], self.evaluate(on_create_optimizer.variables())) dummy_var = resource_variable_ops.ResourceVariable([1.]) on_create_optimizer.minimize(loss=dummy_var.read_value) status.assert_consumed() beta_1_power, beta_2_power = on_create_optimizer._get_beta_accumulators() self.assertAllEqual(optimizer_variables[0], self.evaluate(beta_1_power)) self.assertAllEqual(optimizer_variables[1], self.evaluate(beta_2_power))
def testDeferredSlotRestoration(self): with self.test_session(): checkpoint_directory = self.get_temp_dir() root = tf.train.Checkpoint() root.var = trackable_utils.add_variable(root, name="var", initializer=0.) optimizer = adam.Adam(0.1) variables = [root.var] gradients = [1.] train_op = optimizer.apply_gradients(zip(gradients, variables)) # Note that `optimizer` has not been added as a dependency of # `root`. Create a one-off grouping so that slot variables for `root.var` # get initialized too. self.evaluate( trackable_utils.gather_initializers( tf.train.Checkpoint(root=root, optimizer=optimizer))) self.evaluate(train_op) self.evaluate(tf.compat.v1.assign(root.var, 12.)) no_slots_path = root.save( os.path.join(checkpoint_directory, "no_slots")) root.optimizer = optimizer self.evaluate(tf.compat.v1.assign(root.var, 13.)) self.evaluate( tf.compat.v1.assign( optimizer.get_slot(slot_name="m", var=root.var), 14.)) slots_path = root.save( os.path.join(checkpoint_directory, "with_slots")) new_root = tf.train.Checkpoint() # Load the slot-containing checkpoint (deferred), then immediately # overwrite the non-slot variable (also deferred). slot_status = new_root.restore(slots_path) no_slot_status = new_root.restore(no_slots_path) with self.assertRaises(AssertionError): no_slot_status.assert_consumed() new_root.var = trackable_utils.add_variable(new_root, name="var", shape=[]) no_slot_status.assert_consumed() no_slot_status.run_restore_ops() self.assertEqual(12., self.evaluate(new_root.var)) new_root.optimizer = adam.Adam(0.1) slot_status.assert_existing_objects_matched() if not tf.executing_eagerly(): with self.assertRaisesRegex(AssertionError, "Unresolved object"): slot_status.assert_consumed() self.assertEqual(12., self.evaluate(new_root.var)) if tf.executing_eagerly(): # Slot variables are only created with restoring initializers when # executing eagerly. self.assertEqual( 14., self.evaluate( new_root.optimizer.get_slot(slot_name="m", var=new_root.var))) else: # Slot variables are not created eagerly when graph building. with self.assertRaises(KeyError): new_root.optimizer.get_slot(slot_name="m", var=new_root.var) variables = [new_root.var] gradients = [1.] train_op = new_root.optimizer.apply_gradients( zip(gradients, variables)) # The slot variable now exists; restore() didn't create it, but we should # now have a restore op for it. slot_status.run_restore_ops() if not tf.executing_eagerly(): # The train op hasn't run when graph building, so the slot variable has # its restored value. It has run in eager, so the value will # be different. self.assertEqual( 14., self.evaluate( new_root.optimizer.get_slot(slot_name="m", var=new_root.var))) self.evaluate(train_op) slot_status.assert_consumed()
def testNamingWithOptimizer(self): input_value = tf.constant([[3.]]) model = MyModel() # A nuisance Model using the same optimizer. Its slot variables should not # go in the checkpoint, since it is never depended on. other_model = MyModel() optimizer = adam.Adam(0.001) step = tf.compat.v1.train.get_or_create_global_step() root_trackable = tf.train.Checkpoint(optimizer=optimizer, model=model, step=step) with tf.GradientTape() as tape: loss = model(input_value) variables = model.trainable_variables gradients = tape.gradient(loss, variables) train_op = tf.group( optimizer.apply_gradients(zip(gradients, variables)), step.assign_add(1)) with tf.GradientTape() as tape: loss = other_model(input_value) variables = other_model.trainable_variables gradients = tape.gradient(loss, variables) optimizer.apply_gradients(zip(gradients, variables)) self.evaluate(trackable_utils.gather_initializers(root_trackable)) self.evaluate(train_op) named_variables, serialized_graph, _ = graph_view.ObjectGraphView( root_trackable).serialize_object_graph() expected_slot_keys = ( "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", ) expected_checkpoint_names = ( # Created in the root node, so no prefix. "step", "model/_second/kernel", "model/_named_dense/kernel", "model/_named_dense/bias", # non-Layer dependency of the model "model/_non_layer/a_variable", "optimizer/learning_rate", "optimizer/beta_1", "optimizer/beta_2", "optimizer/iter", "optimizer/decay", ) + expected_slot_keys suffix = "/.ATTRIBUTES/VARIABLE_VALUE" expected_checkpoint_names = [ name + suffix for name in expected_checkpoint_names ] named_variables = {v.name: v for v in named_variables} self.assertEqual(len(expected_checkpoint_names), len(named_variables.keys())) # Check that we've mapped to the right variable objects (not exhaustive) self.assertEqual("global_step", named_variables["step" + suffix].full_name) self.assertEqual( "my_model/dense_1/kernel", named_variables["model/_second/kernel" + suffix].full_name) self.assertEqual( "my_model/dense/kernel", named_variables["model/_named_dense/kernel" + suffix].full_name) self.assertEqual( "Adam/beta_1", named_variables["optimizer/beta_1" + suffix].full_name) self.assertEqual( "Adam/beta_2", named_variables["optimizer/beta_2" + suffix].full_name) # Spot check the generated protocol buffers. self.assertEqual("optimizer", serialized_graph.nodes[0].children[1].local_name) optimizer_node = serialized_graph.nodes[ serialized_graph.nodes[0].children[1].node_id] children = [node.local_name for node in optimizer_node.children] self.assertEqual( # hyper variable dependencies len(["beta_1", "beta_2", "iter", "decay", "learning_rate"]), len(children)) serialized_slot_keys = [] for slot in optimizer_node.slot_variables: for attribute in (serialized_graph.nodes[ slot.slot_variable_node_id].attributes): serialized_slot_keys.append(attribute.checkpoint_key) self.assertEqual(len([key + suffix for key in expected_slot_keys]), len(serialized_slot_keys))