def _save_first_checkpoint(keras_model, estimator, custom_objects,
                           keras_weights):
  """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    estimator: keras estimator.
    custom_objects: Dictionary for custom objects.
    keras_weights: A flat list of Numpy arrays for weights of given
      keras_model.
  """
  # Load weights and save to checkpoint if there is no checkpoint.
  latest_path = saver_lib.latest_checkpoint(estimator.model_dir)
  if not latest_path:
    with ops.Graph().as_default():
      random_seed.set_random_seed(estimator.config.tf_random_seed)
      training_util.create_global_step()
      model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN, keras_model,
                                     custom_objects)
      # save to checkpoint
      with session.Session(config=estimator._session_config) as sess:
        if keras_weights:
          model.set_weights(keras_weights)
        # Make update ops and initialize all variables.
        if not model.train_function:
          # pylint: disable=protected-access
          model._make_train_function()
          K._initialize_variables(sess)
          # pylint: enable=protected-access
        saver = saver_lib.Saver()
        saver.save(sess, os.path.join(estimator.model_dir, 'keras_model.ckpt'))
def _test_logits(self, mode, rnn_units, logits_dimension, features_fn,
                 sequence_feature_columns, context_feature_columns,
                 expected_logits):
  """Tests that the expected logits are calculated."""
  with ops.Graph().as_default():
    # Global step needed for MonitoredSession, which is in turn used to
    # explicitly set variable weights through a checkpoint.
    training_util.create_global_step()
    # Use a variable scope here with 'rnn', emulating the rnn model_fn, so
    # the checkpoint naming is shared.
    with variable_scope.variable_scope('rnn'):
      input_layer_partitioner = (
          partitioned_variables.min_max_variable_partitioner(
              max_partitions=0, min_slice_size=64 << 20))
      logit_fn = rnn._rnn_logit_fn_builder(
          output_units=logits_dimension,
          rnn_cell_fn=rnn._make_rnn_cell_fn(rnn_units),
          sequence_feature_columns=sequence_feature_columns,
          context_feature_columns=context_feature_columns,
          input_layer_partitioner=input_layer_partitioner)
      # Features are constructed within this function, otherwise the Tensors
      # containing the features would be defined outside this graph.
      logits = logit_fn(features=features_fn(), mode=mode)
      with monitored_session.MonitoredTrainingSession(
          checkpoint_dir=self._model_dir) as sess:
        self.assertAllClose(expected_logits, sess.run(logits), atol=1e-4)
def _test_logits(self, mode, hidden_units, logits_dimension, inputs,
                 expected_logits):
  """Tests that the expected logits are passed to mock head."""
  with ops.Graph().as_default():
    training_util.create_global_step()
    head = _mock_head(
        self,
        hidden_units=hidden_units,
        logits_dimension=logits_dimension,
        expected_logits=expected_logits)
    estimator_spec = dnn._dnn_model_fn(
        features={'age': constant_op.constant(inputs)},
        labels=constant_op.constant([[1]]),
        mode=mode,
        head=head,
        hidden_units=hidden_units,
        feature_columns=[
            feature_column.numeric_column(
                'age', shape=np.array(inputs).shape[1:])
        ],
        optimizer=_mock_optimizer(self, hidden_units))
    with monitored_session.MonitoredTrainingSession(
        checkpoint_dir=self._model_dir) as sess:
      if mode == model_fn.ModeKeys.TRAIN:
        sess.run(estimator_spec.train_op)
      elif mode == model_fn.ModeKeys.EVAL:
        sess.run(estimator_spec.loss)
      elif mode == model_fn.ModeKeys.PREDICT:
        sess.run(estimator_spec.predictions)
      else:
        self.fail('Invalid mode: {}'.format(mode))
def test_features_tensor_raises_value_error(self):
  """Tests that passing a Tensor for features raises a ValueError."""
  hidden_units = (2, 2)
  logits_dimension = 3
  inputs = ([[10.]], [[8.]])
  expected_logits = [[0, 0, 0]]

  with ops.Graph().as_default():
    training_util.create_global_step()
    head = mock_head(
        self,
        hidden_units=hidden_units,
        logits_dimension=logits_dimension,
        expected_logits=expected_logits)
    with self.assertRaisesRegexp(ValueError, 'features should be a dict'):
      self._dnn_model_fn(
          features=constant_op.constant(inputs),
          labels=constant_op.constant([[1]]),
          mode=model_fn.ModeKeys.TRAIN,
          head=head,
          hidden_units=hidden_units,
          feature_columns=[
              feature_column.numeric_column(
                  'age', shape=np.array(inputs).shape[1:])
          ],
          optimizer=mock_optimizer(self, hidden_units))
def _save_first_checkpoint(keras_model, custom_objects, config,
                           save_object_ckpt):
  """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    config: Estimator config.
    save_object_ckpt: Whether to save an object-based checkpoint.

  Returns:
    The path where keras model checkpoint is saved.
  """
  # save checkpoint into subdirectory to allow warm start
  keras_model_dir = os.path.join(config.model_dir, 'keras')
  # Load weights and save to checkpoint if there is no checkpoint
  latest_path = checkpoint_management.latest_checkpoint(keras_model_dir)
  if not latest_path:
    keras_weights = None
    if _any_weight_initialized(keras_model):
      keras_weights = keras_model.get_weights()
    if not gfile.IsDirectory(keras_model_dir):
      gfile.MakeDirs(keras_model_dir)
    with ops.Graph().as_default():
      random_seed.set_random_seed(config.tf_random_seed)
      training_util.create_global_step()
      model = _clone_and_build_model(ModeKeys.TRAIN, keras_model,
                                     custom_objects)
      # Init the train_function outside of the context of a session. The train
      # function updates the graph by adding backprop parts, which may try to
      # update nodes in the forward graph; this fails if it is done within the
      # same session.
      # Always create the train_function here since the model is just cloned.
      # See https://github.com/tensorflow/tensorflow/issues/27750 for details.
      model._make_train_function()  # pylint: disable=protected-access
      # save to checkpoint
      with session.Session(config=config.session_config) as sess:
        if keras_weights:
          model.set_weights(keras_weights)
        # model._make_train_function() will potentially create the optimizer
        # variable, which will require another variable initialization.
        K._initialize_variables(sess)  # pylint: disable=protected-access
        if save_object_ckpt:
          model._track_trackable(  # pylint: disable=protected-access
              training_util.get_global_step(), 'estimator_global_step')
          latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt')
          model.save_weights(latest_path)
        else:
          saver = saver_lib.Saver()
          latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt')
          saver.save(sess, latest_path)
  return latest_path
def run_session(self, hooks, should_stop):
  hooks = hooks if isinstance(hooks, list) else [hooks]
  with ops.Graph().as_default():
    training_util.create_global_step()
    no_op = control_flow_ops.no_op()
    with monitored_session.SingularMonitoredSession(hooks=hooks) as mon_sess:
      mon_sess.run(no_op)
      self.assertEqual(mon_sess.should_stop(), should_stop)
def test_checkpoint_overwrite_warm_start(self):
  extra_run_step = 2
  ws_ckpt_dir = tempfile.mkdtemp(
      prefix=os.path.join(self.get_temp_dir(), "warm_start"))
  final_ckpt_dir = tempfile.mkdtemp(
      prefix=os.path.join(self.get_temp_dir(), "final"))
  for (run_id, num_shards, k_dtype, d_dtype, init_mode, dim,
       run_step) in _next_run_step_config():
    error_msg = "Cond:{},{},{},{},{},{}".format(num_shards, k_dtype, d_dtype,
                                                init_mode, dim, run_step)
    with ops.Graph().as_default() as g:
      with self.session(graph=g, use_gpu=test_util.is_gpu_available(),
                        config=default_config) as sess:
        training_util.create_global_step()
        graph = TestGraph(k_dtype, d_dtype, dim, num_shards, 'var', 'devar',
                          run_id)
        self.evaluate(variables.global_variables_initializer())
        sess.run([graph.devar_init_op])
        prev_x = sess.run([graph.x])[0]
        for _ in range(run_step):
          sess.run([graph.var_opt_op, graph.devar_opt_op])
        saver_lib.Saver().save(sess, os.path.join(ws_ckpt_dir, "model"))
        prev_ws_var_loss, prev_ws_devar_loss = sess.run(
            [graph.var_loss, graph.devar_loss])
        self.assertAllCloseAccordingToType(prev_ws_var_loss,
                                           prev_ws_devar_loss, msg=error_msg)
        for _ in range(extra_run_step):
          sess.run([graph.var_opt_op, graph.devar_opt_op])
        saver_lib.Saver().save(sess, os.path.join(final_ckpt_dir, "model"))
        prev_final_var_loss, prev_final_devar_loss = sess.run(
            [graph.var_loss, graph.devar_loss])
        self.assertAllCloseAccordingToType(prev_final_var_loss,
                                           prev_final_devar_loss,
                                           msg=error_msg)

    with ops.Graph().as_default():
      training_util.create_global_step()
      graph = TestGraph(k_dtype, d_dtype, dim, num_shards, 'var', 'devar',
                        run_id, prev_x)
      ws_util.warm_start(ws_ckpt_dir, vars_to_warm_start=['.*'])
      with monitored_session.MonitoredTrainingSession(
          config=default_config, is_chief=True,
          checkpoint_dir=final_ckpt_dir) as sess:
        var_loss, devar_loss = sess.run([graph.var_loss, graph.devar_loss])
        self.assertAllCloseAccordingToType(var_loss, prev_final_var_loss,
                                           msg=error_msg)
        self.assertAllCloseAccordingToType(devar_loss, prev_final_devar_loss,
                                           msg=error_msg)
def test_create_global_step(self):
  self.assertIsNone(training_util.get_global_step())
  with ops.Graph().as_default() as g:
    global_step = training_util.create_global_step()
    self._assert_global_step(global_step)
    self.assertRaisesRegex(ValueError, 'already exists',
                           training_util.create_global_step)
    self.assertRaisesRegex(ValueError, 'already exists',
                           training_util.create_global_step, g)
    self._assert_global_step(training_util.create_global_step(ops.Graph()))
def _save_first_checkpoint(keras_model, custom_objects, config,
                           save_object_ckpt):
  """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    config: Estimator config.
    save_object_ckpt: Whether to save an object-based checkpoint.

  Returns:
    The path where keras model checkpoint is saved.
  """
  # save checkpoint into subdirectory to allow warm start
  keras_model_dir = os.path.join(config.model_dir, 'keras')
  # Load weights and save to checkpoint if there is no checkpoint
  latest_path = checkpoint_management.latest_checkpoint(keras_model_dir)
  if not latest_path:
    keras_weights = None
    if _any_weight_initialized(keras_model):
      keras_weights = keras_model.get_weights()
    if not gfile.IsDirectory(keras_model_dir):
      gfile.MakeDirs(keras_model_dir)
    with ops.Graph().as_default():
      random_seed.set_random_seed(config.tf_random_seed)
      training_util.create_global_step()
      model = _clone_and_build_model(ModeKeys.TRAIN, keras_model,
                                     custom_objects)
      # save to checkpoint
      with session.Session(config=config.session_config) as sess:
        if keras_weights:
          model.set_weights(keras_weights)
        # Make update ops and initialize all variables.
        if not model.train_function:
          # pylint: disable=protected-access
          model._make_train_function()
          K._initialize_variables(sess)
          # pylint: enable=protected-access
        if save_object_ckpt:
          model._track_trackable(  # pylint: disable=protected-access
              training_util.get_global_step(), 'estimator_global_step')
          latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt')
          model.save_weights(latest_path)
        else:
          saver = saver_lib.Saver()
          latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt')
          saver.save(sess, latest_path)
  return latest_path
def test_multi_feature_column_multi_dim_logits(self):
  """Tests multiple feature columns and multi-dimensional logits.

  All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
  difference is that the input consists of two 1D feature columns, instead of
  one 2D feature column.
  """
  base_global_step = 100
  _create_checkpoint((
      ([[.6, .5], [-.6, -.5]], [.1, -.1]),
      ([[1., .8], [-.8, -1.]], [.2, -.2]),
      ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
  ), base_global_step, self._model_dir)
  hidden_units = (2, 2)
  logits_dimension = 3
  inputs = ([[10.]], [[8.]])
  expected_logits = [[-0.48, 0.48, 0.39]]

  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    with ops.Graph().as_default():
      training_util.create_global_step()
      head = _mock_head(
          self,
          hidden_units=hidden_units,
          logits_dimension=logits_dimension,
          expected_logits=expected_logits)
      estimator_spec = dnn._dnn_model_fn(
          features={
              'age': constant_op.constant(inputs[0]),
              'height': constant_op.constant(inputs[1])
          },
          labels=constant_op.constant([[1]]),
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=[
              feature_column.numeric_column('age'),
              feature_column.numeric_column('height')
          ],
          optimizer=_mock_optimizer(self, hidden_units))
      with monitored_session.MonitoredTrainingSession(
          checkpoint_dir=self._model_dir) as sess:
        if mode == model_fn.ModeKeys.TRAIN:
          sess.run(estimator_spec.train_op)
        elif mode == model_fn.ModeKeys.EVAL:
          sess.run(estimator_spec.loss)
        elif mode == model_fn.ModeKeys.PREDICT:
          sess.run(estimator_spec.predictions)
        else:
          self.fail('Invalid mode: {}'.format(mode))
def test_multi_feature_column_multi_dim_logits(self):
  """Tests multiple feature columns and multi-dimensional logits.

  All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
  difference is that the input consists of two 1D feature columns, instead of
  one 2D feature column.
  """
  base_global_step = 100
  create_checkpoint((
      ([[.6, .5], [-.6, -.5]], [.1, -.1]),
      ([[1., .8], [-.8, -1.]], [.2, -.2]),
      ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
  ), base_global_step, self._model_dir)
  hidden_units = (2, 2)
  logits_dimension = 3
  inputs = ([[10.]], [[8.]])
  expected_logits = [[-0.48, 0.48, 0.39]]

  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    with ops.Graph().as_default():
      training_util.create_global_step()
      head = mock_head(
          self,
          hidden_units=hidden_units,
          logits_dimension=logits_dimension,
          expected_logits=expected_logits)
      estimator_spec = self._dnn_model_fn(
          features={
              'age': constant_op.constant(inputs[0]),
              'height': constant_op.constant(inputs[1])
          },
          labels=constant_op.constant([[1]]),
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=[
              feature_column.numeric_column('age'),
              feature_column.numeric_column('height')
          ],
          optimizer=mock_optimizer(self, hidden_units))
      with monitored_session.MonitoredTrainingSession(
          checkpoint_dir=self._model_dir) as sess:
        if mode == model_fn.ModeKeys.TRAIN:
          sess.run(estimator_spec.train_op)
        elif mode == model_fn.ModeKeys.EVAL:
          sess.run(estimator_spec.loss)
        elif mode == model_fn.ModeKeys.PREDICT:
          sess.run(estimator_spec.predictions)
        else:
          self.fail('Invalid mode: {}'.format(mode))
def test_reads_before_increments(self):
  with ops.Graph().as_default():
    training_util.create_global_step()
    read_tensor = training_util._get_or_create_global_step_read()
    inc_op = training_util._increment_global_step(1)
    inc_three_op = training_util._increment_global_step(3)
    with monitored_session.MonitoredTrainingSession() as sess:
      read_value, _ = sess.run([read_tensor, inc_op])
      self.assertEqual(0, read_value)
      read_value, _ = sess.run([read_tensor, inc_three_op])
      self.assertEqual(1, read_value)
      read_value = sess.run(read_tensor)
      self.assertEqual(4, read_value)
def _save_first_checkpoint(keras_model, custom_objects, config):
  """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    config: Estimator config.

  Returns:
    The path where keras model checkpoint is saved.
  """
  # save checkpoint into subdirectory to allow warm start
  keras_model_dir = os.path.join(config.model_dir, 'keras')
  # Load weights and save to checkpoint if there is no checkpoint
  latest_path = checkpoint_management.latest_checkpoint(keras_model_dir)
  if not latest_path:
    keras_weights = None
    if _any_weight_initialized(keras_model):
      keras_weights = keras_model.get_weights()
    if not gfile.IsDirectory(keras_model_dir):
      gfile.MakeDirs(keras_model_dir)
    with ops.Graph().as_default():
      random_seed.set_random_seed(config.tf_random_seed)
      training_util.create_global_step()
      model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN, keras_model,
                                     custom_objects)
      # save to checkpoint
      with session.Session(config=config.session_config) as sess:
        if keras_weights:
          model.set_weights(keras_weights)
        # Make update ops and initialize all variables.
        if not model.train_function:
          # pylint: disable=protected-access
          model._make_train_function()
          # We are using the global variables collection here because the
          # estimator runs eager mode under the context.graph_mode() context
          # manager. When we try to get all the TF optimizer variables using
          # optimizer.variables(), we try to return variables that belong to
          # the current graph. This check (variable.op.graph is current_graph)
          # will error because the context is graph mode but the variables are
          # eager.
          # TODO(psv): investigate this and see if we can remove the usage of
          # collection here.
          K._initialize_variables(sess, variables_module.global_variables())
          # pylint: enable=protected-access
        saver = saver_lib.Saver()
        latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt')
        saver.save(sess, latest_path)
  return latest_path
def create_checkpoint(rnn_weights, rnn_biases, logits_weights, logits_biases,
                      global_step, model_dir):
  """Create checkpoint file with provided model weights.

  Args:
    rnn_weights: Iterable of values of weights for the RNN cell.
    rnn_biases: Iterable of values of biases for the RNN cell.
    logits_weights: Iterable of values for matrix connecting RNN output to
      logits.
    logits_biases: Iterable of values for logits bias term.
    global_step: Initial global step to save in checkpoint.
    model_dir: Directory into which checkpoint is saved.
  """
  model_weights = {}
  model_weights[CELL_WEIGHTS_NAME] = rnn_weights
  model_weights[CELL_BIAS_NAME] = rnn_biases
  model_weights[LOGITS_WEIGHTS_NAME] = logits_weights
  model_weights[LOGITS_BIAS_NAME] = logits_biases

  with ops.Graph().as_default():
    # Create model variables.
    for k, v in six.iteritems(model_weights):
      variables_lib.Variable(v, name=k, dtype=dtypes.float32)

    # Create non-model variables.
    global_step_var = training_util.create_global_step()
    assign_op = global_step_var.assign(global_step)

    # Initialize vars and save checkpoint.
    with monitored_session.MonitoredTrainingSession(
        checkpoint_dir=model_dir) as sess:
      sess.run(assign_op)
def _create_checkpoint(weights_and_biases, global_step, model_dir):
  """Create checkpoint file with provided model weights.

  Args:
    weights_and_biases: Iterable of tuples of weight and bias values.
    global_step: Initial global step to save in checkpoint.
    model_dir: Directory into which checkpoint is saved.
  """
  weights, biases = zip(*weights_and_biases)
  model_weights = {}

  # Hidden layer weights.
  for i in range(0, len(weights) - 1):
    model_weights[_HIDDEN_WEIGHTS_NAME_PATTERN % i] = weights[i]
    model_weights[_HIDDEN_BIASES_NAME_PATTERN % i] = biases[i]

  # Output layer weights.
  model_weights[_LOGITS_WEIGHTS_NAME] = weights[-1]
  model_weights[_LOGITS_BIASES_NAME] = biases[-1]

  with ops.Graph().as_default():
    # Create model variables.
    for k, v in six.iteritems(model_weights):
      variables_lib.Variable(v, name=k, dtype=dtypes.float32)

    # Create non-model variables.
    global_step_var = training_util.create_global_step()

    # Initialize vars and save checkpoint.
    with tf_session.Session() as sess:
      variables_lib.global_variables_initializer().run()
      global_step_var.assign(global_step).eval()
      saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
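# A hypothetical companion sketch (not part of the original test suites):
# after a helper like _create_checkpoint above writes a checkpoint, the
# TF1-style checkpoint reader can confirm which variable names and shapes were
# stored. `model_dir` is assumed to be the same directory the helper used.
import tensorflow.compat.v1 as tf


def inspect_checkpoint(model_dir):
  ckpt_path = tf.train.latest_checkpoint(model_dir)
  reader = tf.train.load_checkpoint(ckpt_path)
  # The map covers every saved variable, including 'global_step'.
  for name, shape in sorted(reader.get_variable_to_shape_map().items()):
    print(name, shape)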
def test_ops_with_var_and_adagrad_da(self):
  var_list = [
      deo.get_variable('sp_var', initializer=0.0, dim=2),
  ]
  gstep = training_util.create_global_step()
  opt_list = [
      adagrad_da.AdagradDAOptimizer(0.1, gstep),
  ]
  self.common_run_context(var_list, opt_list, name='adagrad_da_test')
def common_minimize_trainable(self, base_opt, test_opt, name):
  tf.config.set_soft_device_placement(True)
  hvd.init()
  base_opt = de.DynamicEmbeddingOptimizer(base_opt, synchronous=True)
  for dtype, run_step, dim in itertools.product([dtypes.float32], [1], [10]):
    x = tf.random.uniform(shape=[32, dim])
    y = tf.zeros([32, 1])

    global_step = training_util.create_global_step()

    base_weight = tf.compat.v1.get_variable(name="base_weights",
                                            initializer=tf.ones([10, 1]))
    base_logits = tf.nn.relu(math_ops.matmul(x, base_weight))
    base_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                        logits=base_logits)
    base_opt_op = base_opt.minimize(base_loss, global_step,
                                    var_list=[base_weight])

    test_weight = tf.compat.v1.get_variable(name="test_weights",
                                            initializer=tf.ones([10, 1]))
    test_logits = tf.nn.relu(math_ops.matmul(x, test_weight))
    test_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                        logits=test_logits)
    grads_and_vars = test_opt.compute_gradients(test_loss,
                                                var_list=[test_weight])
    var_list = []
    aggregated_grad = []
    for grad, var in grads_and_vars:
      var_list.append(var)
      aggregated_grad.append(hvd.allreduce(grad, op=hvd.Sum))
    aggregated_grads_and_vars = zip(aggregated_grad, var_list)
    test_opt_op = test_opt.apply_gradients(aggregated_grads_and_vars,
                                           global_step)

    with monitored_session.MonitoredTrainingSession(
        is_chief=True, config=default_config) as sess:
      for _ in range(run_step):
        sess.run(base_opt_op)
        sess.run(test_opt_op)

      self.assertAllCloseAccordingToType(
          sess.run(base_weight),
          sess.run(test_weight),
          msg="Cond:{},{},{}".format(dtype, run_step, dim),
      )
def test_stop(self):
  hook = early_stopping._StopOnPredicateHook(
      should_stop_fn=lambda: False, run_every_secs=0)
  with ops.Graph().as_default():
    training_util.create_global_step()
    no_op = control_flow_ops.no_op()
    with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
      mon_sess.run(no_op)
      self.assertFalse(mon_sess.should_stop())
      self.assertFalse(mon_sess.raw_session().run(hook._stop_var))

  hook = early_stopping._StopOnPredicateHook(
      should_stop_fn=lambda: True, run_every_secs=0)
  with ops.Graph().as_default():
    training_util.create_global_step()
    no_op = control_flow_ops.no_op()
    with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
      mon_sess.run(no_op)
      self.assertTrue(mon_sess.should_stop())
      self.assertTrue(mon_sess.raw_session().run(hook._stop_var))
def global_step(self):
  if self._global_step is None:
    # Get the default create_global_step utility to actually call
    # self.add_variable, by setting a custom getter.
    def _owned_variable_as_custom_getter(getter, *args, **kwargs):
      return self.add_variable(*args, getter=getter, **kwargs)

    with variable_scope.variable_scope(
        "", custom_getter=_owned_variable_as_custom_getter):
      self._global_step = training_util.create_global_step()
  return self._global_step
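# A standalone sketch (assumption: TF1.x graph mode, not from the original
# sources) of the mechanism the property above relies on: a custom getter on a
# variable scope intercepts get_variable calls, so create_global_step is
# routed through the supplied function.
import tensorflow.compat.v1 as tf

seen = []


def _tracking_getter(getter, *args, **kwargs):
  # Delegate to the default getter, but record the variable it returns.
  var = getter(*args, **kwargs)
  seen.append(var)
  return var


with tf.Graph().as_default():
  with tf.variable_scope('', custom_getter=_tracking_getter):
    step = tf.train.create_global_step()
  assert seen and seen[0] is step  # the custom getter saw the global step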
def _gan_train_ops(self, generator_add, discriminator_add):
  step = training_util.create_global_step()
  # Increment the global count every time a train op is run so we can count
  # the number of times they're run.
  # NOTE: `use_locking=True` is required to avoid race conditions with
  # joint training.
  train_ops = namedtuples.GANTrainOps(
      generator_train_op=step.assign_add(generator_add, use_locking=True),
      discriminator_train_op=step.assign_add(
          discriminator_add, use_locking=True),
      global_step_inc_op=step.assign_add(1))
  return train_ops
def testGlobalStepIsWrappedOnTwoGPUs(self):
  strategy, _, _ = create_test_objects(num_gpus=2)
  with ops.Graph().as_default(), strategy.scope():
    created_step = training_util.create_global_step()
    get_step = training_util.get_global_step()
    self.assertEqual(created_step, get_step,
                     msg=('created_step %s type %s vs. get_step %s type %s' %
                          (id(created_step), created_step.__class__.__name__,
                           id(get_step), get_step.__class__.__name__)))
    self.assertIs(values.AggregatingVariable, type(created_step))
    self.assertIs(values.AggregatingVariable, type(get_step))
    self.assertIs(strategy, created_step.distribute_strategy)
def testGlobalStepIsWrapped(self):
  distribution = parameter_server_strategy.ParameterServerStrategy(
      num_gpus_per_worker=2)
  with ops.Graph().as_default(), distribution.scope():
    created_step = training_util.create_global_step()
    get_step = training_util.get_global_step()
    self.assertEqual(created_step, get_step,
                     msg=('created_step %s type %s vs. get_step %s type %s' %
                          (id(created_step), created_step.__class__.__name__,
                           id(get_step), get_step.__class__.__name__)))
    self.assertIs(values.AggregatingVariable, type(created_step))
    self.assertIs(values.AggregatingVariable, type(get_step))
def _save_first_checkpoint(keras_model, custom_objects, config):
  """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    config: Estimator config.

  Returns:
    The path where keras model checkpoint is saved.
  """
  # save checkpoint into subdirectory to allow warm start
  keras_model_dir = os.path.join(config.model_dir, 'keras')
  # Load weights and save to checkpoint if there is no checkpoint
  latest_path = checkpoint_management.latest_checkpoint(keras_model_dir)
  if not latest_path:
    keras_weights = None
    if _any_weight_initialized(keras_model):
      keras_weights = keras_model.get_weights()
    if not gfile.IsDirectory(keras_model_dir):
      gfile.MakeDirs(keras_model_dir)
    with ops.Graph().as_default():
      random_seed.set_random_seed(config.tf_random_seed)
      training_util.create_global_step()
      model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN, keras_model,
                                     custom_objects)
      # save to checkpoint
      with session.Session(config=config.session_config) as sess:
        if keras_weights:
          model.set_weights(keras_weights)
        # Make update ops and initialize all variables.
        if not model.train_function:
          # pylint: disable=protected-access
          model._make_train_function()
          K._initialize_variables(sess)
          # pylint: enable=protected-access
        saver = saver_lib.Saver()
        latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt')
        saver.save(sess, latest_path)
  return latest_path
def test_scalar_summary_v2__v1_set_step(self):
  """Tests scalar v2 invocation when v1 step is set."""
  global_step = training_util.create_global_step()
  global_step.assign(1024)
  with test.mock.patch.object(
      summary_v2, 'scalar', autospec=True) as mock_scalar_v2:
    with summary_ops_v2.create_summary_file_writer('/tmp/test').as_default():
      i = constant_op.constant(2.5)
      tensor = summary_lib.scalar('float', i)
  # Returns empty string.
  self.assertEqual(tensor.numpy(), b'')
  self.assertEqual(tensor.dtype, dtypes.string)
  mock_scalar_v2.assert_called_once_with('float', data=i, step=1024)
def testGlobalStepIsWrappedOnTwoGPUs(self, use_core_strategy):
  strategy, _, _ = create_test_objects(
      num_gpus=2, use_core_strategy=use_core_strategy)
  with ops.Graph().as_default(), strategy.scope():
    created_step = training_util.create_global_step()
    get_step = training_util.get_global_step()
    self.assertEqual(created_step, get_step,
                     msg=('created_step %s type %s vs. get_step %s type %s' %
                          (id(created_step), created_step.__class__.__name__,
                           id(get_step), get_step.__class__.__name__)))
    self.assertIs(values.AggregatingVariable, type(created_step))
    self.assertIs(values.AggregatingVariable, type(get_step))
    self.assertIs(strategy, created_step.distribute_strategy)
def test_requests(self):
  with ops.Graph().as_default(), session_lib.Session() as sess:
    training_util.create_global_step()
    mock_mon = FakeMonitor()
    mock_mon2 = FakeMonitor()

    hook = learn.monitors.RunHookAdapterForMonitors([mock_mon, mock_mon2])
    hook.begin()

    mon_sess = monitored_session._HookedSession(sess=sess, hooks=[hook])

    a_tensor = constant_op.constant([0], name='a_tensor')
    constant_op.constant([5], name='another_tensor')
    constant_op.constant([10], name='third_tensor')
    mock_mon.requested_tensors = ['another_tensor']
    mock_mon2.requested_tensors = ['third_tensor']
    sess.run(variables.global_variables_initializer())

    output = mon_sess.run(a_tensor)
    self.assertEqual(output, [0])
    self.assertEqual(mock_mon.output['another_tensor'], [5])
    self.assertEqual(mock_mon2.output['third_tensor'], [10])
def testGlobalStepIsNotWrappedOnOneGPU(self, use_core_strategy):
  strategy, _, _ = create_test_objects(
      num_gpus=1, use_core_strategy=use_core_strategy)
  with ops.Graph().as_default(), strategy.scope():
    created_step = training_util.create_global_step()
    get_step = training_util.get_global_step()
    self.assertEqual(created_step, get_step,
                     msg=('created_step %s type %s vs. get_step %s type %s' %
                          (id(created_step), created_step.__class__.__name__,
                           id(get_step), get_step.__class__.__name__)))
    self.assertIs(resource_variable_ops.ResourceVariable, type(created_step))
    self.assertIs(resource_variable_ops.ResourceVariable, type(get_step))
    self.assertIs(strategy, created_step.distribute_strategy)
def testGlobalStepIsNotWrappedOnOneGPU(self):
  distribution = parameter_server_strategy.ParameterServerStrategy(
      num_gpus_per_worker=1)
  with ops.Graph().as_default(), distribution.scope():
    created_step = training_util.create_global_step()
    get_step = training_util.get_global_step()
    self.assertEqual(created_step, get_step,
                     msg=('created_step %s type %s vs. get_step %s type %s' %
                          (id(created_step), created_step.__class__.__name__,
                           id(get_step), get_step.__class__.__name__)))
    self.assertIs(resource_variable_ops.ResourceVariable, type(created_step))
    self.assertIs(resource_variable_ops.ResourceVariable, type(get_step))
    self.assertIs(distribution, created_step.distribute_strategy)
def test_saving_restoring_checkpoint(self):
  logdir = _test_dir(self.get_temp_dir(), "test_saving_restoring_checkpoint")
  with ops.Graph().as_default():
    gstep = training_util.create_global_step()
    do_step = state_ops.assign_add(gstep, 1)

    v0 = variables.Variable(10.0, name="v0")
    v1 = variables.Variable(20.0, name="v1")

    target_values = [[0.0], [1.0], [2.0]]
    keys = array_ops.placeholder(dtypes.int64)
    values = constant_op.constant(target_values, dtypes.float32)

    table = de.Variable(
        key_dtype=dtypes.int64,
        value_dtype=dtypes.float32,
        initializer=-1.0,
        name="m100",
        dim=1,
    )
    upsert_op = table.upsert(keys, values)
    lookup_op = table.lookup(keys)
    size_op = table.size()

    with monitored_session.MonitoredTrainingSession(
        config=default_config, is_chief=True, checkpoint_dir=logdir) as sess:
      self.assertEqual(0, sess.run(gstep))
      self.assertEqual(1, sess.run(do_step))
      self.assertEqual(2, sess.run(do_step))

      # Check that the parameter nodes have been initialized.
      self.assertEqual(10.0, sess.run(v0))
      self.assertEqual(20.0, sess.run(v1))
      self.assertAllEqual(0, sess.run(size_op))
      sess.run(upsert_op, feed_dict={keys: [0, 1, 2]})
      self.assertAllEqual(3, sess.run(size_op))
      self.device_check(table)

    # A restart will find the checkpoint and recover automatically.
    with monitored_session.MonitoredTrainingSession(
        config=default_config, is_chief=True, checkpoint_dir=logdir) as sess:
      self.assertEqual(2, sess.run(gstep))
      self.assertAllEqual(3, sess.run(table.size()))
      self.assertAllEqual(target_values,
                          sess.run(lookup_op, feed_dict={keys: [0, 1, 2]}))
      self.device_check(table)
def testIntegerSummaries(self):
  step = training_util.create_global_step()
  writer = self.create_db_writer()

  def adder(x, y):
    state_ops.assign_add(step, 1)
    summary_ops.generic('x', x)
    summary_ops.generic('y', y)
    sum_ = x + y
    summary_ops.generic('sum', sum_)
    return sum_

  with summary_ops.always_record_summaries():
    with writer.as_default():
      self.assertEqual(5, adder(int64(2), int64(3)).numpy())

  six.assertCountEqual(
      self, [1, 1, 1],
      get_all(self.db, 'SELECT step FROM Tensors WHERE dtype IS NOT NULL'))
  six.assertCountEqual(self, ['x', 'y', 'sum'],
                       get_all(self.db, 'SELECT tag_name FROM Tags'))
  x_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "x"')
  y_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "y"')
  sum_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "sum"')

  with summary_ops.always_record_summaries():
    with writer.as_default():
      self.assertEqual(9, adder(int64(4), int64(5)).numpy())

  six.assertCountEqual(
      self, [1, 1, 1, 2, 2, 2],
      get_all(self.db, 'SELECT step FROM Tensors WHERE dtype IS NOT NULL'))
  six.assertCountEqual(self, [x_id, y_id, sum_id],
                       get_all(self.db, 'SELECT tag_id FROM Tags'))
  self.assertEqual(2, get_tensor(self.db, x_id, 1))
  self.assertEqual(3, get_tensor(self.db, y_id, 1))
  self.assertEqual(5, get_tensor(self.db, sum_id, 1))
  self.assertEqual(4, get_tensor(self.db, x_id, 2))
  self.assertEqual(5, get_tensor(self.db, y_id, 2))
  self.assertEqual(9, get_tensor(self.db, sum_id, 2))
  six.assertCountEqual(
      self, ['experiment'],
      get_all(self.db, 'SELECT experiment_name FROM Experiments'))
  six.assertCountEqual(self, ['run'],
                       get_all(self.db, 'SELECT run_name FROM Runs'))
  six.assertCountEqual(self, ['user'],
                       get_all(self.db, 'SELECT user_name FROM Users'))
def testMonitoredSessionStopAtStepHook(self):
  random_seed.set_random_seed(1)

  with ops.device("/device:IPU:0"):
    pa = array_ops.placeholder(np.float32, [2, 2], name="a")
    pb = array_ops.placeholder(np.float32, [2, 2], name="b")
    output = pa + pb

  with variable_scope.variable_scope('gs', use_resource=True):
    training_util.create_global_step()

  hook = basic_session_run_hooks.StopAtStepHook(num_steps=2)

  with ms.MonitoredSession(session_creator=ms.ChiefSessionCreator(),
                           hooks=[hook]) as sess:
    fd = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
    result = sess.run(output, fd)
    self.assertAllClose(result, [[1., 2.], [6., 8.]])

    fd = {pa: [[0., 0.], [1., 1.]], pb: [[2., 1.], [4., 5.]]}
    result = sess.run(output, fd)
    self.assertAllClose(result, [[2., 1.], [5., 6.]])
def create_global_step(graph=None):
  """Create global step tensor in graph.

  Args:
    graph: The graph in which to create the global step tensor. If missing,
      use default graph.

  Returns:
    Global step tensor.

  Raises:
    ValueError: if global step tensor is already defined.
  """
  return training_util.create_global_step(graph)
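# A minimal usage sketch (assumption: TF1.x graph mode, not from the original
# sources) for the wrapper above: the global step is created at most once per
# graph, and a second call in the same graph raises the documented ValueError.
import tensorflow.compat.v1 as tf

with tf.Graph().as_default():
  step = tf.train.create_global_step()
  assert tf.train.get_global_step() is step
  try:
    tf.train.create_global_step()  # second call in the same graph
  except ValueError:
    pass  # 'already exists', as exercised by test_create_global_step above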
def testGlobalStepIsNotWrappedOnOneGPU(self):
  strategy, _, _ = create_test_objects(num_gpus=1)
  with ops.Graph().as_default(), strategy.scope():
    created_step = training_util.create_global_step()
    get_step = training_util.get_global_step()
    self.assertEqual(created_step, get_step,
                     msg=('created_step %s type %s vs. get_step %s type %s' %
                          (id(created_step), created_step.__class__.__name__,
                           id(get_step), get_step.__class__.__name__)))
    self.assertIs(resource_variable_ops.ResourceVariable, type(created_step))
    self.assertIs(resource_variable_ops.ResourceVariable, type(get_step))
    # All variables have a _distribute_strategy attribute. Only variable
    # subclasses in distribution strategy expose it publicly.
    self.assertFalse(hasattr(strategy, 'distribute_strategy'))
    self.assertIs(strategy, created_step._distribute_strategy)
def testGlobalStepIsNotWrappedOnOneGPU(self, use_core_strategy):
  strategy, _, _ = create_test_objects(
      num_gpus=1, use_core_strategy=use_core_strategy)
  with ops.Graph().as_default(), strategy.scope():
    created_step = training_util.create_global_step()
    get_step = training_util.get_global_step()
    self.assertEqual(created_step, get_step,
                     msg=('created_step %s type %s vs. get_step %s type %s' %
                          (id(created_step), created_step.__class__.__name__,
                           id(get_step), get_step.__class__.__name__)))
    self.assertIs(resource_variable_ops.ResourceVariable, type(created_step))
    self.assertIs(resource_variable_ops.ResourceVariable, type(get_step))
    # All variables have a _distribute_strategy attribute. Only variable
    # subclasses in distribution strategy expose it publicly.
    self.assertFalse(hasattr(strategy, 'distribute_strategy'))
    self.assertIs(strategy, created_step._distribute_strategy)
def global_step(self):
  if self._global_step is None:
    # Get the default create_global_step utility to actually call
    # self.add_variable, by setting a custom creator.
    def _owned_variable_as_creator(next_creator, initial_value, **kwargs):
      def _creator_as_getter(initializer, **kwargs):
        return next_creator(initial_value=initializer, **kwargs)

      return self.add_variable(
          getter=_creator_as_getter, initializer=initial_value, shape=[],
          **kwargs)

    with variable_scope.variable_creator_scope(_owned_variable_as_creator):
      self._global_step = training_util.create_global_step()
  return self._global_step
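# A standalone sketch (assumption: TF1.x, not from the original sources) of
# the newer mechanism used above: variable_creator_scope intercepts every
# variable created inside its scope, so create_global_step is routed through
# the supplied creator rather than a scope's custom getter.
import tensorflow.compat.v1 as tf

created = []


def _tracking_creator(next_creator, **kwargs):
  # Delegate to the default creator, but record the variable it builds.
  var = next_creator(**kwargs)
  created.append(var)
  return var


with tf.Graph().as_default():
  with tf.variable_creator_scope(_tracking_creator):
    step = tf.train.create_global_step()
  assert created and created[0] is step  # the creator saw the global step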
def create_global_step(graph=None):
  """Create global step tensor in graph.

  This API is deprecated. Use core framework training version instead.

  Args:
    graph: The graph in which to create the global step tensor. If missing,
      use default graph.

  Returns:
    Global step tensor.

  Raises:
    ValueError: if global step tensor is already defined.
  """
  return training_util.create_global_step(graph)
def test_supervisor_run_gan_model_train_ops_multiple_steps(self):
  step = training_util.create_global_step()
  train_ops = namedtuples.GANTrainOps(
      generator_train_op=constant_op.constant(3.0),
      discriminator_train_op=constant_op.constant(2.0),
      global_step_inc_op=step.assign_add(1))
  train_steps = namedtuples.GANTrainSteps(
      generator_train_steps=3, discriminator_train_steps=4)

  final_loss = slim_learning.train(
      train_op=train_ops,
      logdir='',
      global_step=step,
      number_of_steps=1,
      train_step_fn=train.get_sequential_train_steps(train_steps))
  self.assertTrue(np.isscalar(final_loss))
  self.assertEqual(17.0, final_loss)
def assert_increasing_global_step(self, optimizer):
  keras_model, _, _, train_input_fn, _ = get_resource_for_simple_model(
      model_type='sequential', is_evaluate=True)
  keras_model.compile(
      loss='categorical_crossentropy',
      optimizer=optimizer,
      metrics=['mse', keras.metrics.CategoricalAccuracy()])
  with self.cached_session() as sess:
    keras_model_fn = keras_lib._create_keras_model_fn(keras_model)
    global_step = training_util.create_global_step()
    features, labels = train_input_fn().make_one_shot_iterator().get_next()
    spec = keras_model_fn(features, labels, mode=model_fn_lib.ModeKeys.TRAIN)

    sess.run(variables.global_variables_initializer())
    sess.run(variables.local_variables_initializer())

    self.assertEqual(global_step.eval(), 0)  # Sanity check
    sess.run(spec.train_op)
    self.assertEqual(global_step.eval(), 1)
def test_calls_and_steps(self):
  with ops.Graph().as_default(), session_lib.Session() as sess:
    global_step_tensor = training_util.create_global_step()
    inc_5 = state_ops.assign_add(global_step_tensor, 5)
    mock_mon = FakeMonitor()
    mock_mon2 = FakeMonitor()

    hook = learn.monitors.RunHookAdapterForMonitors([mock_mon, mock_mon2])
    hook.begin()
    for mon in [mock_mon, mock_mon2]:
      self.assertEqual(mon.call_counter['begin'], 1)

    sess.run(variables.global_variables_initializer())
    sess.run(global_step_tensor.assign(10))

    mon_sess = monitored_session._HookedSession(sess=sess, hooks=[hook])

    mon_sess.run(inc_5)
    for mon in [mock_mon, mock_mon2]:
      self.assertEqual(mon.output, {})
      self.assertEqual(mon.last_begin_step, 11)
      self.assertEqual(mon.last_end_step, 11)
      self.assertEqual(mon.last_post_step, 11)
      self.assertEqual(mon.call_counter['step_end'], 1)
      self.assertEqual(mon.call_counter['step_begin'], 1)
      self.assertEqual(mon.call_counter['post_step'], 1)

    mon_sess.run(inc_5)
    for mon in [mock_mon, mock_mon2]:
      self.assertEqual(mon.output, {})
      self.assertEqual(mon.last_begin_step, 16)
      self.assertEqual(mon.last_end_step, 16)
      self.assertEqual(mon.last_post_step, 16)
      self.assertEqual(mon.call_counter['step_end'], 2)
      self.assertEqual(mon.call_counter['step_begin'], 2)
      self.assertEqual(mon.call_counter['post_step'], 2)

    hook.end(sess)
    for mon in [mock_mon, mock_mon2]:
      self.assertEqual(mon.call_counter['end'], 1)
def _export_estimator(estimator,
                      export_dir,
                      signature_fn,
                      input_fn,
                      default_batch_size,
                      exports_to_keep,
                      input_feature_key=None,
                      use_deprecated_input_fn=True,
                      prediction_key=None,
                      checkpoint_path=None):
  if use_deprecated_input_fn:
    input_fn = input_fn or _default_input_fn
  elif input_fn is None:
    raise ValueError('input_fn must be defined.')

  # If checkpoint_path is specified, use the specified checkpoint path.
  checkpoint_path = (checkpoint_path or
                     checkpoint_management.latest_checkpoint(
                         estimator._model_dir))
  with ops.Graph().as_default() as g:
    training_util.create_global_step(g)

    if use_deprecated_input_fn:
      examples = array_ops.placeholder(dtype=dtypes.string,
                                       shape=[default_batch_size],
                                       name='input_example_tensor')
      features = input_fn(estimator, examples)
    else:
      features, _ = input_fn()
      examples = None
      if input_feature_key is not None:
        examples = features.pop(input_feature_key)

    if (not features) and (examples is None):
      raise ValueError('Either features or examples must be defined.')

    predictions = estimator._get_predict_ops(features).predictions

    if prediction_key is not None:
      predictions = predictions[prediction_key]

    # Explicit signature_fn takes priority
    if signature_fn:
      default_signature, named_graph_signatures = signature_fn(examples,
                                                               features,
                                                               predictions)
    else:
      try:
        # Some estimators provide a signature function.
        # TODO(zakaria): check if the estimator has this function,
        #   raise helpful error if not
        signature_fn = estimator._create_signature_fn()
        default_signature, named_graph_signatures = (
            signature_fn(examples, features, predictions))
      except AttributeError:
        logging.warn(
            'Change warning: `signature_fn` will be required after '
            '2016-08-01.\n'
            'Using generic signatures for now. To maintain this behavior, '
            'pass:\n'
            '  signature_fn=export.generic_signature_fn\n'
            'Also consider passing a regression or classification signature; '
            'see cl/126430915 for an example.')
        default_signature, named_graph_signatures = generic_signature_fn(
            examples, features, predictions)
    if exports_to_keep is not None:
      exports_to_keep = gc.largest_export_versions(exports_to_keep)
    return _export_graph(
        g,
        _get_saver(),
        checkpoint_path,
        export_dir,
        default_graph_signature=default_signature,
        named_graph_signatures=named_graph_signatures,
        exports_to_keep=exports_to_keep)
def test_global_step_read_is_none_if_there_is_no_global_step(self):
  with ops.Graph().as_default():
    self.assertIsNone(training_util._get_or_create_global_step_read())
    training_util.create_global_step()
    self.assertIsNotNone(training_util._get_or_create_global_step_read())
def test_reads_from_cache(self):
  with ops.Graph().as_default():
    training_util.create_global_step()
    first = training_util._get_or_create_global_step_read()
    second = training_util._get_or_create_global_step_read()
    self.assertEqual(first, second)