Example #1
def _save_first_checkpoint(keras_model, estimator, custom_objects,
                           keras_weights):
    """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    estimator: keras estimator.
    custom_objects: Dictionary for custom objects.
    keras_weights: A flat list of Numpy arrays for weights of given keras_model.

  Returns:
    Nothing. The checkpoint is written under `estimator.model_dir`.
  """
    # Load weights and save to checkpoint if there is no checkpoint
    latest_path = saver_lib.latest_checkpoint(estimator.model_dir)
    if not latest_path:
        with ops.Graph().as_default():
            random_seed.set_random_seed(estimator.config.tf_random_seed)
            training_util.create_global_step()
            model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN,
                                           keras_model, custom_objects)
            # save to checkpoint
            with session.Session(config=estimator._session_config) as sess:
                if keras_weights:
                    model.set_weights(keras_weights)
                # Make update ops and initialize all variables.
                if not model.train_function:
                    # pylint: disable=protected-access
                    model._make_train_function()
                    K._initialize_variables(sess)
                    # pylint: enable=protected-access
                saver = saver_lib.Saver()
                saver.save(
                    sess, os.path.join(estimator.model_dir,
                                       'keras_model.ckpt'))
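The essentials of the pattern above reduce to: build a fresh graph, create the global step, initialize variables in a session, and write a checkpoint with a Saver. A minimal self-contained sketch of that pattern, assuming the TF1-compatibility API (tf.compat.v1) and a hypothetical model_dir:

import os
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

model_dir = '/tmp/example_model_dir'  # hypothetical directory
with tf.Graph().as_default():
    tf.train.create_global_step()              # global step lives in this graph
    weight = tf.Variable(0.5, name='weight')   # stand-in for model weights
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf.train.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))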
Example #2
 def _test_logits(self, mode, rnn_units, logits_dimension, features_fn,
                  sequence_feature_columns, context_feature_columns,
                  expected_logits):
     """Tests that the expected logits are calculated."""
     with ops.Graph().as_default():
         # Global step needed for MonitoredSession, which is in turn used to
         # explicitly set variable weights through a checkpoint.
         training_util.create_global_step()
         # Use a variable scope here with 'rnn', emulating the rnn model_fn, so
         # the checkpoint naming is shared.
         with variable_scope.variable_scope('rnn'):
             input_layer_partitioner = (
                 partitioned_variables.min_max_variable_partitioner(
                     max_partitions=0, min_slice_size=64 << 20))
             logit_fn = rnn._rnn_logit_fn_builder(
                 output_units=logits_dimension,
                 rnn_cell_fn=rnn._make_rnn_cell_fn(rnn_units),
                 sequence_feature_columns=sequence_feature_columns,
                 context_feature_columns=context_feature_columns,
                 input_layer_partitioner=input_layer_partitioner)
             # Features are constructed within this function, otherwise the Tensors
             # containing the features would be defined outside this graph.
             logits = logit_fn(features=features_fn(), mode=mode)
             with monitored_session.MonitoredTrainingSession(
                     checkpoint_dir=self._model_dir) as sess:
                 self.assertAllClose(expected_logits,
                                     sess.run(logits),
                                     atol=1e-4)
Example #3
 def _test_logits(
     self, mode, hidden_units, logits_dimension, inputs, expected_logits):
   """Tests that the expected logits are passed to mock head."""
   with ops.Graph().as_default():
     training_util.create_global_step()
     head = _mock_head(
         self,
         hidden_units=hidden_units,
         logits_dimension=logits_dimension,
         expected_logits=expected_logits)
     estimator_spec = dnn._dnn_model_fn(
         features={'age': constant_op.constant(inputs)},
         labels=constant_op.constant([[1]]),
         mode=mode,
         head=head,
         hidden_units=hidden_units,
         feature_columns=[
             feature_column.numeric_column('age',
                                           shape=np.array(inputs).shape[1:])],
         optimizer=_mock_optimizer(self, hidden_units))
     with monitored_session.MonitoredTrainingSession(
         checkpoint_dir=self._model_dir) as sess:
       if mode == model_fn.ModeKeys.TRAIN:
         sess.run(estimator_spec.train_op)
       elif mode == model_fn.ModeKeys.EVAL:
         sess.run(estimator_spec.loss)
       elif mode == model_fn.ModeKeys.PREDICT:
         sess.run(estimator_spec.predictions)
       else:
         self.fail('Invalid mode: {}'.format(mode))
Example #4
def _save_first_checkpoint(keras_model, estimator, custom_objects,
                           keras_weights):
  """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    estimator: keras estimator.
    custom_objects: Dictionary for custom objects.
    keras_weights: A flat list of Numpy arrays for weights of given keras_model.

  Returns:
    Nothing. The checkpoint is written under `estimator.model_dir`.
  """
  # Load weights and save to checkpoint if there is no checkpoint
  latest_path = saver_lib.latest_checkpoint(estimator.model_dir)
  if not latest_path:
    with ops.Graph().as_default():
      random_seed.set_random_seed(estimator.config.tf_random_seed)
      training_util.create_global_step()
      model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN, keras_model,
                                     custom_objects)
      # save to checkpoint
      with session.Session(config=estimator._session_config) as sess:
        if keras_weights:
          model.set_weights(keras_weights)
        # Make update ops and initialize all variables.
        if not model.train_function:
          # pylint: disable=protected-access
          model._make_train_function()
          K._initialize_variables(sess)
          # pylint: enable=protected-access
        saver = saver_lib.Saver()
        saver.save(sess, os.path.join(estimator.model_dir, 'keras_model.ckpt'))
Example #5
  def test_features_tensor_raises_value_error(self):
    """Tests that passing a Tensor for features raises a ValueError."""
    hidden_units = (2, 2)
    logits_dimension = 3
    inputs = ([[10.]], [[8.]])
    expected_logits = [[0, 0, 0]]

    with ops.Graph().as_default():
      training_util.create_global_step()
      head = mock_head(
          self,
          hidden_units=hidden_units,
          logits_dimension=logits_dimension,
          expected_logits=expected_logits)
      with self.assertRaisesRegexp(ValueError, 'features should be a dict'):
        self._dnn_model_fn(
            features=constant_op.constant(inputs),
            labels=constant_op.constant([[1]]),
            mode=model_fn.ModeKeys.TRAIN,
            head=head,
            hidden_units=hidden_units,
            feature_columns=[
                feature_column.numeric_column(
                    'age', shape=np.array(inputs).shape[1:])
            ],
            optimizer=mock_optimizer(self, hidden_units))
Example #6
 def _test_logits(self, mode, rnn_units, logits_dimension, features_fn,
                  sequence_feature_columns, context_feature_columns,
                  expected_logits):
   """Tests that the expected logits are calculated."""
   with ops.Graph().as_default():
     # Global step needed for MonitoredSession, which is in turn used to
     # explicitly set variable weights through a checkpoint.
     training_util.create_global_step()
     # Use a variable scope here with 'rnn', emulating the rnn model_fn, so
     # the checkpoint naming is shared.
     with variable_scope.variable_scope('rnn'):
       input_layer_partitioner = (
           partitioned_variables.min_max_variable_partitioner(
               max_partitions=0, min_slice_size=64 << 20))
       logit_fn = rnn._rnn_logit_fn_builder(
           output_units=logits_dimension,
           rnn_cell_fn=rnn._make_rnn_cell_fn(rnn_units),
           sequence_feature_columns=sequence_feature_columns,
           context_feature_columns=context_feature_columns,
           input_layer_partitioner=input_layer_partitioner)
       # Features are constructed within this function, otherwise the Tensors
       # containing the features would be defined outside this graph.
       logits = logit_fn(features=features_fn(), mode=mode)
       with monitored_session.MonitoredTrainingSession(
           checkpoint_dir=self._model_dir) as sess:
         self.assertAllClose(expected_logits, sess.run(logits), atol=1e-4)
Example #7
 def _test_logits(self, mode, hidden_units, logits_dimension, inputs,
                  expected_logits):
     """Tests that the expected logits are passed to mock head."""
     with ops.Graph().as_default():
         training_util.create_global_step()
         head = _mock_head(self,
                           hidden_units=hidden_units,
                           logits_dimension=logits_dimension,
                           expected_logits=expected_logits)
         estimator_spec = dnn._dnn_model_fn(
             features={'age': constant_op.constant(inputs)},
             labels=constant_op.constant([[1]]),
             mode=mode,
             head=head,
             hidden_units=hidden_units,
             feature_columns=[
                 feature_column.numeric_column(
                     'age', shape=np.array(inputs).shape[1:])
             ],
             optimizer=_mock_optimizer(self, hidden_units))
         with monitored_session.MonitoredTrainingSession(
                 checkpoint_dir=self._model_dir) as sess:
             if mode == model_fn.ModeKeys.TRAIN:
                 sess.run(estimator_spec.train_op)
             elif mode == model_fn.ModeKeys.EVAL:
                 sess.run(estimator_spec.loss)
             elif mode == model_fn.ModeKeys.PREDICT:
                 sess.run(estimator_spec.predictions)
             else:
                 self.fail('Invalid mode: {}'.format(mode))
Example #8
def _save_first_checkpoint(keras_model, custom_objects, config,
                           save_object_ckpt):
    """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    config: Estimator config.
    save_object_ckpt: Whether to save an object-based checkpoint.

  Returns:
    The path where keras model checkpoint is saved.
  """
    # save checkpoint into subdirectory to allow warm start
    keras_model_dir = os.path.join(config.model_dir, 'keras')
    # Load weights and save to checkpoint if there is no checkpoint
    latest_path = checkpoint_management.latest_checkpoint(keras_model_dir)
    if not latest_path:
        keras_weights = None
        if _any_weight_initialized(keras_model):
            keras_weights = keras_model.get_weights()
        if not gfile.IsDirectory(keras_model_dir):
            gfile.MakeDirs(keras_model_dir)
        with ops.Graph().as_default():
            random_seed.set_random_seed(config.tf_random_seed)
            training_util.create_global_step()
            model = _clone_and_build_model(ModeKeys.TRAIN, keras_model,
                                           custom_objects)

            # Init the train_function outside of the session context. The train
            # function updates the graph by adding backprop parts, which may try
            # to update nodes in the forward graph and will fail if done within
            # the same session.
            # Always create the train_function here since the model is just cloned.
            # See https://github.com/tensorflow/tensorflow/issues/27750 for details.
            model._make_train_function()  # pylint: disable=protected-access

            # save to checkpoint
            with session.Session(config=config.session_config) as sess:
                if keras_weights:
                    model.set_weights(keras_weights)
                # model._make_train_function() will potentially create the optimizer
                # variable, which will require another variable initialization.
                K._initialize_variables(sess)  # pylint: disable=protected-access

                if save_object_ckpt:
                    model._track_trackable(  # pylint: disable=protected-access
                        training_util.get_global_step(),
                        'estimator_global_step')
                    latest_path = os.path.join(keras_model_dir,
                                               'keras_model.ckpt')
                    model.save_weights(latest_path)
                else:
                    saver = saver_lib.Saver()
                    latest_path = os.path.join(keras_model_dir,
                                               'keras_model.ckpt')
                    saver.save(sess, latest_path)

    return latest_path
Example #9
 def run_session(self, hooks, should_stop):
   hooks = hooks if isinstance(hooks, list) else [hooks]
   with ops.Graph().as_default():
     training_util.create_global_step()
     no_op = control_flow_ops.no_op()
     with monitored_session.SingularMonitoredSession(hooks=hooks) as mon_sess:
       mon_sess.run(no_op)
       self.assertEqual(mon_sess.should_stop(), should_stop)
Example #10
 def run_session(self, hooks, should_stop):
   hooks = hooks if isinstance(hooks, list) else [hooks]
   with ops.Graph().as_default():
     training_util.create_global_step()
     no_op = control_flow_ops.no_op()
     with monitored_session.SingularMonitoredSession(hooks=hooks) as mon_sess:
       mon_sess.run(no_op)
       self.assertEqual(mon_sess.should_stop(), should_stop)
Example #11
    def test_checkpoint_overwrite_warm_start(self):
        extra_run_step = 2
        ws_ckpt_dir = tempfile.mkdtemp(
            prefix=os.path.join(self.get_temp_dir(), "warm_start"))
        final_ckpt_dir = tempfile.mkdtemp(
            prefix=os.path.join(self.get_temp_dir(), "final"))
        for run_id, num_shards, k_dtype, d_dtype, init_mode, dim, run_step \
            in _next_run_step_config():
            error_msg = "Cond:{},{},{},{},{},{}".format(
                num_shards, k_dtype, d_dtype, init_mode, dim, run_step)
            with ops.Graph().as_default() as g:
                with self.session(graph=g,
                                  use_gpu=test_util.is_gpu_available(),
                                  config=default_config) as sess:
                    training_util.create_global_step()
                    graph = TestGraph(k_dtype, d_dtype, dim, num_shards, 'var',
                                      'devar', run_id)
                    self.evaluate(variables.global_variables_initializer())
                    sess.run([graph.devar_init_op])
                    prev_x = sess.run([graph.x])[0]
                    for _ in range(run_step):
                        sess.run([graph.var_opt_op, graph.devar_opt_op])
                    saver_lib.Saver().save(sess,
                                           os.path.join(ws_ckpt_dir, "model"))
                    prev_ws_var_loss, prev_ws_devar_loss = sess.run(
                        [graph.var_loss, graph.devar_loss])
                    self.assertAllCloseAccordingToType(prev_ws_var_loss,
                                                       prev_ws_devar_loss,
                                                       msg=error_msg)
                    for _ in range(extra_run_step):
                        sess.run([graph.var_opt_op, graph.devar_opt_op])
                    saver_lib.Saver().save(
                        sess, os.path.join(final_ckpt_dir, "model"))
                    prev_final_var_loss, prev_final_devar_loss = sess.run(
                        [graph.var_loss, graph.devar_loss])
                    self.assertAllCloseAccordingToType(prev_final_var_loss,
                                                       prev_final_devar_loss,
                                                       msg=error_msg)

            with ops.Graph().as_default():
                training_util.create_global_step()
                graph = TestGraph(k_dtype, d_dtype, dim, num_shards, 'var',
                                  'devar', run_id, prev_x)
                ws_util.warm_start(ws_ckpt_dir, vars_to_warm_start=['.*'])
                with monitored_session.MonitoredTrainingSession(
                        config=default_config,
                        is_chief=True,
                        checkpoint_dir=final_ckpt_dir) as sess:
                    var_loss, devar_loss = sess.run(
                        [graph.var_loss, graph.devar_loss])
                    self.assertAllCloseAccordingToType(var_loss,
                                                       prev_final_var_loss,
                                                       msg=error_msg)
                    self.assertAllCloseAccordingToType(devar_loss,
                                                       prev_final_devar_loss,
                                                       msg=error_msg)
Example #12
 def test_create_global_step(self):
   self.assertIsNone(training_util.get_global_step())
   with ops.Graph().as_default() as g:
     global_step = training_util.create_global_step()
     self._assert_global_step(global_step)
     self.assertRaisesRegex(ValueError, 'already exists',
                            training_util.create_global_step)
     self.assertRaisesRegex(ValueError, 'already exists',
                            training_util.create_global_step, g)
     self._assert_global_step(training_util.create_global_step(ops.Graph()))
Example #13
 def test_create_global_step(self):
   self.assertIsNone(training_util.get_global_step())
   with ops.Graph().as_default() as g:
     global_step = training_util.create_global_step()
     self._assert_global_step(global_step)
     self.assertRaisesRegexp(ValueError, 'already exists',
                             training_util.create_global_step)
     self.assertRaisesRegexp(ValueError, 'already exists',
                             training_util.create_global_step, g)
     self._assert_global_step(training_util.create_global_step(ops.Graph()))
Example #14
def _save_first_checkpoint(keras_model, custom_objects, config,
                           save_object_ckpt):
    """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    config: Estimator config.
    save_object_ckpt: Whether to save an object-based checkpoint.

  Returns:
    The path where keras model checkpoint is saved.
  """
    # save checkpoint into subdirectory to allow warm start
    keras_model_dir = os.path.join(config.model_dir, 'keras')
    # Load weights and save to checkpoint if there is no checkpoint
    latest_path = checkpoint_management.latest_checkpoint(keras_model_dir)
    if not latest_path:
        keras_weights = None
        if _any_weight_initialized(keras_model):
            keras_weights = keras_model.get_weights()
        if not gfile.IsDirectory(keras_model_dir):
            gfile.MakeDirs(keras_model_dir)
        with ops.Graph().as_default():
            random_seed.set_random_seed(config.tf_random_seed)
            training_util.create_global_step()
            model = _clone_and_build_model(ModeKeys.TRAIN, keras_model,
                                           custom_objects)
            # save to checkpoint
            with session.Session(config=config.session_config) as sess:
                if keras_weights:
                    model.set_weights(keras_weights)
                # Make update ops and initialize all variables.
                if not model.train_function:
                    # pylint: disable=protected-access
                    model._make_train_function()
                    K._initialize_variables(sess)
                    # pylint: enable=protected-access

                if save_object_ckpt:
                    model._track_trackable(  # pylint: disable=protected-access
                        training_util.get_global_step(),
                        'estimator_global_step')
                    latest_path = os.path.join(keras_model_dir,
                                               'keras_model.ckpt')
                    model.save_weights(latest_path)
                else:
                    saver = saver_lib.Saver()
                    latest_path = os.path.join(keras_model_dir,
                                               'keras_model.ckpt')
                    saver.save(sess, latest_path)

    return latest_path
Example #15
    def test_multi_feature_column_multi_dim_logits(self):
        """Tests multiple feature columns and multi-dimensional logits.

    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
    difference is that the input consists of two 1D feature columns, instead of
    one 2D feature column.
    """
        base_global_step = 100
        _create_checkpoint((
            ([[.6, .5], [-.6, -.5]], [.1, -.1]),
            ([[1., .8], [-.8, -1.]], [.2, -.2]),
            ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
        ), base_global_step, self._model_dir)
        hidden_units = (2, 2)
        logits_dimension = 3
        inputs = ([[10.]], [[8.]])
        expected_logits = [[-0.48, 0.48, 0.39]]

        for mode in [
                model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                model_fn.ModeKeys.PREDICT
        ]:
            with ops.Graph().as_default():
                training_util.create_global_step()
                head = _mock_head(self,
                                  hidden_units=hidden_units,
                                  logits_dimension=logits_dimension,
                                  expected_logits=expected_logits)
                estimator_spec = dnn._dnn_model_fn(
                    features={
                        'age': constant_op.constant(inputs[0]),
                        'height': constant_op.constant(inputs[1])
                    },
                    labels=constant_op.constant([[1]]),
                    mode=mode,
                    head=head,
                    hidden_units=hidden_units,
                    feature_columns=[
                        feature_column.numeric_column('age'),
                        feature_column.numeric_column('height')
                    ],
                    optimizer=_mock_optimizer(self, hidden_units))
                with monitored_session.MonitoredTrainingSession(
                        checkpoint_dir=self._model_dir) as sess:
                    if mode == model_fn.ModeKeys.TRAIN:
                        sess.run(estimator_spec.train_op)
                    elif mode == model_fn.ModeKeys.EVAL:
                        sess.run(estimator_spec.loss)
                    elif mode == model_fn.ModeKeys.PREDICT:
                        sess.run(estimator_spec.predictions)
                    else:
                        self.fail('Invalid mode: {}'.format(mode))
Example #16
  def test_multi_feature_column_multi_dim_logits(self):
    """Tests multiple feature columns and multi-dimensional logits.

    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
    difference is that the input consists of two 1D feature columns, instead of
    one 2D feature column.
    """
    base_global_step = 100
    create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    hidden_units = (2, 2)
    logits_dimension = 3
    inputs = ([[10.]], [[8.]])
    expected_logits = [[-0.48, 0.48, 0.39]]

    for mode in [
        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
        model_fn.ModeKeys.PREDICT
    ]:
      with ops.Graph().as_default():
        training_util.create_global_step()
        head = mock_head(
            self,
            hidden_units=hidden_units,
            logits_dimension=logits_dimension,
            expected_logits=expected_logits)
        estimator_spec = self._dnn_model_fn(
            features={
                'age': constant_op.constant(inputs[0]),
                'height': constant_op.constant(inputs[1])
            },
            labels=constant_op.constant([[1]]),
            mode=mode,
            head=head,
            hidden_units=hidden_units,
            feature_columns=[
                feature_column.numeric_column('age'),
                feature_column.numeric_column('height')
            ],
            optimizer=mock_optimizer(self, hidden_units))
        with monitored_session.MonitoredTrainingSession(
            checkpoint_dir=self._model_dir) as sess:
          if mode == model_fn.ModeKeys.TRAIN:
            sess.run(estimator_spec.train_op)
          elif mode == model_fn.ModeKeys.EVAL:
            sess.run(estimator_spec.loss)
          elif mode == model_fn.ModeKeys.PREDICT:
            sess.run(estimator_spec.predictions)
          else:
            self.fail('Invalid mode: {}'.format(mode))
Example #17
 def test_reads_before_increments(self):
     with ops.Graph().as_default():
         training_util.create_global_step()
         read_tensor = training_util._get_or_create_global_step_read()
         inc_op = training_util._increment_global_step(1)
         inc_three_op = training_util._increment_global_step(3)
         with monitored_session.MonitoredTrainingSession() as sess:
             read_value, _ = sess.run([read_tensor, inc_op])
             self.assertEqual(0, read_value)
             read_value, _ = sess.run([read_tensor, inc_three_op])
             self.assertEqual(1, read_value)
             read_value = sess.run(read_tensor)
             self.assertEqual(4, read_value)
Example #18
 def test_reads_before_increments(self):
   with ops.Graph().as_default():
     training_util.create_global_step()
     read_tensor = training_util._get_or_create_global_step_read()
     inc_op = training_util._increment_global_step(1)
     inc_three_op = training_util._increment_global_step(3)
     with monitored_session.MonitoredTrainingSession() as sess:
       read_value, _ = sess.run([read_tensor, inc_op])
       self.assertEqual(0, read_value)
       read_value, _ = sess.run([read_tensor, inc_three_op])
       self.assertEqual(1, read_value)
       read_value = sess.run(read_tensor)
       self.assertEqual(4, read_value)
Example #19
def _save_first_checkpoint(keras_model, custom_objects, config):
    """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    config: Estimator config.

  Returns:
    The path where keras model checkpoint is saved.
  """
    # save checkpoint into subdirectory to allow warm start
    keras_model_dir = os.path.join(config.model_dir, 'keras')
    # Load weights and save to checkpoint if there is no checkpoint
    latest_path = checkpoint_management.latest_checkpoint(keras_model_dir)
    if not latest_path:
        keras_weights = None
        if _any_weight_initialized(keras_model):
            keras_weights = keras_model.get_weights()
        if not gfile.IsDirectory(keras_model_dir):
            gfile.MakeDirs(keras_model_dir)
        with ops.Graph().as_default():
            random_seed.set_random_seed(config.tf_random_seed)
            training_util.create_global_step()
            model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN,
                                           keras_model, custom_objects)
            # save to checkpoint
            with session.Session(config=config.session_config) as sess:
                if keras_weights:
                    model.set_weights(keras_weights)
                # Make update ops and initialize all variables.
                if not model.train_function:
                    # pylint: disable=protected-access
                    model._make_train_function()
                    # We use the global variables collection here because the
                    # estimator runs eager mode under a context.graph_mode()
                    # context manager. When we try to get all the TF optimizer
                    # variables using optimizer.variables(), only variables that
                    # belong to the current graph are returned, and that check
                    # (variable.op.graph is current_graph) errors because the
                    # context is graph mode while the variables are eager.
                    # TODO(psv): investigate this and see if we can remove the usage of
                    # collection here.
                    K._initialize_variables(
                        sess, variables_module.global_variables())
                    # pylint: enable=protected-access
                saver = saver_lib.Saver()
                latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt')
                saver.save(sess, latest_path)
    return latest_path
Example #20
def create_checkpoint(rnn_weights, rnn_biases, logits_weights, logits_biases,
                      global_step, model_dir):
  """Create checkpoint file with provided model weights.

  Args:
    rnn_weights: Iterable of values of weights for the RNN cell.
    rnn_biases: Iterable of values of biases for the RNN cell.
    logits_weights: Iterable of values for matrix connecting RNN output to
      logits.
    logits_biases: Iterable of values for logits bias term.
    global_step: Initial global step to save in checkpoint.
    model_dir: Directory into which checkpoint is saved.
  """
  model_weights = {}
  model_weights[CELL_WEIGHTS_NAME] = rnn_weights
  model_weights[CELL_BIAS_NAME] = rnn_biases
  model_weights[LOGITS_WEIGHTS_NAME] = logits_weights
  model_weights[LOGITS_BIAS_NAME] = logits_biases

  with ops.Graph().as_default():
    # Create model variables.
    for k, v in six.iteritems(model_weights):
      variables_lib.Variable(v, name=k, dtype=dtypes.float32)

    # Create non-model variables.
    global_step_var = training_util.create_global_step()
    assign_op = global_step_var.assign(global_step)

    # Initialize vars and save checkpoint.
    with monitored_session.MonitoredTrainingSession(
        checkpoint_dir=model_dir) as sess:
      sess.run(assign_op)
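A hypothetical call to the helper above, with made-up values shaped for an RNN cell that sees two input features and has one unit (cell kernel shape (3, 1): stacked input plus recurrent weights):

create_checkpoint(
    rnn_weights=[[.1], [-.2], [.3]],  # hypothetical cell kernel values
    rnn_biases=[.1],
    logits_weights=[[1.]],
    logits_biases=[0.],
    global_step=100,
    model_dir='/tmp/rnn_test_model')  # hypothetical directory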
Example #21
def _create_checkpoint(weights_and_biases, global_step, model_dir):
  """Create checkpoint file with provided model weights.

  Args:
    weights_and_biases: Iterable of tuples of weight and bias values.
    global_step: Initial global step to save in checkpoint.
    model_dir: Directory into which checkpoint is saved.
  """
  weights, biases = zip(*weights_and_biases)
  model_weights = {}

  # Hidden layer weights.
  for i in range(0, len(weights) - 1):
    model_weights[_HIDDEN_WEIGHTS_NAME_PATTERN % i] = weights[i]
    model_weights[_HIDDEN_BIASES_NAME_PATTERN % i] = biases[i]

  # Output layer weights.
  model_weights[_LOGITS_WEIGHTS_NAME] = weights[-1]
  model_weights[_LOGITS_BIASES_NAME] = biases[-1]

  with ops.Graph().as_default():
    # Create model variables.
    for k, v in six.iteritems(model_weights):
      variables_lib.Variable(v, name=k, dtype=dtypes.float32)

    # Create non-model variables.
    global_step_var = training_util.create_global_step()

    # Initialize vars and save checkpoint.
    with tf_session.Session() as sess:
      variables_lib.global_variables_initializer().run()
      global_step_var.assign(global_step).eval()
      saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
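For reference, the DNN tests elsewhere on this page (see Example #15) call this helper with two 2-unit hidden layers plus a 3-dimensional logits layer, saved at global step 100:

_create_checkpoint((
    ([[.6, .5], [-.6, -.5]], [.1, -.1]),              # hidden layer 0
    ([[1., .8], [-.8, -1.]], [.2, -.2]),              # hidden layer 1
    ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),  # logits layer
), 100, '/tmp/dnn_test_model')  # hypothetical model_dir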
Example #22
def create_checkpoint(rnn_weights, rnn_biases, logits_weights, logits_biases,
                      global_step, model_dir):
    """Create checkpoint file with provided model weights.

  Args:
    rnn_weights: Iterable of values of weights for the RNN cell.
    rnn_biases: Iterable of values of biases for the RNN cell.
    logits_weights: Iterable of values for matrix connecting RNN output to
      logits.
    logits_biases: Iterable of values for logits bias term.
    global_step: Initial global step to save in checkpoint.
    model_dir: Directory into which checkpoint is saved.
  """
    model_weights = {}
    model_weights[CELL_WEIGHTS_NAME] = rnn_weights
    model_weights[CELL_BIAS_NAME] = rnn_biases
    model_weights[LOGITS_WEIGHTS_NAME] = logits_weights
    model_weights[LOGITS_BIAS_NAME] = logits_biases

    with ops.Graph().as_default():
        # Create model variables.
        for k, v in six.iteritems(model_weights):
            variables_lib.Variable(v, name=k, dtype=dtypes.float32)

        # Create non-model variables.
        global_step_var = training_util.create_global_step()
        assign_op = global_step_var.assign(global_step)

        # Initialize vars and save checkpoint.
        with monitored_session.MonitoredTrainingSession(
                checkpoint_dir=model_dir) as sess:
            sess.run(assign_op)
Example #23
def _create_checkpoint(weights_and_biases, global_step, model_dir):
    """Create checkpoint file with provided model weights.

  Args:
    weights_and_biases: Iterable of tuples of weight and bias values.
    global_step: Initial global step to save in checkpoint.
    model_dir: Directory into which checkpoint is saved.
  """
    weights, biases = zip(*weights_and_biases)
    model_weights = {}

    # Hidden layer weights.
    for i in range(0, len(weights) - 1):
        model_weights[_HIDDEN_WEIGHTS_NAME_PATTERN % i] = weights[i]
        model_weights[_HIDDEN_BIASES_NAME_PATTERN % i] = biases[i]

    # Output layer weights.
    model_weights[_LOGITS_WEIGHTS_NAME] = weights[-1]
    model_weights[_LOGITS_BIASES_NAME] = biases[-1]

    with ops.Graph().as_default():
        # Create model variables.
        for k, v in six.iteritems(model_weights):
            variables_lib.Variable(v, name=k, dtype=dtypes.float32)

        # Create non-model variables.
        global_step_var = training_util.create_global_step()

        # Initialize vars and save checkpoint.
        with tf_session.Session() as sess:
            variables_lib.global_variables_initializer().run()
            global_step_var.assign(global_step).eval()
            saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
Example #24
 def test_ops_with_var_and_adagrad_da(self):
     var_list = [
         deo.get_variable('sp_var', initializer=0.0, dim=2),
     ]
     gstep = training_util.create_global_step()
     opt_list = [
         adagrad_da.AdagradDAOptimizer(0.1, gstep),
     ]
     self.common_run_context(var_list, opt_list, name='adagrad_da_test')
Example #25
    def common_minimize_trainable(self, base_opt, test_opt, name):
        tf.config.set_soft_device_placement(True)
        hvd.init()
        base_opt = de.DynamicEmbeddingOptimizer(base_opt, synchronous=True)
        for dtype, run_step, dim in itertools.product([dtypes.float32], [1],
                                                      [10]):
            x = tf.random.uniform(shape=[32, dim])
            y = tf.zeros([32, 1])

            global_step = training_util.create_global_step()

            base_weight = tf.compat.v1.get_variable(name="base_weights",
                                                    initializer=tf.ones(
                                                        [10, 1]))

            base_logits = tf.nn.relu(math_ops.matmul(x, base_weight))
            base_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=y, logits=base_logits)

            base_opt_op = base_opt.minimize(base_loss,
                                            global_step,
                                            var_list=[base_weight])

            test_weight = tf.compat.v1.get_variable(name="test_weights",
                                                    initializer=tf.ones(
                                                        [10, 1]))

            test_logits = tf.nn.relu(math_ops.matmul(x, test_weight))
            test_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=y, logits=test_logits)

            grads_and_vars = test_opt.compute_gradients(test_loss,
                                                        var_list=[test_weight])
            var_list = []
            aggregated_grad = []
            for grad, var in grads_and_vars:
                var_list.append(var)
                aggregated_grad.append(hvd.allreduce(grad, op=hvd.Sum))
            aggregated_grads_and_vars = zip(aggregated_grad, var_list)
            test_opt_op = test_opt.apply_gradients(aggregated_grads_and_vars,
                                                   global_step)

            with monitored_session.MonitoredTrainingSession(
                    is_chief=True, config=default_config) as sess:

                for _ in range(run_step):
                    sess.run(base_opt_op)
                    sess.run(test_opt_op)

                self.assertAllCloseAccordingToType(
                    sess.run(base_weight),
                    sess.run(test_weight),
                    msg="Cond:{},{},{}".format(dtype, run_step, dim),
                )
Example #26
  def test_stop(self):
    hook = early_stopping._StopOnPredicateHook(
        should_stop_fn=lambda: False, run_every_secs=0)
    with ops.Graph().as_default():
      training_util.create_global_step()
      no_op = control_flow_ops.no_op()
      with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
        mon_sess.run(no_op)
        self.assertFalse(mon_sess.should_stop())
        self.assertFalse(mon_sess.raw_session().run(hook._stop_var))

    hook = early_stopping._StopOnPredicateHook(
        should_stop_fn=lambda: True, run_every_secs=0)
    with ops.Graph().as_default():
      training_util.create_global_step()
      no_op = control_flow_ops.no_op()
      with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
        mon_sess.run(no_op)
        self.assertTrue(mon_sess.should_stop())
        self.assertTrue(mon_sess.raw_session().run(hook._stop_var))
Example #27
  def global_step(self):
    if self._global_step is None:
      # Get the default create_global_step utility to actually call
      # self.add_variable, by setting a custom getter.
      def _owned_variable_as_custom_getter(getter, *args, **kwargs):
        return self.add_variable(*args, getter=getter, **kwargs)

      with variable_scope.variable_scope(
          "", custom_getter=_owned_variable_as_custom_getter):
        self._global_step = training_util.create_global_step()
    return self._global_step
Example #28
  def test_stop(self):
    hook = early_stopping._StopOnPredicateHook(
        should_stop_fn=lambda: False, run_every_secs=0)
    with ops.Graph().as_default():
      training_util.create_global_step()
      no_op = control_flow_ops.no_op()
      with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
        mon_sess.run(no_op)
        self.assertFalse(mon_sess.should_stop())
        self.assertFalse(mon_sess.raw_session().run(hook._stop_var))

    hook = early_stopping._StopOnPredicateHook(
        should_stop_fn=lambda: True, run_every_secs=0)
    with ops.Graph().as_default():
      training_util.create_global_step()
      no_op = control_flow_ops.no_op()
      with monitored_session.SingularMonitoredSession(hooks=[hook]) as mon_sess:
        mon_sess.run(no_op)
        self.assertTrue(mon_sess.should_stop())
        self.assertTrue(mon_sess.raw_session().run(hook._stop_var))
Example #29
  def global_step(self):
    if self._global_step is None:
      # Get the default create_global_step utility to actually call
      # self.add_variable, by setting a custom getter.
      def _owned_variable_as_custom_getter(getter, *args, **kwargs):
        return self.add_variable(*args, getter=getter, **kwargs)

      with variable_scope.variable_scope(
          "", custom_getter=_owned_variable_as_custom_getter):
        self._global_step = training_util.create_global_step()
    return self._global_step
Example #30
 def _gan_train_ops(self, generator_add, discriminator_add):
   step = training_util.create_global_step()
   # Increment the global count every time a train op is run so we can count
   # the number of times they're run.
   # NOTE: `use_locking=True` is required to avoid race conditions with
   # joint training.
   train_ops = namedtuples.GANTrainOps(
       generator_train_op=step.assign_add(generator_add, use_locking=True),
       discriminator_train_op=step.assign_add(
           discriminator_add, use_locking=True),
       global_step_inc_op=step.assign_add(1))
   return train_ops
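The use_locking=True flags matter when both assign_add ops are fetched in the same Session.run call: with locking, the read-modify-write updates serialize and the final value is the sum of the increments. A minimal sketch of that behavior, assuming the TF1-compatibility API:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
with tf.Graph().as_default():
    step = tf.train.create_global_step()
    gen_inc = step.assign_add(2, use_locking=True)
    dis_inc = step.assign_add(3, use_locking=True)
    with tf.train.MonitoredTrainingSession() as sess:
        sess.run([gen_inc, dis_inc])   # both increments applied exactly once
        assert sess.run(step) == 5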
Example #31
 def testGlobalStepIsWrappedOnTwoGPUs(self):
   strategy, _, _ = create_test_objects(num_gpus=2)
   with ops.Graph().as_default(), strategy.scope():
     created_step = training_util.create_global_step()
     get_step = training_util.get_global_step()
     self.assertEqual(created_step, get_step,
                      msg=('created_step %s type %s vs. get_step %s type %s' %
                           (id(created_step), created_step.__class__.__name__,
                            id(get_step), get_step.__class__.__name__)))
     self.assertIs(values.AggregatingVariable, type(created_step))
     self.assertIs(values.AggregatingVariable, type(get_step))
     self.assertIs(strategy, created_step.distribute_strategy)
Example #32
 def _gan_train_ops(self, generator_add, discriminator_add):
   step = training_util.create_global_step()
   # Increment the global count every time a train op is run so we can count
   # the number of times they're run.
   # NOTE: `use_locking=True` is required to avoid race conditions with
   # joint training.
   train_ops = namedtuples.GANTrainOps(
       generator_train_op=step.assign_add(generator_add, use_locking=True),
       discriminator_train_op=step.assign_add(discriminator_add,
                                              use_locking=True),
       global_step_inc_op=step.assign_add(1))
   return train_ops
Example #33
 def testGlobalStepIsWrapped(self):
   distribution = parameter_server_strategy.ParameterServerStrategy(
       num_gpus_per_worker=2)
   with ops.Graph().as_default(), distribution.scope():
     created_step = training_util.create_global_step()
     get_step = training_util.get_global_step()
     self.assertEqual(created_step, get_step,
                      msg=('created_step %s type %s vs. get_step %s type %s' %
                           (id(created_step), created_step.__class__.__name__,
                            id(get_step), get_step.__class__.__name__)))
     self.assertIs(values.AggregatingVariable, type(created_step))
     self.assertIs(values.AggregatingVariable, type(get_step))
Example #34
def _save_first_checkpoint(keras_model, custom_objects, config):
  """Save first checkpoint for the keras Estimator.

  Args:
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    config: Estimator config.

  Returns:
    The path where keras model checkpoint is saved.
  """
  # save checkpoint into subdirectory to allow warm start
  keras_model_dir = os.path.join(config.model_dir, 'keras')
  # Load weights and save to checkpoint if there is no checkpoint
  latest_path = checkpoint_management.latest_checkpoint(keras_model_dir)
  if not latest_path:
    keras_weights = None
    if _any_weight_initialized(keras_model):
      keras_weights = keras_model.get_weights()
    if not gfile.IsDirectory(keras_model_dir):
      gfile.MakeDirs(keras_model_dir)
    with ops.Graph().as_default():
      random_seed.set_random_seed(config.tf_random_seed)
      training_util.create_global_step()
      model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN, keras_model,
                                     custom_objects)
      # save to checkpoint
      with session.Session(config=config.session_config) as sess:
        if keras_weights:
          model.set_weights(keras_weights)
        # Make update ops and initialize all variables.
        if not model.train_function:
          # pylint: disable=protected-access
          model._make_train_function()
          K._initialize_variables(sess)
          # pylint: enable=protected-access
        saver = saver_lib.Saver()
        latest_path = os.path.join(keras_model_dir, 'keras_model.ckpt')
        saver.save(sess, latest_path)
  return latest_path
Example #35
 def test_scalar_summary_v2__v1_set_step(self):
   """Tests scalar v2 invocation when v1 step is set."""
   global_step = training_util.create_global_step()
   global_step.assign(1024)
   with test.mock.patch.object(
       summary_v2, 'scalar', autospec=True) as mock_scalar_v2:
     with summary_ops_v2.create_summary_file_writer('/tmp/test').as_default():
       i = constant_op.constant(2.5)
       tensor = summary_lib.scalar('float', i)
   # Returns empty string.
   self.assertEqual(tensor.numpy(), b'')
   self.assertEqual(tensor.dtype, dtypes.string)
   mock_scalar_v2.assert_called_once_with('float', data=i, step=1024)
Example #36
 def testGlobalStepIsWrappedOnTwoGPUs(self, use_core_strategy):
   strategy, _, _ = create_test_objects(
       num_gpus=2, use_core_strategy=use_core_strategy)
   with ops.Graph().as_default(), strategy.scope():
     created_step = training_util.create_global_step()
     get_step = training_util.get_global_step()
     self.assertEqual(created_step, get_step,
                      msg=('created_step %s type %s vs. get_step %s type %s' %
                           (id(created_step), created_step.__class__.__name__,
                            id(get_step), get_step.__class__.__name__)))
     self.assertIs(values.AggregatingVariable, type(created_step))
     self.assertIs(values.AggregatingVariable, type(get_step))
     self.assertIs(strategy, created_step.distribute_strategy)
Example #37
  def test_requests(self):
    with ops.Graph().as_default(), session_lib.Session() as sess:
      training_util.create_global_step()
      mock_mon = FakeMonitor()
      mock_mon2 = FakeMonitor()

      hook = learn.monitors.RunHookAdapterForMonitors([mock_mon, mock_mon2])
      hook.begin()

      mon_sess = monitored_session._HookedSession(sess=sess, hooks=[hook])

      a_tensor = constant_op.constant([0], name='a_tensor')
      constant_op.constant([5], name='another_tensor')
      constant_op.constant([10], name='third_tensor')
      mock_mon.requested_tensors = ['another_tensor']
      mock_mon2.requested_tensors = ['third_tensor']
      sess.run(variables.global_variables_initializer())

      output = mon_sess.run(a_tensor)
      self.assertEqual(output, [0])
      self.assertEqual(mock_mon.output['another_tensor'], [5])
      self.assertEqual(mock_mon2.output['third_tensor'], [10])
Example #38
 def testGlobalStepIsNotWrappedOnOneGPU(self, use_core_strategy):
   strategy, _, _ = create_test_objects(
       num_gpus=1, use_core_strategy=use_core_strategy)
   with ops.Graph().as_default(), strategy.scope():
     created_step = training_util.create_global_step()
     get_step = training_util.get_global_step()
     self.assertEqual(created_step, get_step,
                      msg=('created_step %s type %s vs. get_step %s type %s' %
                           (id(created_step), created_step.__class__.__name__,
                            id(get_step), get_step.__class__.__name__)))
     self.assertIs(resource_variable_ops.ResourceVariable, type(created_step))
     self.assertIs(resource_variable_ops.ResourceVariable, type(get_step))
     self.assertIs(strategy, created_step.distribute_strategy)
Example #39
 def testGlobalStepIsNotWrappedOnOneGPU(self):
   distribution = parameter_server_strategy.ParameterServerStrategy(
       num_gpus_per_worker=1)
   with ops.Graph().as_default(), distribution.scope():
     created_step = training_util.create_global_step()
     get_step = training_util.get_global_step()
     self.assertEqual(created_step, get_step,
                      msg=('created_step %s type %s vs. get_step %s type %s' %
                           (id(created_step), created_step.__class__.__name__,
                            id(get_step), get_step.__class__.__name__)))
     self.assertIs(resource_variable_ops.ResourceVariable, type(created_step))
     self.assertIs(resource_variable_ops.ResourceVariable, type(get_step))
     self.assertIs(distribution, created_step.distribute_strategy)
Example #40
  def test_requests(self):
    with ops.Graph().as_default(), session_lib.Session() as sess:
      training_util.create_global_step()
      mock_mon = FakeMonitor()
      mock_mon2 = FakeMonitor()

      hook = learn.monitors.RunHookAdapterForMonitors([mock_mon, mock_mon2])
      hook.begin()

      mon_sess = monitored_session._HookedSession(sess=sess, hooks=[hook])

      a_tensor = constant_op.constant([0], name='a_tensor')
      constant_op.constant([5], name='another_tensor')
      constant_op.constant([10], name='third_tensor')
      mock_mon.requested_tensors = ['another_tensor']
      mock_mon2.requested_tensors = ['third_tensor']
      sess.run(variables.global_variables_initializer())

      output = mon_sess.run(a_tensor)
      self.assertEqual(output, [0])
      self.assertEqual(mock_mon.output['another_tensor'], [5])
      self.assertEqual(mock_mon2.output['third_tensor'], [10])
Example #41
 def testGlobalStepIsNotWrappedOnOneGPU(self, use_core_strategy):
   strategy, _, _ = create_test_objects(
       num_gpus=1, use_core_strategy=use_core_strategy)
   with ops.Graph().as_default(), strategy.scope():
     created_step = training_util.create_global_step()
     get_step = training_util.get_global_step()
     self.assertEqual(created_step, get_step,
                      msg=('created_step %s type %s vs. get_step %s type %s' %
                           (id(created_step), created_step.__class__.__name__,
                            id(get_step), get_step.__class__.__name__)))
     self.assertIs(resource_variable_ops.ResourceVariable, type(created_step))
     self.assertIs(resource_variable_ops.ResourceVariable, type(get_step))
     self.assertIs(strategy, created_step.distribute_strategy)
Example #42
    def test_saving_restoring_checkpoint(self):

        logdir = _test_dir(self.get_temp_dir(),
                           "test_saving_restoring_checkpoint")
        with ops.Graph().as_default():
            gstep = training_util.create_global_step()
            do_step = state_ops.assign_add(gstep, 1)

            v0 = variables.Variable(10.0, name="v0")
            v1 = variables.Variable(20.0, name="v1")

            target_values = [[0.0], [1.0], [2.0]]
            keys = array_ops.placeholder(dtypes.int64)
            values = constant_op.constant(target_values, dtypes.float32)

            table = de.Variable(
                key_dtype=dtypes.int64,
                value_dtype=dtypes.float32,
                initializer=-1.0,
                name="m100",
                dim=1,
            )
            upsert_op = table.upsert(keys, values)
            lookup_op = table.lookup(keys)
            size_op = table.size()
            with monitored_session.MonitoredTrainingSession(
                    config=default_config, is_chief=True,
                    checkpoint_dir=logdir) as sess:
                self.assertEqual(0, sess.run(gstep))
                self.assertEqual(1, sess.run(do_step))
                self.assertEqual(2, sess.run(do_step))

                # Check that the parameter nodes have been initialized.
                self.assertEqual(10.0, sess.run(v0))
                self.assertEqual(20.0, sess.run(v1))
                self.assertAllEqual(0, sess.run(size_op))
                sess.run(upsert_op, feed_dict={keys: [0, 1, 2]})
                self.assertAllEqual(3, sess.run(size_op))
                self.device_check(table)

            # A restart will find the checkpoint and recover automatically.
            with monitored_session.MonitoredTrainingSession(
                    config=default_config, is_chief=True,
                    checkpoint_dir=logdir) as sess:
                self.assertEqual(2, sess.run(gstep))
                self.assertAllEqual(3, sess.run(table.size()))
                self.assertAllEqual(
                    target_values,
                    sess.run(lookup_op, feed_dict={keys: [0, 1, 2]}))

                self.device_check(table)
Example #43
    def testIntegerSummaries(self):
        step = training_util.create_global_step()
        writer = self.create_db_writer()

        def adder(x, y):
            state_ops.assign_add(step, 1)
            summary_ops.generic('x', x)
            summary_ops.generic('y', y)
            sum_ = x + y
            summary_ops.generic('sum', sum_)
            return sum_

        with summary_ops.always_record_summaries():
            with writer.as_default():
                self.assertEqual(5, adder(int64(2), int64(3)).numpy())

        six.assertCountEqual(
            self, [1, 1, 1],
            get_all(self.db,
                    'SELECT step FROM Tensors WHERE dtype IS NOT NULL'))
        six.assertCountEqual(self, ['x', 'y', 'sum'],
                             get_all(self.db, 'SELECT tag_name FROM Tags'))
        x_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "x"')
        y_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "y"')
        sum_id = get_one(self.db,
                         'SELECT tag_id FROM Tags WHERE tag_name = "sum"')

        with summary_ops.always_record_summaries():
            with writer.as_default():
                self.assertEqual(9, adder(int64(4), int64(5)).numpy())

        six.assertCountEqual(
            self, [1, 1, 1, 2, 2, 2],
            get_all(self.db,
                    'SELECT step FROM Tensors WHERE dtype IS NOT NULL'))
        six.assertCountEqual(self, [x_id, y_id, sum_id],
                             get_all(self.db, 'SELECT tag_id FROM Tags'))
        self.assertEqual(2, get_tensor(self.db, x_id, 1))
        self.assertEqual(3, get_tensor(self.db, y_id, 1))
        self.assertEqual(5, get_tensor(self.db, sum_id, 1))
        self.assertEqual(4, get_tensor(self.db, x_id, 2))
        self.assertEqual(5, get_tensor(self.db, y_id, 2))
        self.assertEqual(9, get_tensor(self.db, sum_id, 2))
        six.assertCountEqual(
            self, ['experiment'],
            get_all(self.db, 'SELECT experiment_name FROM Experiments'))
        six.assertCountEqual(self, ['run'],
                             get_all(self.db, 'SELECT run_name FROM Runs'))
        six.assertCountEqual(self, ['user'],
                             get_all(self.db, 'SELECT user_name FROM Users'))
Example #44
    def testMonitoredSessionStopAtStepHook(self):
        random_seed.set_random_seed(1)

        with ops.device("/device:IPU:0"):
            pa = array_ops.placeholder(np.float32, [2, 2], name="a")
            pb = array_ops.placeholder(np.float32, [2, 2], name="b")
            output = pa + pb

        with variable_scope.variable_scope('gs', use_resource=True):
            training_util.create_global_step()

        hook = basic_session_run_hooks.StopAtStepHook(num_steps=2)

        with ms.MonitoredSession(session_creator=ms.ChiefSessionCreator(),
                                 hooks=[hook]) as sess:

            fd = {pa: [[1., 1.], [2., 3.]], pb: [[0., 1.], [4., 5.]]}
            result = sess.run(output, fd)
            self.assertAllClose(result, [[1., 2.], [6., 8.]])

            fd = {pa: [[0., 0.], [1., 1.]], pb: [[2., 1.], [4., 5.]]}
            result = sess.run(output, fd)
            self.assertAllClose(result, [[2., 1.], [5., 6.]])
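StopAtStepHook watches the global step rather than counting run calls: it records the step when the session starts and requests a stop once the step has advanced num_steps times. A device-independent sketch of the same mechanism, assuming the TF1-compatibility API:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
with tf.Graph().as_default():
    step = tf.train.create_global_step()
    train_op = step.assign_add(1)  # stand-in for a real train op
    hook = tf.train.StopAtStepHook(num_steps=2)
    with tf.train.MonitoredSession(
        session_creator=tf.train.ChiefSessionCreator(),
        hooks=[hook]) as sess:
        steps_run = 0
        while not sess.should_stop():
            sess.run(train_op)
            steps_run += 1
        assert steps_run == 2  # the hook stops after two step increments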
Example #45
def create_global_step(graph=None):
    """Create global step tensor in graph.

  Args:
    graph: The graph in which to create the global step tensor. If missing,
      use default graph.

  Returns:
    Global step tensor.

  Raises:
    ValueError: if global step tensor is already defined.
  """
    return training_util.create_global_step(graph)
Example #46
def create_global_step(graph=None):
  """Create global step tensor in graph.

  Args:
    graph: The graph in which to create the global step tensor. If missing,
      use default graph.

  Returns:
    Global step tensor.

  Raises:
    ValueError: if global step tensor is already defined.
  """
  return training_util.create_global_step(graph)
Example #47
 def testGlobalStepIsNotWrappedOnOneGPU(self):
   strategy, _, _ = create_test_objects(num_gpus=1)
   with ops.Graph().as_default(), strategy.scope():
     created_step = training_util.create_global_step()
     get_step = training_util.get_global_step()
     self.assertEqual(created_step, get_step,
                      msg=('created_step %s type %s vs. get_step %s type %s' %
                           (id(created_step), created_step.__class__.__name__,
                            id(get_step), get_step.__class__.__name__)))
     self.assertIs(resource_variable_ops.ResourceVariable, type(created_step))
     self.assertIs(resource_variable_ops.ResourceVariable, type(get_step))
     # All variables have a `_distribute_strategy` attribute. Only variable
     # subclasses in distribution strategy expose it publicly.
     self.assertFalse(hasattr(created_step, 'distribute_strategy'))
     self.assertIs(strategy, created_step._distribute_strategy)
Example #48
0
 def testGlobalStepIsNotWrappedOnOneGPU(self, use_core_strategy):
   strategy, _, _ = create_test_objects(
       num_gpus=1, use_core_strategy=use_core_strategy)
   with ops.Graph().as_default(), strategy.scope():
     created_step = training_util.create_global_step()
     get_step = training_util.get_global_step()
     self.assertEqual(created_step, get_step,
                      msg=('created_step %s type %s vs. get_step %s type %s' %
                           (id(created_step), created_step.__class__.__name__,
                            id(get_step), get_step.__class__.__name__)))
     self.assertIs(resource_variable_ops.ResourceVariable, type(created_step))
     self.assertIs(resource_variable_ops.ResourceVariable, type(get_step))
     # All variables have a `_distribute_strategy` attribute. Only variable
     # subclasses in distribution strategy expose it publicly.
     self.assertFalse(hasattr(created_step, 'distribute_strategy'))
     self.assertIs(strategy, created_step._distribute_strategy)
Example #49
0
  def global_step(self):
    if self._global_step is None:
      # Get the default create_global_step utility to actually call
      # self.add_variable, by setting a custom creator.
      def _owned_variable_as_creator(
          next_creator, initial_value, **kwargs):
        def _creator_as_getter(initializer, **kwargs):
          return next_creator(initial_value=initializer, **kwargs)
        return self.add_variable(
            getter=_creator_as_getter, initializer=initial_value, shape=[],
            **kwargs)

      with variable_scope.variable_creator_scope(
          _owned_variable_as_creator):
        self._global_step = training_util.create_global_step()
    return self._global_step
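The property above routes create_global_step through the object's own add_variable by installing a custom variable creator. A standalone sketch of the same interception mechanism, assuming tf.compat.v1 (recording_creator is hypothetical, for illustration only):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

created = []

def recording_creator(next_creator, **kwargs):
    # Observe every variable built inside the scope, then defer to the
    # default creator.
    var = next_creator(**kwargs)
    created.append(var)
    return var

with tf.Graph().as_default():
    with tf.variable_creator_scope(recording_creator):
        step = tf.train.create_global_step()
    assert created == [step]  # create_global_step went through the creator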
Example #50
0
def create_global_step(graph=None):
  """Create global step tensor in graph.

  This API is deprecated. Use core framework training version instead.

  Args:
    graph: The graph in which to create the global step tensor. If missing,
      use default graph.

  Returns:
    Global step tensor.

  Raises:
    ValueError: if global step tensor is already defined.
  """
  return training_util.create_global_step(graph)
Example #51
0
  def testIntegerSummaries(self):
    step = training_util.create_global_step()
    writer = self.create_db_writer()

    def adder(x, y):
      state_ops.assign_add(step, 1)
      summary_ops.generic('x', x)
      summary_ops.generic('y', y)
      sum_ = x + y
      summary_ops.generic('sum', sum_)
      return sum_

    with summary_ops.always_record_summaries():
      with writer.as_default():
        self.assertEqual(5, adder(int64(2), int64(3)).numpy())

    six.assertCountEqual(
        self, [1, 1, 1],
        get_all(self.db, 'SELECT step FROM Tensors WHERE dtype IS NOT NULL'))
    six.assertCountEqual(self, ['x', 'y', 'sum'],
                         get_all(self.db, 'SELECT tag_name FROM Tags'))
    x_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "x"')
    y_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "y"')
    sum_id = get_one(self.db, 'SELECT tag_id FROM Tags WHERE tag_name = "sum"')

    with summary_ops.always_record_summaries():
      with writer.as_default():
        self.assertEqual(9, adder(int64(4), int64(5)).numpy())

    six.assertCountEqual(
        self, [1, 1, 1, 2, 2, 2],
        get_all(self.db, 'SELECT step FROM Tensors WHERE dtype IS NOT NULL'))
    six.assertCountEqual(self, [x_id, y_id, sum_id],
                         get_all(self.db, 'SELECT tag_id FROM Tags'))
    self.assertEqual(2, get_tensor(self.db, x_id, 1))
    self.assertEqual(3, get_tensor(self.db, y_id, 1))
    self.assertEqual(5, get_tensor(self.db, sum_id, 1))
    self.assertEqual(4, get_tensor(self.db, x_id, 2))
    self.assertEqual(5, get_tensor(self.db, y_id, 2))
    self.assertEqual(9, get_tensor(self.db, sum_id, 2))
    six.assertCountEqual(
        self, ['experiment'],
        get_all(self.db, 'SELECT experiment_name FROM Experiments'))
    six.assertCountEqual(self, ['run'],
                         get_all(self.db, 'SELECT run_name FROM Runs'))
    six.assertCountEqual(self, ['user'],
                         get_all(self.db, 'SELECT user_name FROM Users'))
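Each adder call increments the step and then writes three tensors ('x', 'y', 'sum') at that step, which is why the recorded steps grow from [1, 1, 1] to [1, 1, 1, 2, 2, 2] while the three tag ids stay stable across calls: tags are created once and reused.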
Example #52
0
  def test_supervisor_run_gan_model_train_ops_multiple_steps(self):
    step = training_util.create_global_step()
    train_ops = namedtuples.GANTrainOps(
        generator_train_op=constant_op.constant(3.0),
        discriminator_train_op=constant_op.constant(2.0),
        global_step_inc_op=step.assign_add(1))
    train_steps = namedtuples.GANTrainSteps(
        generator_train_steps=3, discriminator_train_steps=4)

    final_loss = slim_learning.train(
        train_op=train_ops,
        logdir='',
        global_step=step,
        number_of_steps=1,
        train_step_fn=train.get_sequential_train_steps(train_steps))
    self.assertTrue(np.isscalar(final_loss))
    self.assertEqual(17.0, final_loss)
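The expected final loss of 17.0 is the sequential sub-steps summed: 3 generator steps at 3.0 each plus 4 discriminator steps at 2.0 each, i.e. 9.0 + 8.0 = 17.0.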
Example #53
0
  def assert_increasing_global_step(self, optimizer):
    keras_model, _, _, train_input_fn, _ = get_resource_for_simple_model(
        model_type='sequential', is_evaluate=True)
    keras_model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['mse', keras.metrics.CategoricalAccuracy()])
    with self.cached_session() as sess:
      keras_model_fn = keras_lib._create_keras_model_fn(keras_model)
      global_step = training_util.create_global_step()
      features, labels = train_input_fn().make_one_shot_iterator().get_next()
      spec = keras_model_fn(features, labels, mode=model_fn_lib.ModeKeys.TRAIN)

      sess.run(variables.global_variables_initializer())
      sess.run(variables.local_variables_initializer())

      self.assertEqual(global_step.eval(), 0)  # Sanity check
      sess.run(spec.train_op)
      self.assertEqual(global_step.eval(), 1)
Example #54
0
  def test_calls_and_steps(self):
    with ops.Graph().as_default(), session_lib.Session() as sess:
      global_step_tensor = training_util.create_global_step()
      inc_5 = state_ops.assign_add(global_step_tensor, 5)
      mock_mon = FakeMonitor()
      mock_mon2 = FakeMonitor()

      hook = learn.monitors.RunHookAdapterForMonitors([mock_mon, mock_mon2])
      hook.begin()
      for mon in [mock_mon, mock_mon2]:
        self.assertEqual(mon.call_counter['begin'], 1)

      sess.run(variables.global_variables_initializer())
      sess.run(global_step_tensor.assign(10))

      mon_sess = monitored_session._HookedSession(sess=sess, hooks=[hook])

      mon_sess.run(inc_5)
      for mon in [mock_mon, mock_mon2]:
        self.assertEqual(mon.output, {})
        self.assertEqual(mon.last_begin_step, 11)
        self.assertEqual(mon.last_end_step, 11)
        self.assertEqual(mon.last_post_step, 11)
        self.assertEqual(mon.call_counter['step_end'], 1)
        self.assertEqual(mon.call_counter['step_begin'], 1)
        self.assertEqual(mon.call_counter['post_step'], 1)

      mon_sess.run(inc_5)
      for mon in [mock_mon, mock_mon2]:
        self.assertEqual(mon.output, {})
        self.assertEqual(mon.last_begin_step, 16)
        self.assertEqual(mon.last_end_step, 16)
        self.assertEqual(mon.last_post_step, 16)
        self.assertEqual(mon.call_counter['step_end'], 2)
        self.assertEqual(mon.call_counter['step_begin'], 2)
        self.assertEqual(mon.call_counter['post_step'], 2)

      hook.end(sess)
      for mon in [mock_mon, mock_mon2]:
        self.assertEqual(mon.call_counter['end'], 1)
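The step numbers follow from the adapter reporting the next step to run: with the global step at 10, the first inc_5 is reported as step 11 and leaves the step at 15, so the second call is reported as step 16.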
Example #55
0
def _export_estimator(estimator,
                      export_dir,
                      signature_fn,
                      input_fn,
                      default_batch_size,
                      exports_to_keep,
                      input_feature_key=None,
                      use_deprecated_input_fn=True,
                      prediction_key=None,
                      checkpoint_path=None):
  if use_deprecated_input_fn:
    input_fn = input_fn or _default_input_fn
  elif input_fn is None:
    raise ValueError('input_fn must be defined.')

  # If checkpoint_path is specified, use the specified checkpoint path.
  checkpoint_path = (checkpoint_path or
                     checkpoint_management.latest_checkpoint(
                         estimator._model_dir))
  with ops.Graph().as_default() as g:
    training_util.create_global_step(g)

    if use_deprecated_input_fn:
      examples = array_ops.placeholder(dtype=dtypes.string,
                                       shape=[default_batch_size],
                                       name='input_example_tensor')
      features = input_fn(estimator, examples)
    else:
      features, _ = input_fn()
      examples = None
      if input_feature_key is not None:
        examples = features.pop(input_feature_key)

    if (not features) and (examples is None):
      raise ValueError('Either features or examples must be defined.')

    predictions = estimator._get_predict_ops(features).predictions

    if prediction_key is not None:
      predictions = predictions[prediction_key]

    # Explicit signature_fn takes priority
    if signature_fn:
      default_signature, named_graph_signatures = signature_fn(examples,
                                                               features,
                                                               predictions)
    else:
      try:
        # Some estimators provide a signature function.
        # TODO(zakaria): check if the estimator has this function,
        #   raise helpful error if not
        signature_fn = estimator._create_signature_fn()

        default_signature, named_graph_signatures = (
            signature_fn(examples, features, predictions))
      except AttributeError:
        logging.warn(
            'Change warning: `signature_fn` will be required after '
            '2016-08-01.\n'
            'Using generic signatures for now.  To maintain this behavior, '
            'pass:\n'
            '  signature_fn=export.generic_signature_fn\n'
            'Also consider passing a regression or classification signature; '
            'see cl/126430915 for an example.')
        default_signature, named_graph_signatures = generic_signature_fn(
            examples, features, predictions)
    if exports_to_keep is not None:
      exports_to_keep = gc.largest_export_versions(exports_to_keep)
    return _export_graph(
        g,
        _get_saver(),
        checkpoint_path,
        export_dir,
        default_graph_signature=default_signature,
        named_graph_signatures=named_graph_signatures,
        exports_to_keep=exports_to_keep)
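Note the fallback order for signatures: an explicit signature_fn wins; otherwise the estimator's own _create_signature_fn() is tried; and only if that attribute is missing does the export fall back to generic_signature_fn with a deprecation warning.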
Example #56
0
 def test_global_step_read_is_none_if_there_is_no_global_step(self):
   with ops.Graph().as_default():
     self.assertIsNone(training_util._get_or_create_global_step_read())
     training_util.create_global_step()
     self.assertIsNotNone(training_util._get_or_create_global_step_read())
Example #57
0
 def test_reads_from_cache(self):
   with ops.Graph().as_default():
     training_util.create_global_step()
     first = training_util._get_or_create_global_step_read()
     second = training_util._get_or_create_global_step_read()
     self.assertEqual(first, second)
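As the test shows, the read tensor is cached (in a graph collection, per the get-or-create naming), so repeated _get_or_create_global_step_read calls return the same tensor rather than adding duplicate read ops to the graph.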