Example #1
0
    def testEvaluationLoopTimeout(self):
        """Checks that `evaluate_repeatedly` returns once `timeout` has elapsed.

        This test never writes a checkpoint into the directory, and the eval op
        is a placeholder with no feed, so the call is expected to come back with
        an empty result after roughly `timeout` seconds.
        """
        timeout_secs = 6

        ckpt_dir = tempfile.mkdtemp(suffix='evaluation_loop_timeout')
        if not gfile.Exists(ckpt_dir):
            gfile.MakeDirs(ckpt_dir)

        # The saver needs at least one variable it can try to restore.
        variables.get_or_create_global_step()

        # A placeholder without a feed_dict: actually evaluating it would fail,
        # so a clean return shows no evaluation was attempted.
        unrunnable_op = array_ops.placeholder(dtype=dtypes.float32)

        started_at = time.time()
        result = evaluation.evaluate_repeatedly(
            checkpoint_dir=ckpt_dir,
            eval_ops=unrunnable_op,
            hooks=[evaluation.StopAfterNEvalsHook(10)],
            timeout=timeout_secs)
        elapsed = time.time() - started_at

        self.assertFalse(result)

        # Lower bound: we waited for the timeout (minus the sleep time).
        self.assertGreater(elapsed, 5.0)

        # Upper bound: the timeout kicked in and stopped the loop.
        self.assertLess(elapsed, 7)
Example #2
0
    def testEvaluateWithEvalFeedDict(self):
        """Checks that `feed_dict` is applied on every run of `eval_ops`.

        The eval op adds a fed placeholder to a local variable; after
        `num_evals` runs the variable must equal
        `increment_value * num_evals`.
        """
        # Create a checkpoint.
        checkpoint_dir = tempfile.mkdtemp('evaluate_with_eval_feed_dict')
        self._train_model(checkpoint_dir, num_steps=1)

        # We need a variable that the saver will try to restore.
        variables.get_or_create_global_step()

        # Create a variable and an eval op that increments it with a placeholder.
        my_var = variables.local_variable(0.0, name='my_var')
        increment = array_ops.placeholder(dtype=dtypes.float32)
        eval_ops = state_ops.assign_add(my_var, increment)

        increment_value = 3
        num_evals = 5
        expected_value = increment_value * num_evals
        final_values = evaluation.evaluate_repeatedly(
            checkpoint_dir=checkpoint_dir,
            eval_ops=eval_ops,
            # Feed the same constant the expectation is derived from, so the
            # two cannot drift apart when one of them is edited.
            feed_dict={increment: increment_value},
            final_ops={'my_var': array_ops.identity(my_var)},
            hooks=[
                evaluation.StopAfterNEvalsHook(num_evals),
            ],
            max_number_of_evaluations=1)
        self.assertEqual(final_values['my_var'], expected_value)
Example #3
0
    def testEvalOpAndFinalOp(self):
        """Checks that `evaluate_once` runs `eval_ops` N times, then `final_ops`.

        A local counter is incremented by the eval op; the final op reads the
        counter and adds a constant, so the expected result is
        `eval_count + bonus`.
        """
        ckpt_dir = tempfile.mkdtemp(suffix='eval_ops_and_final_ops')

        # A single training step is enough to produce a checkpoint.
        self._train_model(ckpt_dir, num_steps=1)
        latest_ckpt = evaluation.wait_for_new_checkpoint(ckpt_dir)

        # Build the model so there is something to restore.
        logistic_classifier(
            constant_op.constant(self._inputs, dtype=dtypes.float32))

        eval_count = 5
        bonus = 9.0

        counter = variables.local_variable(0.0, name='MyVar')
        bump_counter = state_ops.assign_add(counter, 1.0)
        counter_plus_bonus = array_ops.identity(counter) + bonus

        stop_hooks = [evaluation.StopAfterNEvalsHook(eval_count)]
        results = evaluation.evaluate_once(
            checkpoint_path=latest_ckpt,
            eval_ops=bump_counter,
            final_ops={'value': counter_plus_bonus},
            hooks=stop_hooks)

        self.assertEqual(results['value'], eval_count + bonus)
Example #4
0
    def testEvaluatePerfectModel(self):
        """Trains for 300 steps, then asserts `evaluate_once` accuracy > 0.99."""
        ckpt_dir = tempfile.mkdtemp(suffix='evaluate_perfect_model_once')

        # Train the model to completion.
        self._train_model(ckpt_dir, num_steps=300)

        # Rebuild the model and round its output into hard predictions.
        features = constant_op.constant(self._inputs, dtype=dtypes.float32)
        targets = constant_op.constant(self._labels, dtype=dtypes.float32)
        rounded = math_ops.round(logistic_classifier(features))

        accuracy, update_op = metrics.accuracy(
            predictions=rounded, labels=targets)

        latest_ckpt = evaluation.wait_for_new_checkpoint(ckpt_dir)

        # One run of the metric update op, then read the accuracy value.
        single_eval = [evaluation.StopAfterNEvalsHook(1)]
        results = evaluation.evaluate_once(
            checkpoint_path=latest_ckpt,
            eval_ops=update_op,
            final_ops={'accuracy': accuracy},
            hooks=single_eval)

        self.assertGreater(results['accuracy'], .99)
Example #5
0
    def testEvaluationLoopTimeoutWithTimeoutFn(self):
        """Checks how often `evaluate_repeatedly` consults `timeout_fn`.

        The callback only returns True on its fourth call; the test asserts
        that it was called exactly four times and that the evaluation still
        produced an accuracy above 0.99.
        """
        ckpt_dir = tempfile.mkdtemp(
            suffix='evaluation_loop_timeout_with_timeout_fn')

        # Train the model to completion.
        self._train_model(ckpt_dir, num_steps=300)

        # Rebuild the model and round its output into hard predictions.
        features = constant_op.constant(self._inputs, dtype=dtypes.float32)
        targets = constant_op.constant(self._labels, dtype=dtypes.float32)
        rounded = math_ops.round(logistic_classifier(features))

        accuracy, update_op = metrics.accuracy(
            predictions=rounded, labels=targets)

        # One-element list so the closure below can mutate the count.
        call_counter = [0]

        def timeout_fn():
            call_counter[0] += 1
            # Ask the loop to stop only from the fourth call onwards.
            return call_counter[0] > 3

        single_eval = [evaluation.StopAfterNEvalsHook(1)]
        results = evaluation.evaluate_repeatedly(
            checkpoint_dir=ckpt_dir,
            eval_ops=update_op,
            final_ops={'accuracy': accuracy},
            hooks=single_eval,
            eval_interval_secs=1,
            max_number_of_evaluations=2,
            timeout=0.1,
            timeout_fn=timeout_fn)

        # We should have evaluated once.
        self.assertGreater(results['accuracy'], .99)
        # And the timeout fn should have been called 4 times.
        self.assertEqual(4, call_counter[0])
Example #6
0
def evaluation_loop(master,
                    checkpoint_dir,
                    logdir,
                    num_evals=1,
                    initial_op=None,
                    initial_op_feed_dict=None,
                    init_fn=None,
                    eval_op=None,
                    eval_op_feed_dict=None,
                    final_op=None,
                    final_op_feed_dict=None,
                    summary_op=_USE_DEFAULT,
                    summary_op_feed_dict=None,
                    variables_to_restore=None,
                    eval_interval_secs=60,
                    max_number_of_evaluations=None,
                    session_config=None,
                    timeout=None,
                    timeout_fn=None,
                    hooks=None):
    """Runs TF-Slim's Evaluation Loop.

    Assembles the hook list and the `Scaffold`, then delegates to
    `evaluation.evaluate_repeatedly`.

    Args:
      master: The BNS address of the TensorFlow master.
      checkpoint_dir: The directory where checkpoints are stored.
      logdir: The directory where the TensorFlow summaries are written to.
      num_evals: The number of times to run `eval_op`.
      initial_op: An operation run at the beginning of evaluation.
      initial_op_feed_dict: A feed dictionary to use when executing `initial_op`.
      init_fn: An optional callable to be executed after `initial_op` is called.
        The callable must accept one argument, the session being initialized.
      eval_op: An operation run `num_evals` times.
      eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
      final_op: An operation to execute after all of the `eval_op` executions. The
        value of `final_op` is returned.
      final_op_feed_dict: A feed dictionary to use when executing `final_op`.
      summary_op: The summary_op to evaluate after running TF-Slim's metric ops.
        When left at its default, `summary.merge_all()` is used. Pass `None` to
        disable the end-of-evaluation summary hook.
      summary_op_feed_dict: An optional feed dictionary to use when running the
        `summary_op`.
      variables_to_restore: A list of TensorFlow variables to restore during
        evaluation. If given, a `Saver` over exactly these variables is built;
        if left as `None`, no saver is passed and the `Scaffold` chooses one.
      eval_interval_secs: The minimum number of seconds between evaluations.
      max_number_of_evaluations: the max number of iterations of the evaluation.
        If the value is left as 'None', the evaluation continues indefinitely.
      session_config: An instance of `tf.compat.v1.ConfigProto` that will be used
        to configure the `Session`. If left as `None`, the default will be used.
      timeout: The maximum amount of time to wait between checkpoints. If left as
        `None`, then the process will wait indefinitely.
      timeout_fn: Optional function to call after a timeout.  If the function
        returns True, then it means that no new checkpoints will be generated and
        the iterator will exit.  The function is called with no arguments.
      hooks: A list of additional `SessionRunHook` objects to pass during repeated
        evaluations.

    Returns:
      The value of `final_op` or `None` if `final_op` is `None`.
    """
    # Identity, not equality: `summary_op` is caller-supplied and may define
    # its own `==`, so only `is` reliably detects the untouched sentinel.
    if summary_op is _USE_DEFAULT:
        summary_op = summary.merge_all()

    # The stop hook always comes first; it bounds each evaluation to
    # `num_evals` runs of `eval_op`.
    all_hooks = [
        evaluation.StopAfterNEvalsHook(num_evals),
    ]

    if summary_op is not None:
        all_hooks.append(
            evaluation.SummaryAtEndHook(
                log_dir=logdir,
                summary_op=summary_op,
                feed_dict=summary_op_feed_dict))

    if hooks is not None:
        # Add custom hooks if provided.
        all_hooks.extend(hooks)

    # Only build an explicit Saver when the caller restricted the variables.
    saver = None
    if variables_to_restore is not None:
        saver = tf_saver.Saver(variables_to_restore)

    return evaluation.evaluate_repeatedly(
        checkpoint_dir,
        master=master,
        scaffold=monitored_session.Scaffold(
            init_op=initial_op,
            init_feed_dict=initial_op_feed_dict,
            init_fn=init_fn,
            saver=saver),
        eval_ops=eval_op,
        feed_dict=eval_op_feed_dict,
        final_ops=final_op,
        final_ops_feed_dict=final_op_feed_dict,
        eval_interval_secs=eval_interval_secs,
        hooks=all_hooks,
        config=session_config,
        max_number_of_evaluations=max_number_of_evaluations,
        timeout=timeout,
        timeout_fn=timeout_fn)
Example #7
0
def evaluate_once(master,
                  checkpoint_path,
                  logdir,
                  num_evals=1,
                  initial_op=None,
                  initial_op_feed_dict=None,
                  eval_op=None,
                  eval_op_feed_dict=None,
                  final_op=None,
                  final_op_feed_dict=None,
                  summary_op=_USE_DEFAULT,
                  summary_op_feed_dict=None,
                  variables_to_restore=None,
                  session_config=None,
                  hooks=None):
    """Evaluates the model at the given checkpoint path.

    Assembles the hook list and the `Scaffold`, then delegates to
    `evaluation.evaluate_once`.

    Args:
      master: The BNS address of the TensorFlow master.
      checkpoint_path: The path to a checkpoint to use for evaluation.
      logdir: The directory where the TensorFlow summaries are written to.
      num_evals: The number of times to run `eval_op`.
      initial_op: An operation run at the beginning of evaluation.
      initial_op_feed_dict: A feed dictionary to use when executing `initial_op`.
      eval_op: An operation run `num_evals` times.
      eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
      final_op: An operation to execute after all of the `eval_op` executions. The
        value of `final_op` is returned.
      final_op_feed_dict: A feed dictionary to use when executing `final_op`.
      summary_op: The summary_op to evaluate after running TF-Slim's metric ops.
        When left at its default, `summary.merge_all()` is used. Pass `None` to
        disable the end-of-evaluation summary hook.
      summary_op_feed_dict: An optional feed dictionary to use when running the
        `summary_op`.
      variables_to_restore: A list of TensorFlow variables to restore during
        evaluation. If given, a `Saver` over exactly these variables is built;
        if left as `None`, no saver is passed and the `Scaffold` chooses one.
      session_config: An instance of `tf.compat.v1.ConfigProto` that will be used
        to configure the `Session`. If left as `None`, the default will be used.
      hooks: A list of additional `SessionRunHook` objects to pass during the
        evaluation.

    Returns:
      The value of `final_op` or `None` if `final_op` is `None`.
    """
    # Identity, not equality: `summary_op` is caller-supplied and may define
    # its own `==`, so only `is` reliably detects the untouched sentinel.
    if summary_op is _USE_DEFAULT:
        summary_op = summary.merge_all()

    # The stop hook always comes first; it bounds the evaluation to
    # `num_evals` runs of `eval_op`.
    all_hooks = [
        evaluation.StopAfterNEvalsHook(num_evals),
    ]

    if summary_op is not None:
        all_hooks.append(
            evaluation.SummaryAtEndHook(
                log_dir=logdir,
                summary_op=summary_op,
                feed_dict=summary_op_feed_dict))

    if hooks is not None:
        # Add custom hooks if provided.
        all_hooks.extend(hooks)

    # Only build an explicit Saver when the caller restricted the variables.
    saver = None
    if variables_to_restore is not None:
        saver = tf_saver.Saver(variables_to_restore)

    return evaluation.evaluate_once(
        checkpoint_path,
        master=master,
        scaffold=monitored_session.Scaffold(
            init_op=initial_op, init_feed_dict=initial_op_feed_dict, saver=saver),
        eval_ops=eval_op,
        feed_dict=eval_op_feed_dict,
        final_ops=final_op,
        final_ops_feed_dict=final_op_feed_dict,
        hooks=all_hooks,
        config=session_config)