Example #1
  def testDumpingDebugHookWithStatefulLegacyWatchFnWorks(self):
    watch_fn_state = {"run_counter": 0}

    def counting_watch_fn(fetches, feed_dict):
      del fetches, feed_dict
      watch_fn_state["run_counter"] += 1
      if watch_fn_state["run_counter"] % 2 == 1:
        # If odd-index run (1-based), watch everything.
        return "DebugIdentity", r".*", r".*"
      else:
        # If even-index run, watch nothing.
        return "DebugIdentity", r"$^", r"$^"

    dumping_hook = hooks.DumpingDebugHook(
        self.session_root, watch_fn=counting_watch_fn, log_usage=False)
    mon_sess = monitored_session._HookedSession(self.sess, [dumping_hook])
    for _ in range(4):
      mon_sess.run(self.inc_v)

    dump_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
    dump_dirs = sorted(
        dump_dirs, key=lambda x: int(os.path.basename(x).split("_")[1]))
    self.assertEqual(4, len(dump_dirs))

    for i, dump_dir in enumerate(dump_dirs):
      self._assert_correct_run_subdir_naming(os.path.basename(dump_dir))
      dump = debug_data.DebugDumpDir(dump_dir)
      if i % 2 == 0:
        self.assertAllClose([10.0 + 1.0 * i],
                            dump.get_tensors("v", 0, "DebugIdentity"))
      else:
        self.assertEqual(0, dump.size)

      self.assertEqual(repr(self.inc_v), dump.run_fetches_info)
      self.assertEqual(repr(None), dump.run_feed_keys_info)
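The stateful watch_fn above uses the legacy 3-tuple return value (debug op, node name regex, op type regex). Example #10 below shows the object form, framework.WatchOptions. As a hedged sketch only, the same odd/even switching could plausibly be written in that form as well, assuming DumpingDebugHook accepts a WatchOptions-returning watch_fn the way DumpingDebugWrapperSession does in Example #10, and that framework is imported from tensorflow.python.debug.wrappers:

# Sketch: the alternating watch_fn from the test above, expressed with
# framework.WatchOptions instead of the legacy 3-tuple return.
watch_fn_state = {"run_counter": 0}

def counting_watch_fn_v2(fetches, feed_dict):
  del fetches, feed_dict
  watch_fn_state["run_counter"] += 1
  if watch_fn_state["run_counter"] % 2 == 1:
    # Odd (1-based) runs: watch every node with DebugIdentity.
    return framework.WatchOptions(
        debug_ops=["DebugIdentity"],
        node_name_regex_whitelist=r".*",
        op_type_regex_whitelist=r".*")
  # Even runs: regexes that match no name, so nothing is watched.
  return framework.WatchOptions(
      debug_ops=["DebugIdentity"],
      node_name_regex_whitelist=r"$^",
      op_type_regex_whitelist=r"$^")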
Example #2
    def testDebugDumpDir_invalidFileNamingPattern(self):
        # File name with too few underscores should lead to an exception.
        open(os.path.join(self._dump_root, "node1_DebugIdentity_1234"), "wb")

        with self.assertRaisesRegexp(ValueError,
                                     "does not conform to the naming pattern"):
            debug_data.DebugDumpDir(self._dump_root)
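The file created above has only three underscore-separated parts. Judging from the "too few underscores" comment and how dumps are named in the other examples, a conforming base name appears to consist of node name, output slot, debug op, and timestamp; a hypothetical well-formed counterpart (reusing the test's self._dump_root) might look like this:

# Hypothetical well-formed name: node "node1", output slot 0,
# debug op "DebugIdentity", timestamp 1234. The test's file omits the slot.
open(os.path.join(self._dump_root, "node1_0_DebugIdentity_1234"), "wb")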
Example #3
    def _session_run_for_graph_structure_lookup(self):
        with session.Session() as sess:
            u_name = "testDumpGraphStructureLookup/u"
            v_name = "testDumpGraphStructureLookup/v"
            w_name = "testDumpGraphStructureLookup/w"

            u_init = constant_op.constant([2.0, 4.0])
            u = variables.Variable(u_init, name=u_name)
            v = math_ops.add(u, u, name=v_name)
            w = math_ops.add(v, v, name=w_name)

            u.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            sess.run(w, options=run_options, run_metadata=run_metadata)

        self.assertEqual(self._expected_partition_graph_count,
                         len(run_metadata.partition_graphs))

        dump = debug_data.DebugDumpDir(
            self._dump_root, partition_graphs=run_metadata.partition_graphs)

        return u_name, v_name, w_name, dump
Example #4
    def testDuplicateNodeNamesInGraphDefOfSingleDeviceRaisesException(self):
        self._makeDataDirWithMultipleDevicesAndDuplicateNodeNames()
        graph_cpu_0 = graph_pb2.GraphDef()
        node = graph_cpu_0.node.add()
        node.name = "node_foo_1"
        node.op = "FooOp"
        node.device = "/job:localhost/replica:0/task:0/cpu:0"
        graph_gpu_0 = graph_pb2.GraphDef()
        node = graph_gpu_0.node.add()
        node.name = "node_foo_1"
        node.op = "FooOp"
        node.device = "/job:localhost/replica:0/task:0/device:GPU:0"
        graph_gpu_1 = graph_pb2.GraphDef()
        node = graph_gpu_1.node.add()
        node.name = "node_foo_1"
        node.op = "FooOp"
        node.device = "/job:localhost/replica:0/task:0/device:GPU:1"
        node = graph_gpu_1.node.add()  # Here is the duplicate.
        node.name = "node_foo_1"
        node.op = "FooOp"
        node.device = "/job:localhost/replica:0/task:0/device:GPU:1"

        with self.assertRaisesRegex(ValueError,
                                    r"Duplicate node name on device "):
            debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=[graph_cpu_0, graph_gpu_0, graph_gpu_1])
Example #5
    def testGrpcDebugWrapperSessionWithWatchFnWorks(self):
        def watch_fn(feeds, fetch_keys):
            del feeds, fetch_keys
            return ["DebugIdentity", "DebugNumericSummary"], r".*/read", None

        u = variables.Variable(2.1, name="u")
        v = variables.Variable(20.0, name="v")
        w = math_ops.multiply(u, v, name="w")

        sess = session.Session(
            config=session_debug_testlib.no_rewrite_session_config())
        sess.run(u.initializer)
        sess.run(v.initializer)

        sess = grpc_wrapper.GrpcDebugWrapperSession(sess,
                                                    "localhost:%d" %
                                                    self._server_port,
                                                    watch_fn=watch_fn)
        w_result = sess.run(w)
        self.assertAllClose(42.0, w_result)

        dump = debug_data.DebugDumpDir(self._dump_root)
        self.assertEqual(4, dump.size)
        self.assertAllClose([2.1],
                            dump.get_tensors("u/read", 0, "DebugIdentity"))
        self.assertEqual(
            14, len(dump.get_tensors("u/read", 0, "DebugNumericSummary")[0]))
        self.assertAllClose([20.0],
                            dump.get_tensors("v/read", 0, "DebugIdentity"))
        self.assertEqual(
            14, len(dump.get_tensors("v/read", 0, "DebugNumericSummary")[0]))
Example #6
    def testAdditionalHooks(self):
        checkpoint_path = os.path.join(self.get_temp_dir(), 'model.ckpt')
        log_dir = os.path.join(self.get_temp_dir(), 'log_dir1/')

        # First, save out the current model to a checkpoint:
        self._prepareCheckpoint(checkpoint_path)

        # Next, determine the metric to evaluate:
        value_op, update_op = metric_ops.streaming_accuracy(
            self._predictions, self._labels)

        dumping_root = os.path.join(self.get_temp_dir(), 'tfdbg_dump_dir')
        dumping_hook = hooks.DumpingDebugHook(dumping_root, log_usage=False)
        try:
            # Run the evaluation and verify the results:
            accuracy_value = evaluation.evaluate_once('',
                                                      checkpoint_path,
                                                      log_dir,
                                                      eval_op=update_op,
                                                      final_op=value_op,
                                                      hooks=[dumping_hook])
            self.assertAlmostEqual(accuracy_value, self._expected_accuracy)

            dump = debug_data.DebugDumpDir(
                glob.glob(os.path.join(dumping_root, 'run_*'))[0])
            # Here we simply assert that the dumped data has been loaded and is
            # non-empty. We do not care about the detailed model-internal tensors or
            # their values.
            self.assertTrue(dump.dumped_tensor_data)
        finally:
            if os.path.isdir(dumping_root):
                shutil.rmtree(dumping_root)
Example #7
  def testTrainWithSessionWrapper(self):
    """Test that slim.learning.train can take `session_wrapper` args.

    One of the applications of `session_wrapper` is the set of session wrappers
    provided by the TensorFlow Debugger (tfdbg), which intercept method calls to
    `tf.Session` (e.g., run) to achieve debugging. `DumpingDebugWrapperSession`
    is used here for testing purposes.
    """
    dump_root = tempfile.mkdtemp()

    def dumping_wrapper(sess):  # pylint: disable=invalid-name
      return dumping_wrapper_lib.DumpingDebugWrapperSession(sess, dump_root)

    with ops.Graph().as_default():
      random_seed.set_random_seed(0)
      tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32)
      tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32)

      tf_predictions = LogisticClassifier(tf_inputs)
      loss_ops.log_loss(tf_predictions, tf_labels)
      total_loss = loss_ops.get_total_loss()

      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0)

      train_op = learning.create_train_op(total_loss, optimizer)

      loss = learning.train(
          train_op, None, number_of_steps=1, session_wrapper=dumping_wrapper)
    self.assertIsNotNone(loss)

    run_root = glob.glob(os.path.join(dump_root, 'run_*'))[-1]
    dump = debug_data.DebugDumpDir(run_root)
    self.assertAllEqual(0,
                        dump.get_tensors('global_step', 0, 'DebugIdentity')[0])
Example #8
    def createAndRunGraphWithWhileLoop(self):
        """Create and run a TensorFlow Graph with a while loop to generate dumps."""

        self.dump_root = self.get_temp_dir()
        self.curr_file_path = os.path.abspath(
            tf_inspect.getfile(tf_inspect.currentframe()))

        # Run a simple TF graph to generate some debug dumps that can be used in
        # source annotation.
        with session.Session() as sess:
            loop_body = lambda i: math_ops.add(i, 2)
            self.traceback_first_line = line_number_above()

            loop_cond = lambda i: math_ops.less(i, 16)

            i = constant_op.constant(10, name="i")
            loop = control_flow_ops.while_loop(loop_cond, loop_body, [i])

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_urls=["file://%s" % self.dump_root])
            run_metadata = config_pb2.RunMetadata()
            sess.run(loop, options=run_options, run_metadata=run_metadata)

            self.dump = debug_data.DebugDumpDir(
                self.dump_root, partition_graphs=run_metadata.partition_graphs)
            self.dump.set_python_graph(sess.graph)
Example #9
    def testOutputSlotWithoutOutgoingEdgeCanBeWatched(self):
        """Test watching output slots not attached to any outgoing edges."""

        with session.Session() as sess:
            u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
            u = constant_op.constant(u_init_val, shape=[2, 2], name="u")

            # Create a control edge from a node with an output: From u to z.
            # Node u will get executed only because of the control edge. The output
            # tensor u:0 is not attached to any outgoing edge in the graph. This test
            # checks that the debugger can watch such a tensor.
            with ops.control_dependencies([u]):
                z = control_flow_ops.no_op(name="z")

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            sess.run(z, options=run_options, run_metadata=run_metadata)

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            # Assert that the DebugIdentity watch on u works properly.
            self.assertEqual(1, len(dump.dumped_tensor_data))
            datum = dump.dumped_tensor_data[0]
            self.assertEqual("u", datum.node_name)
            self.assertEqual(0, datum.output_slot)
            self.assertEqual("DebugIdentity", datum.debug_op)
            self.assertAllClose([[5.0, 3.0], [-1.0, 0.0]], datum.get_tensor())
Example #10
    def testDumpingWithWatchFnWithNonDefaultDebugOpsWorks(self):
        """Use a watch_fn that specifies non-default debug ops."""
        def watch_fn(fetches, feeds):
            del fetches, feeds
            return framework.WatchOptions(
                debug_ops=["DebugIdentity", "DebugNumericSummary"],
                node_name_regex_whitelist=r"^v.*",
                op_type_regex_whitelist=r".*",
                tensor_dtype_regex_whitelist=".*_ref")

        sess = dumping_wrapper.DumpingDebugWrapperSession(
            self.sess,
            session_root=self.session_root,
            watch_fn=watch_fn,
            log_usage=False)

        sess.run(self.inc_v)

        dump_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
        self.assertEqual(1, len(dump_dirs))
        dump = debug_data.DebugDumpDir(dump_dirs[0])

        self.assertAllClose([10.0], dump.get_tensors("v", 0, "DebugIdentity"))
        self.assertEqual(
            14, len(dump.get_tensors("v", 0, "DebugNumericSummary")[0]))

        dumped_nodes = [dump.node_name for dump in dump.dumped_tensor_data]
        self.assertNotIn("inc_v", dumped_nodes)
        self.assertNotIn("delta", dumped_nodes)
Example #11
    def testDebugDumpDir_usesGfileGlob(self):
        if platform.system() == "Windows":
            self.skipTest("gfile.Glob is not used on Windows.")

        self._makeDataDirWithMultipleDevicesAndDuplicateNodeNames()

        def fake_gfile_glob(glob_pattern):
            del glob_pattern
            return []

        with test.mock.patch.object(gfile,
                                    "Glob",
                                    side_effect=fake_gfile_glob,
                                    autospec=True) as fake:
            debug_data.DebugDumpDir(self._dump_root)
            expected_calls = [
                test.mock.call(
                    os.path.join(self._dump_root,
                                 (debug_data.METADATA_FILE_PREFIX +
                                  debug_data.CORE_METADATA_TAG + "*"))),
                test.mock.call(
                    os.path.join(self._dump_root,
                                 (debug_data.METADATA_FILE_PREFIX +
                                  debug_data.FETCHES_INFO_FILE_TAG + "*"))),
                test.mock.call(
                    os.path.join(self._dump_root,
                                 (debug_data.METADATA_FILE_PREFIX +
                                  debug_data.FEED_KEYS_INFO_FILE_TAG + "*"))),
                test.mock.call(
                    os.path.join(self._dump_root,
                                 (debug_data.METADATA_FILE_PREFIX +
                                  debug_data.DEVICE_TAG + "*")))
            ]
            fake.assert_has_calls(expected_calls, any_order=True)
Example #12
    def testDebugNumericSummaryOnUninitializedTensorGivesCorrectResult(self):
        with session.Session() as sess:
            a = variables.Variable([42],
                                   dtype=np.float32,
                                   name="numeric_summary_uninit/a")

            run_metadata = config_pb2.RunMetadata()
            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugNumericSummary"],
                                    debug_urls=self._debug_urls())

            sess.run(a.initializer,
                     options=run_options,
                     run_metadata=run_metadata)

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)
            self.assertTrue(dump.loaded_partition_graphs())

            # DebugNumericSummary output should reflect the uninitialized state of
            # the watched tensor.
            numeric_summary = dump.get_tensors("numeric_summary_uninit/a", 0,
                                               "DebugNumericSummary")[0]
            self.assertAllClose([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                                numeric_summary[0:8])
            self.assertTrue(np.isinf(numeric_summary[8]))
            self.assertGreater(numeric_summary[8], 0.0)
            self.assertTrue(np.isinf(numeric_summary[9]))
            self.assertLess(numeric_summary[9], 0.0)
            self.assertTrue(np.isnan(numeric_summary[10]))
            self.assertTrue(np.isnan(numeric_summary[11]))
Example #13
    def testDebugNumericSummaryOnInitializedTensorGivesCorrectResult(self):
        with session.Session() as sess:
            a = variables.Variable([
                np.nan, np.nan, 0.0, 0.0, 0.0, -1.0, -3.0, 3.0, 7.0, -np.inf,
                -np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.nan, np.nan
            ],
                                   dtype=np.float32,
                                   name="numeric_summary/a")
            b = variables.Variable([0.0] * 18,
                                   dtype=np.float32,
                                   name="numeric_summary/b")
            c = math_ops.add(a, b, name="numeric_summary/c")

            sess.run(variables.global_variables_initializer())

            run_metadata = config_pb2.RunMetadata()
            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugNumericSummary"],
                                    debug_urls=self._debug_urls())

            sess.run(c, options=run_options, run_metadata=run_metadata)

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)
            self.assertTrue(dump.loaded_partition_graphs())

            self.assertAllClose([[
                1.0, 18.0, 2.0, 2.0, 3.0, 2.0, 5.0, 4.0, -3.0, 7.0, 0.85714286,
                8.97959184
            ]],
                                dump.get_tensors("numeric_summary/a/read", 0,
                                                 "DebugNumericSummary"))
Example #14
  def on_run_end(self, request):
    """Overrides on-run-end callback.

    Actions taken:
      1) Load the debug dump.
      2) Bring up the Analyzer CLI.

    Args:
      request: An instance of OnRunEndRequest.

    Returns:
      An instance of OnRunEndResponse.
    """

    self._is_run_start = False
    if request.performed_action == framework.OnRunStartAction.DEBUG_RUN:
      partition_graphs = None
      if request.run_metadata and request.run_metadata.partition_graphs:
        partition_graphs = request.run_metadata.partition_graphs
      elif request.client_graph_def:
        partition_graphs = [request.client_graph_def]

      if request.tf_error and not os.path.isdir(self._dump_root):
        # It is possible that the dump root may not exist due to errors that
        # have occurred prior to graph execution (e.g., invalid device
        # assignments), in which case we will just raise the exception as the
        # unwrapped Session does.
        raise request.tf_error

      debug_dump = debug_data.DebugDumpDir(
          self._dump_root, partition_graphs=partition_graphs)
      debug_dump.set_python_graph(self._sess.graph)

      passed_filter = None
      if self._active_tensor_filter:
        if not debug_dump.find(
            self._tensor_filters[self._active_tensor_filter], first_n=1):
          # No dumped tensor passes the filter in this run. Clean up the dump
          # directory and move on.
          self._remove_dump_root()
          return framework.OnRunEndResponse()
        else:
          # Some dumped tensor(s) from this run passed the filter.
          passed_filter = self._active_tensor_filter
          self._active_tensor_filter = None

      self._prep_cli_for_run_end(debug_dump, request.tf_error, passed_filter)

      self._run_start_response = self._launch_cli()

      # Clean up the dump generated by this run.
      self._remove_dump_root()
    else:
      # No debug information to show following a non-debug run() call.
      self._run_start_response = None

    # Return placeholder response that currently holds no additional
    # information.
    return framework.OnRunEndResponse()
Example #15
    def _load_dumped_intermediate_tensors(self, dump_path, target_name):
        dump_dir = debug_data.DebugDumpDir(dump_path, validate=False)
        for dump in dump_dir.dumped_tensor_data:
            if (dump.tensor_name not in self._ref_tensor_names
                    and dump.tensor_name not in self._tensor_handles
                    and dump.tensor_name not in self._override_tensors
                    and dump.tensor_name != target_name):
                self._dumped_intermediate_tensors[dump.tensor_name] = dump
Example #16
    def testGraphStructureLookupWithoutPartitionGraphsDoesNotErrorOut(self):
        _, _, _, dump = self._session_run_for_graph_structure_lookup()

        # Now load the dump again without passing the partition graphs, and check
        # that no error is raised: the partition graphs are loaded from the dump
        # directory instead.
        dump = debug_data.DebugDumpDir(self._dump_root, validate=False)
        self.assertTrue(dump.loaded_partition_graphs())
Example #17
    def testDebugDumpDir_invalidFileNamingPattern(self):
        # File name with too few underscores should lead to an exception.
        device_dir = os.path.join(
            self._dump_root, debug_data.METADATA_FILE_PREFIX +
            debug_data.DEVICE_TAG + ",job_localhost,replica_0,task_0,cpu_0")
        os.makedirs(device_dir)
        open(os.path.join(device_dir, "node1_DebugIdentity_1234"), "wb")

        with self.assertRaisesRegex(ValueError,
                                    "does not conform to the naming pattern"):
            debug_data.DebugDumpDir(self._dump_root)
Example #18
    def testDumpUninitializedVariable(self):
        op_namespace = "testDumpUninitializedVariable"
        with session.Session() as sess:
            u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
            s_init_val = b"str1"

            u_name = "%s/u" % op_namespace
            s_name = "%s/s" % op_namespace

            u_init = constant_op.constant(u_init_val, shape=[2, 2])
            u = variables.Variable(u_init, name=u_name)
            s_init = constant_op.constant(s_init_val)
            s = variables.Variable(s_init, name=s_name)

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_urls = self._debug_urls()

            # Add debug tensor watch for u.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s" % u_name,
                                               0,
                                               debug_urls=debug_urls)
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s" % s_name,
                                               0,
                                               debug_urls=debug_urls)

            run_metadata = config_pb2.RunMetadata()

            # Initialize u and s.
            sess.run(variables.global_variables_initializer(),
                     options=run_options,
                     run_metadata=run_metadata)

            # Verify the dump file for the uninitialized value of u.
            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            self.assertEqual(2, dump.size)
            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))

            # The dumped values of u and s were captured while the variables were
            # still uninitialized, so they cannot be loaded as concrete values.
            u_vals = dump.get_tensors(u_name, 0, "DebugIdentity")
            s_vals = dump.get_tensors(s_name, 0, "DebugIdentity")
            self.assertEqual(1, len(u_vals))
            self.assertIsNone(u_vals[0])
            self.assertEqual(1, len(s_vals))
            self.assertIsNone(s_vals[0])

            # Call run() again, to check that u is initialized properly.
            self.assertAllClose(u_init_val, sess.run(u))
            self.assertEqual(s_init_val, sess.run(s))
Example #19
    def on_run_end(self, request):
        """Overrides on-run-end callback.

    Actions taken:
      1) Load the debug dump.
      2) Bring up the Analyzer CLI.

    Args:
      request: An instance of OnRunEndRequest.

    Returns:
      An instance of OnRunEndResponse.
    """

        self._is_run_start = False
        if request.performed_action == framework.OnRunStartAction.DEBUG_RUN:
            partition_graphs = None
            if request.run_metadata and request.run_metadata.partition_graphs:
                partition_graphs = request.run_metadata.partition_graphs
            elif request.client_graph_def:
                partition_graphs = [request.client_graph_def]

            debug_dump = debug_data.DebugDumpDir(
                self._dump_root, partition_graphs=partition_graphs)
            debug_dump.set_python_graph(self._sess.graph)

            passed_filter = None
            if self._active_tensor_filter:
                if not debug_dump.find(
                        self._tensor_filters[self._active_tensor_filter],
                        first_n=1):
                    # No dumped tensor passes the filter in this run. Clean up the dump
                    # directory and move on.
                    self._remove_dump_root()
                    return framework.OnRunEndResponse()
                else:
                    # Some dumped tensor(s) from this run passed the filter.
                    passed_filter = self._active_tensor_filter
                    self._active_tensor_filter = None

            self._prep_cli_for_run_end(debug_dump, request.tf_error,
                                       passed_filter)

            self._run_start_response = self._launch_cli()

            # Clean up the dump generated by this run.
            self._remove_dump_root()
        else:
            # No debug information to show following a non-debug run() call.
            self._run_start_response = None

        # Return placeholder response that currently holds no additional
        # information.
        return framework.OnRunEndResponse()
Example #20
    def _generate_dump_from_simple_addition_graph(self):
        with session.Session() as sess:
            u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
            v_init_val = np.array([[2.0], [-1.0]])

            # Use node names with overlapping namespace (i.e., parent directory) to
            # test concurrent, non-racing directory creation.
            u_name = "u"
            v_name = "v"
            w_name = "w"

            u_init = constant_op.constant(u_init_val, shape=[2, 2])
            u = variables.Variable(u_init, name=u_name)
            v_init = constant_op.constant(v_init_val, shape=[2, 1])
            v = variables.Variable(v_init, name=v_name)

            w = math_ops.matmul(u, v, name=w_name)

            u.initializer.run()
            v.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_urls = "file://%s" % self._dump_root

            # Add debug tensor watch for u.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % u_name,
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for v.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % v_name,
                                               0,
                                               debug_urls=debug_urls)

            run_metadata = config_pb2.RunMetadata()

            # Invoke Session.run().
            sess.run(w, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

        simple_add_results = collections.namedtuple("SimpleAddResults", [
            "u_init_val", "v_init_val", "u", "v", "w", "u_name", "v_name",
            "w_name", "dump"
        ])
        return simple_add_results(u_init_val, v_init_val, u, v, w, u_name,
                                  v_name, w_name, dump)
Example #21
    def testWatchingVariableUpdateOpsSeesUpdatedValues(self):
        """Watch output slots on Variable-updating ops, with no emitted edges."""

        with session.Session() as sess:
            u_init = constant_op.constant(10.0)
            u = variables.Variable(u_init, name="gdo/u")
            v_init = constant_op.constant(20.0)
            v = variables.Variable(v_init, name="gdo/v")

            w = math_ops.multiply(u, v, name="gdo/w")
            # gdo stands for GradientDescentOptimizer.

            train_op = gradient_descent.GradientDescentOptimizer(
                learning_rate=0.1).minimize(w, name="gdo/train")

            u.initializer.run()
            v.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            sess.run(train_op, options=run_options, run_metadata=run_metadata)

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            update_u_data = dump.watch_key_to_data(
                "gdo/train/update_gdo/u/ApplyGradientDescent:0:DebugIdentity")
            self.assertEqual(1, len(update_u_data))

            # Gradient descent on u: w = u * v, so dw / du = v.
            # Updated value of u should be:
            #   10.0 - learning_rate * v = 10.0 - 0.1 * 20.0 = 8.0
            self.assertAllClose(8.0, update_u_data[0].get_tensor())

            update_v_data = dump.watch_key_to_data(
                "gdo/train/update_gdo/v/ApplyGradientDescent:0:DebugIdentity")
            self.assertEqual(1, len(update_v_data))

            # Gradient descent on v: w = u * v, so dw / dv = u.
            # Updated value of v should be:
            #   20.0 - learning_rate * u = 20.0 - 0.1 * 10.0 = 19.0
            self.assertAllClose(19.0, update_v_data[0].get_tensor())

            # Verify that the Variables u and v are updated properly.
            self.assertAllClose(8.0, sess.run(u))
            self.assertAllClose(19.0, sess.run(v))
Example #22
  def testDumpingOnASingleRunWorksWithRelativePathForDebugDumpDir(self):
    sess = dumping_wrapper.DumpingDebugWrapperSession(
        self.sess, session_root=self.session_root, log_usage=False)
    sess.run(self.inc_v)
    dump_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
    cwd = os.getcwd()
    try:
      os.chdir(self.session_root)
      dump = debug_data.DebugDumpDir(
          os.path.relpath(dump_dirs[0], self.session_root))
      self.assertAllClose([10.0], dump.get_tensors("v", 0, "DebugIdentity"))
    finally:
      os.chdir(cwd)
Example #23
    def testGradientsValuesFromDumpWorks(self):
        y = math_ops.add(self.w, -1.0, name="y")
        z = math_ops.square(y, name="z")

        grad_debugger = debug_gradients.GradientsDebugger()
        with grad_debugger.watch_gradients_by_tensors(self.sess.graph,
                                                      [self.w, self.u, y]):
            train_op = gradient_descent.GradientDescentOptimizer(0.1).minimize(
                z)

        self.sess.run(variables.global_variables_initializer())

        run_options = config_pb2.RunOptions(output_partition_graphs=True)
        dump_dir = tempfile.mkdtemp()
        debug_url = "file://" + dump_dir
        debug_utils.watch_graph(run_options,
                                self.sess.graph,
                                debug_urls=debug_url)
        run_metadata = config_pb2.RunMetadata()
        self.assertAllClose(2.0, self.sess.run(self.u))
        self.sess.run(train_op, options=run_options, run_metadata=run_metadata)
        self.assertAllClose(-1.0, self.sess.run(self.u))

        dump = debug_data.DebugDumpDir(
            dump_dir, partition_graphs=run_metadata.partition_graphs)
        dump.set_python_graph(self.sess.graph)

        y_grad_values = debug_gradients.gradient_values_from_dump(
            grad_debugger, y, dump)
        self.assertEqual(1, len(y_grad_values))
        self.assertAllClose(10.0, y_grad_values[0])

        w_grad_values = debug_gradients.gradient_values_from_dump(
            grad_debugger, self.w, dump)
        self.assertEqual(1, len(w_grad_values))
        self.assertAllClose(10.0, w_grad_values[0])

        u_grad_values = debug_gradients.gradient_values_from_dump(
            grad_debugger, self.u, dump)
        self.assertEqual(1, len(u_grad_values))
        self.assertAllClose(30.0, u_grad_values[0])

        with self.assertRaisesRegexp(
                LookupError,
                r"This GradientsDebugger has not received any gradient tensor for "
                r"x-tensor v:0"):
            debug_gradients.gradient_values_from_dump(grad_debugger, self.v,
                                                      dump)

        # Cleanup.
        shutil.rmtree(dump_dir)
Example #24
  def testDumpingDebugHookWithoutWatchFnWorks(self):
    dumping_hook = hooks.DumpingDebugHook(self.session_root, log_usage=False)
    mon_sess = monitored_session._HookedSession(self.sess, [dumping_hook])
    mon_sess.run(self.inc_v)

    dump_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
    self.assertEqual(1, len(dump_dirs))

    self._assert_correct_run_subdir_naming(os.path.basename(dump_dirs[0]))
    dump = debug_data.DebugDumpDir(dump_dirs[0])
    self.assertAllClose([10.0], dump.get_tensors("v", 0, "DebugIdentity"))

    self.assertEqual(repr(self.inc_v), dump.run_fetches_info)
    self.assertEqual(repr(None), dump.run_feed_keys_info)
Example #25
    def testLookUpNodePythonTracebackWorks(self):
        with session.Session() as sess:
            u_init = constant_op.constant(10.0)
            u = variables.Variable(u_init, name="traceback/u")
            v_init = constant_op.constant(20.0)
            v = variables.Variable(v_init, name="traceback/v")

            w = math_ops.multiply(u, v, name="traceback/w")

            sess.run(variables.global_variables_initializer())

            run_metadata = config_pb2.RunMetadata()
            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_urls=self._debug_urls())

            sess.run(w, options=run_options, run_metadata=run_metadata)
            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            # Prior to setting the Python graph, attempts to do traceback lookup
            # should lead to exceptions.
            with self.assertRaisesRegexp(
                    LookupError,
                    "Python graph is not available for traceback lookup"):
                dump.node_traceback("traceback/w")

            dump.set_python_graph(sess.graph)

            # After setting the Python graph, attempts to look up nonexistent nodes
            # should lead to exceptions.
            with self.assertRaisesRegexp(
                    KeyError, r"Cannot find node \"foo\" in Python graph"):
                dump.node_traceback("foo")

            # Lookup should work with node name input.
            traceback = dump.node_traceback("traceback/w")
            self.assertIsInstance(traceback, list)
            self.assertGreater(len(traceback), 0)
            for trace in traceback:
                self.assertIsInstance(trace, tuple)

            # Lookup should also work with tensor name input.
            traceback = dump.node_traceback("traceback/w:0")
            self.assertIsInstance(traceback, list)
            self.assertGreater(len(traceback), 0)
            for trace in traceback:
                self.assertIsInstance(trace, tuple)
Example #26
    def testDumpingOnASingleRunWorks(self):
        sess = dumping_wrapper.DumpingDebugWrapperSession(
            self.sess, session_root=self.session_root, log_usage=False)
        sess.run(self.inc_v)

        dump_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
        self.assertEqual(1, len(dump_dirs))

        self._assert_correct_run_subdir_naming(os.path.basename(dump_dirs[0]))
        dump = debug_data.DebugDumpDir(dump_dirs[0])
        self.assertAllClose([10.0], dump.get_tensors("v", 0, "DebugIdentity"))

        self.assertEqual(repr(self.inc_v), dump.run_fetches_info)
        self.assertEqual(repr(None), dump.run_feed_keys_info)
Example #27
    def testMultiGPUSessionRun(self):
        local_devices = device_lib.list_local_devices()
        gpu_device_names = []
        for device in local_devices:
            if device.device_type == "GPU":
                gpu_device_names.append(device.name)
        gpu_device_names = sorted(gpu_device_names)

        if len(gpu_device_names) < 2:
            self.skipTest(
                "This test requires at least 2 GPUs, but only %d is available."
                % len(gpu_device_names))

        with session.Session() as sess:
            v = variables.Variable([10.0, 15.0],
                                   dtype=dtypes.float32,
                                   name="v")
            with ops.device(gpu_device_names[0]):
                u0 = math_ops.add(v, v, name="u0")
            with ops.device(gpu_device_names[1]):
                u1 = math_ops.multiply(v, v, name="u1")
            w = math_ops.subtract(u1, u0, name="w")

            sess.run(v.initializer)

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_urls="file://" + self._dump_root)
            run_metadata = config_pb2.RunMetadata()
            self.assertAllClose([80.0, 195.0],
                                sess.run(w,
                                         options=run_options,
                                         run_metadata=run_metadata))

            debug_dump_dir = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)
            self.assertEqual(3, len(debug_dump_dir.devices()))
            self.assertAllClose([10.0, 15.0],
                                debug_dump_dir.get_tensors(
                                    "v", 0, "DebugIdentity")[0])
            self.assertAllClose([20.0, 30.0],
                                debug_dump_dir.get_tensors(
                                    "u0", 0, "DebugIdentity")[0])
            self.assertAllClose([100.0, 225.0],
                                debug_dump_dir.get_tensors(
                                    "u1", 0, "DebugIdentity")[0])
Example #28
    def _compareOriginalAndReconstructedGraphDefs(self,
                                                  sess,
                                                  fetches,
                                                  feed_dict=None,
                                                  expected_output=None):
        run_options = config_pb2.RunOptions(output_partition_graphs=True)
        run_metadata = config_pb2.RunMetadata()
        output = sess.run(fetches,
                          feed_dict=feed_dict,
                          options=run_options,
                          run_metadata=run_metadata)
        if expected_output is not None:
            self.assertAllClose(expected_output, output)
        non_debug_graph_defs = run_metadata.partition_graphs

        debug_utils.watch_graph(run_options,
                                sess.graph,
                                debug_urls=self._debug_url)
        run_metadata = config_pb2.RunMetadata()
        output = sess.run(fetches,
                          feed_dict=feed_dict,
                          options=run_options,
                          run_metadata=run_metadata)
        if expected_output is not None:
            self.assertAllClose(expected_output, output)

        dump = debug_data.DebugDumpDir(
            self._dump_dir,
            partition_graphs=run_metadata.partition_graphs,
            validate=True)
        reconstructed = dump.reconstructed_non_debug_partition_graphs()

        self.assertEqual(len(non_debug_graph_defs), len(reconstructed))
        for i, non_debug_graph_def in enumerate(non_debug_graph_defs):
            device_name = debug_graphs._infer_device_name(non_debug_graph_def)
            test_util.assert_equal_graph_def(
                self._graphDefWithoutBlacklistedNodes(
                    reconstructed[device_name]),
                self._graphDefWithoutBlacklistedNodes(non_debug_graph_def))

            # Test debug_graphs.reconstruct_non_debug_graph_def.
            reconstructed_again = (
                debug_graphs.reconstruct_non_debug_graph_def(
                    run_metadata.partition_graphs[i]))
            test_util.assert_equal_graph_def(
                self._graphDefWithoutBlacklistedNodes(reconstructed_again),
                self._graphDefWithoutBlacklistedNodes(non_debug_graph_def))
Example #29
    def createAndRunGraphHelper(self):
        """Create and run a TensorFlow Graph to generate debug dumps.

    This is intentionally done in a separate method, to make it easier to test
    the stack-top mode of source annotation.
    """

        self.dump_root = self.get_temp_dir()
        self.curr_file_path = os.path.abspath(
            tf_inspect.getfile(tf_inspect.currentframe()))

        # Run a simple TF graph to generate some debug dumps that can be used in
        # source annotation.
        with session.Session() as sess:
            self.u_init = constant_op.constant(np.array([[5.0, 3.0],
                                                         [-1.0, 0.0]]),
                                               shape=[2, 2],
                                               name="u_init")
            self.u_init_line_number = line_number_above()

            self.u = variables.Variable(self.u_init, name="u")
            self.u_line_number = line_number_above()

            self.v_init = constant_op.constant(np.array([[2.0], [-1.0]]),
                                               shape=[2, 1],
                                               name="v_init")
            self.v_init_line_number = line_number_above()

            self.v = variables.Variable(self.v_init, name="v")
            self.v_line_number = line_number_above()

            self.w = math_ops.matmul(self.u, self.v, name="w")
            self.w_line_number = line_number_above()

            self.evaluate(self.u.initializer)
            self.evaluate(self.v.initializer)

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_urls=["file://%s" % self.dump_root])
            run_metadata = config_pb2.RunMetadata()
            sess.run(self.w, options=run_options, run_metadata=run_metadata)

            self.dump = debug_data.DebugDumpDir(
                self.dump_root, partition_graphs=run_metadata.partition_graphs)
            self.dump.set_python_graph(sess.graph)
Example #30
    def testDumpingOnMultipleRunsWorks(self):
        sess = dumping_wrapper.DumpingDebugWrapperSession(
            self.sess, session_root=self.session_root, log_usage=False)
        for _ in range(3):
            sess.run(self.inc_v)

        dump_dirs = glob.glob(os.path.join(self.session_root, "run_*"))
        dump_dirs = sorted(
            dump_dirs, key=lambda x: int(os.path.basename(x).split("_")[1]))
        self.assertEqual(3, len(dump_dirs))
        for i, dump_dir in enumerate(dump_dirs):
            self._assert_correct_run_subdir_naming(os.path.basename(dump_dir))
            dump = debug_data.DebugDumpDir(dump_dir)
            self.assertAllClose([10.0 + 1.0 * i],
                                dump.get_tensors("v", 0, "DebugIdentity"))
            self.assertEqual(repr(self.inc_v), dump.run_fetches_info)
            self.assertEqual(repr(None), dump.run_feed_keys_info)
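Taken together, most of the examples above follow the same three-step pattern: enable watches on the graph via debug_utils.watch_graph and a RunOptions proto, run the fetches with a RunMetadata to capture the partition graphs, then load and query the dump with debug_data.DebugDumpDir. A condensed sketch of that pattern follows; the node names x and y, the temporary dump root, and the TF 1.x-style internal import paths are assumptions for illustration, not taken from any single example above.

import tempfile

from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import session
from tensorflow.python.debug.lib import debug_data
from tensorflow.python.debug.lib import debug_utils
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variables

dump_root = tempfile.mkdtemp()

with session.Session() as sess:
  x = variables.Variable([42.0], name="x")
  y = math_ops.add(x, x, name="y")
  sess.run(x.initializer)

  # Step 1: request debug watches on every node of the graph.
  run_options = config_pb2.RunOptions(output_partition_graphs=True)
  debug_utils.watch_graph(run_options,
                          sess.graph,
                          debug_ops=["DebugIdentity"],
                          debug_urls=["file://%s" % dump_root])

  # Step 2: run the fetch with a RunMetadata to capture partition graphs.
  run_metadata = config_pb2.RunMetadata()
  sess.run(y, options=run_options, run_metadata=run_metadata)

# Step 3: load the dump and query the watched tensors offline.
dump = debug_data.DebugDumpDir(
    dump_root, partition_graphs=run_metadata.partition_graphs)
print(dump.get_tensors("x/read", 0, "DebugIdentity"))  # expect [array([42.], ...)]
print(dump.get_tensors("y", 0, "DebugIdentity"))       # expect [array([84.], ...)]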