def testArithmeticOptimizationActive(self):
        """Tests that tfdbg can dump the tensor from nodes created by Grappler."""
        with session.Session(
                config=_grappler_enabled_session_config()) as sess:
            u = variables.VariableV1([[1, 2], [3, 4]],
                                     name="u",
                                     dtype=dtypes.float32)
            # The next two ops should be optimized by Grappler into a single op:
            # either an AddN op or a Mul op.
            x = math_ops.add(u, u)
            x = math_ops.add(x, u)
            y = math_ops.multiply(x, u)

            sess.run(variables.global_variables_initializer())

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=[self._debug_url])

            run_metadata = config_pb2.RunMetadata()
            run_result = sess.run(y,
                                  options=run_options,
                                  run_metadata=run_metadata)
            self.assertAllClose(run_result, [[3, 12], [27, 48]])

            dump_data = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs,
                validate=True)

            original_node_names = set(op.name
                                      for op in sess.graph.get_operations())
            dumped_node_names = set(dump_data.nodes())
            grappler_created_node_names = dumped_node_names - original_node_names
            grappler_removed_node_names = original_node_names - dumped_node_names

            # Grappler should have replaced some of the nodes from the
            # original graph with new nodes.
            self.assertTrue(grappler_created_node_names)
            self.assertTrue(grappler_removed_node_names)

            # Iterate through the nodes created by Grappler. One of them should
            # be the result of replacing the original add ops with an AddN op
            # or a Mul op.
            found_optimized_node = False
            for grappler_node_name in grappler_created_node_names:
                node_op_type = dump_data.node_op_type(grappler_node_name)
                # Look for the node created by Grappler's arithmetic optimization.
                if (node_op_type in ("AddN", "Mul") or
                        (test_util.IsMklEnabled()
                         and node_op_type == "_MklAddN")):
                    datum = dump_data.get_tensors(grappler_node_name, 0,
                                                  "DebugIdentity")
                    self.assertEqual(1, len(datum))
                    self.assertAllClose(datum[0], [[3, 6], [9, 12]])
                    found_optimized_node = True
                    break
            self.assertTrue(
                found_optimized_node,
                "Failed to find optimized node created by Grappler's arithmetic "
                "optimization.")
    def testDumpToFileOverlappingParentDir(self):
        with session.Session() as sess:
            u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
            v_init_val = np.array([[2.0], [-1.0]])

            # Use node names with overlapping namespace (i.e., parent directory) to
            # test concurrent, non-racing directory creation.
            u_name = "testDumpToFile/u"
            v_name = "testDumpToFile/v"

            u_init = constant_op.constant(u_init_val, shape=[2, 2])
            u = variables.Variable(u_init, name=u_name)
            v_init = constant_op.constant(v_init_val, shape=[2, 1])
            v = variables.Variable(v_init, name=v_name)

            w = math_ops.matmul(u, v, name="testDumpToFile/matmul")

            u.initializer.run()
            v.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_urls = "file://%s" % self._dump_root

            # Add debug tensor watch for u.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % u_name,
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for v.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % v_name,
                                               0,
                                               debug_urls=debug_urls)

            run_metadata = config_pb2.RunMetadata()

            # Invoke Session.run().
            sess.run(w, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)
            self.assertTrue(dump.loaded_partition_graphs())

            # Verify the dumped tensor values for u and v.
            self.assertEqual(2, dump.size)

            self.assertAllClose([u_init_val],
                                dump.get_tensors("%s/read" % u_name, 0,
                                                 "DebugIdentity"))
            self.assertAllClose([v_init_val],
                                dump.get_tensors("%s/read" % v_name, 0,
                                                 "DebugIdentity"))

            self.assertGreaterEqual(
                dump.get_rel_timestamps("%s/read" % u_name, 0,
                                        "DebugIdentity")[0], 0)
            self.assertGreaterEqual(
                dump.get_rel_timestamps("%s/read" % v_name, 0,
                                        "DebugIdentity")[0], 0)
Example #3
    def testMinOption(self):
        ops.reset_default_graph()

        def check_min(nodes, mm=0, mam=0, mcm=0, mb=0, mpb=0, mrb=0, mob=0):
            # Recursively assert the minimum exec time (mm), accelerator/CPU
            # time (mam/mcm), and requested/peak/residual/output bytes
            # (mb/mpb/mrb/mob) for every profile node in the subtree.
            for n in nodes:
                if mm > 0:
                    self.assertGreaterEqual(n.exec_micros, mm)
                if mam > 0:
                    self.assertGreaterEqual(n.accelerator_exec_micros, mam)
                if mcm > 0:
                    self.assertGreaterEqual(n.cpu_exec_micros, mcm)
                if mb > 0:
                    self.assertGreaterEqual(n.requested_bytes, mb)
                if mpb > 0:
                    self.assertGreaterEqual(n.peak_bytes, mpb)
                if mrb > 0:
                    self.assertGreaterEqual(n.residual_bytes, mrb)
                if mob > 0:
                    self.assertGreaterEqual(n.output_bytes, mob)
                check_min(n.children, mm, mam, mcm, mb, mpb, mrb, mob)

        with session.Session() as sess:
            x = lib.BuildSmallModel()
            sess.run(variables.global_variables_initializer())
            run_meta = config_pb2.RunMetadata()
            _ = sess.run(x,
                         options=config_pb2.RunOptions(
                             trace_level=config_pb2.RunOptions.FULL_TRACE),
                         run_metadata=run_meta)

            min_val = random.randint(0, 10000)

            opts = builder(builder.time_and_memory(
                min_micros=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mm=min_val)

            opts = builder(
                builder.time_and_memory(min_accelerator_micros=min_val)
            ).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mam=min_val)

            opts = builder(builder.time_and_memory(
                min_cpu_micros=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mcm=min_val)

            opts = builder(builder.time_and_memory(
                min_bytes=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mb=min_val)

            opts = builder(builder.time_and_memory(
                min_peak_bytes=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mpb=min_val)

            opts = builder(builder.time_and_memory(
                min_residual_bytes=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mrb=min_val)

            opts = builder(builder.time_and_memory(
                min_output_bytes=min_val)).with_empty_output().build()
            tfprof_node = model_analyzer.profile(sess.graph,
                                                 run_meta=run_meta,
                                                 options=opts)
            check_min(tfprof_node.children, mob=min_val)
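
These thresholds also compose: a single `time_and_memory()` call can set several minima at once. A short sketch using the same `builder`, `model_analyzer`, and `check_min` names as above (it would sit inside the same `with session.Session()` block):

opts = builder(builder.time_and_memory(
    min_micros=min_val, min_bytes=min_val)).with_empty_output().build()
tfprof_node = model_analyzer.profile(sess.graph,
                                     run_meta=run_meta,
                                     options=opts)
# Surviving nodes must clear both the time floor and the memory floor.
check_min(tfprof_node.children, mm=min_val, mb=min_val)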
    def testFindNodesWithBadTensorValues(self):
        with session.Session() as sess:
            u_name = "testFindNodesWithBadTensorValues/u"
            v_name = "testFindNodesWithBadTensorValues/v"
            w_name = "testFindNodesWithBadTensorValues/w"
            x_name = "testFindNodesWithBadTensorValues/x"
            y_name = "testFindNodesWithBadTensorValues/y"
            z_name = "testFindNodesWithBadTensorValues/z"

            u_init = constant_op.constant([2.0, 4.0])
            u = variables.Variable(u_init, name=u_name)
            v_init = constant_op.constant([2.0, 1.0])
            v = variables.Variable(v_init, name=v_name)

            # Expected output: [0.0, 3.0]
            w = math_ops.sub(u, v, name=w_name)

            # Expected output: [inf, 1.3333]
            x = math_ops.div(u, w, name=x_name)

            # Expected output: [nan, 4.0]
            y = math_ops.mul(w, x, name=y_name)

            z = math_ops.mul(y, y, name=z_name)

            u.initializer.run()
            v.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            sess.run(z, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            def has_bad_value(_, tensor):
                return np.any(np.isnan(tensor)) or np.any(np.isinf(tensor))

            # Find all "offending tensors".
            bad_data = dump.find(has_bad_value)

            # Verify that the nodes with bad values are caught through running find
            # on the debug dump.
            self.assertEqual(3, len(bad_data))
            self.assertEqual(x_name, bad_data[0].node_name)
            self.assertEqual(y_name, bad_data[1].node_name)
            self.assertEqual(z_name, bad_data[2].node_name)

            # Test first_n kwarg of find(): Find the first offending tensor.
            first_bad_datum = dump.find(has_bad_value, first_n=1)

            self.assertEqual(1, len(first_bad_datum))
            self.assertEqual(x_name, first_bad_datum[0].node_name)
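
Note that tfdbg ships an equivalent built-in predicate, so the hand-rolled `has_bad_value` above could likely be replaced (assuming the same `debug_data` module imported by these tests):

# debug_data.has_inf_or_nan has the same (datum, tensor) signature that
# dump.find() expects, and flags nan/inf values just like has_bad_value.
bad_data = dump.find(debug_data.has_inf_or_nan)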
    def testDumpCausalityCheck(self):
        with session.Session() as sess:
            u_name = "testDumpCausalityCheck/u"
            v_name = "testDumpCausalityCheck/v"
            w_name = "testDumpCausalityCheck/w"

            u_init = constant_op.constant([2.0, 4.0])
            u = variables.Variable(u_init, name=u_name)
            v = math_ops.add(u, u, name=v_name)
            w = math_ops.add(v, v, name=w_name)

            u.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            sess.run(w, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))

            # First, loading the original dump without supplying the
            # partition_graphs should not cause a RuntimeError: validation
            # occurs only when partition_graphs are loaded.
            debug_data.DebugDumpDir(self._dump_root)

            # Now, loading the original dump with partition graphs supplied should
            # succeed. The validation should pass quietly.
            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            # Get the dump file names and compute their timestamps.
            self.assertEqual(
                1, len(dump.get_tensor_file_paths(u_name, 0, "DebugIdentity")))
            u_file_path = dump.get_tensor_file_paths(u_name, 0,
                                                     "DebugIdentity")[0]

            self.assertEqual(
                1, len(dump.get_tensor_file_paths(v_name, 0, "DebugIdentity")))
            v_file_path = dump.get_tensor_file_paths(v_name, 0,
                                                     "DebugIdentity")[0]

            u_timestamp = int(u_file_path[u_file_path.rindex("_") + 1:])
            v_timestamp = int(v_file_path[v_file_path.rindex("_") + 1:])

            # Swap the timestamps of the two dump files.
            new_u_file_path = (u_file_path[:u_file_path.rindex("_")] +
                               "_%d" % v_timestamp)
            new_v_file_path = (v_file_path[:v_file_path.rindex("_")] +
                               "_%d" % u_timestamp)

            os.rename(u_file_path, new_u_file_path)
            os.rename(v_file_path, new_v_file_path)

            # Load the dump directory again. Now a ValueError is expected to be
            # raised due to the timestamp swap.
            with self.assertRaisesRegexp(ValueError, "Causality violated"):
                dump = debug_data.DebugDumpDir(
                    self._dump_root,
                    partition_graphs=run_metadata.partition_graphs)

            # Loading the dump directory with the kwarg "validate" explicitly
            # set to False should suppress the error.
            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs,
                validate=False)
Example #6
    def run_op_benchmark(self,
                         sess,
                         op_or_tensor,
                         feed_dict=None,
                         burn_iters=2,
                         min_iters=10,
                         store_trace=False,
                         name=None):
        """Run an op or tensor in the given session.  Report the results.

    Args:
      sess: `Session` object to use for timing.
      op_or_tensor: `Operation` or `Tensor` to benchmark.
      feed_dict: A `dict` of values to feed for each op iteration (see the
        `feed_dict` parameter of `Session.run`).
      burn_iters: Number of burn-in iterations to run.
      min_iters: Minimum number of iterations to use for timing.
      store_trace: Boolean, whether to run an extra untimed iteration and
        store the trace of that iteration in the benchmark report.
        The trace will be stored as a string in Google Chrome trace format
        in the extras field "full_trace_chrome_format".
      name: (optional) Override the BenchmarkEntry name with `name`.
        Otherwise it is inferred from the top-level method name.
    """
        for _ in range(burn_iters):
            sess.run(op_or_tensor, feed_dict=feed_dict)

        deltas = [None] * min_iters

        for i in range(min_iters):
            start_time = time.time()
            sess.run(op_or_tensor, feed_dict=feed_dict)
            end_time = time.time()
            delta = end_time - start_time
            deltas[i] = delta

        extras = {}
        if store_trace:
            run_options = config_pb2.RunOptions(
                trace_level=config_pb2.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()
            sess.run(op_or_tensor,
                     feed_dict=feed_dict,
                     options=run_options,
                     run_metadata=run_metadata)
            tl = timeline.Timeline(run_metadata.step_stats)
            extras["full_trace_chrome_format"] = (
                tl.generate_chrome_trace_format())

        def _median(x):
            if not x:
                return -1
            s = sorted(x)
            l = len(x)
            lm1 = l - 1
            return (s[l // 2] + s[lm1 // 2]) / 2.0

        median_delta = _median(deltas)

        self.report_benchmark(iters=min_iters,
                              wall_time=median_delta,
                              extras=extras,
                              name=name)
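
A hypothetical call site for this helper, from inside a `tf.test.Benchmark` subclass (the graph and names here are illustrative only):

def benchmarkMatMul(self):
    with session.Session() as sess:
        a = constant_op.constant(
            np.random.rand(256, 256), dtype=dtypes.float32)
        product = math_ops.matmul(a, a)
        # Reports the median wall time over 25 timed iterations and stores
        # one extra traced iteration in Chrome trace format.
        self.run_op_benchmark(sess, product, min_iters=25,
                              store_trace=True, name="matmul_256x256")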
    def testDumpStringTensorsToFileSystem(self):
        with session.Session() as sess:
            str1_init_val = np.array(b"abc")
            str2_init_val = np.array(b"def")

            str1_init = constant_op.constant(str1_init_val)
            str2_init = constant_op.constant(str2_init_val)

            str1_name = "str1"
            str2_name = "str2"
            str1 = variables.Variable(str1_init, name=str1_name)
            str2 = variables.Variable(str2_init, name=str2_name)
            # Concatenate str1 and str2
            str_concat = math_ops.add(str1, str2, name="str_concat")

            str1.initializer.run()
            str2.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_urls = self._debug_urls()

            # Add debug tensor watch for str1.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % str1_name,
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for str2.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % str2_name,
                                               0,
                                               debug_urls=debug_urls)

            run_metadata = config_pb2.RunMetadata()
            sess.run(str_concat,
                     options=run_options,
                     run_metadata=run_metadata)

            # String ops are located on CPU.
            self.assertEqual(1, len(run_metadata.partition_graphs))

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            self.assertIn(str1_name, dump.nodes())
            self.assertIn(str2_name, dump.nodes())

            self.assertEqual(2, dump.size)

            self.assertEqual([str1_init_val],
                             dump.get_tensors("%s/read" % str1_name, 0,
                                              "DebugIdentity"))
            self.assertEqual([str2_init_val],
                             dump.get_tensors("%s/read" % str2_name, 0,
                                              "DebugIdentity"))

            self.assertGreaterEqual(
                dump.get_rel_timestamps("%s/read" % str1_name, 0,
                                        "DebugIdentity")[0], 0)
            self.assertGreaterEqual(
                dump.get_rel_timestamps("%s/read" % str2_name, 0,
                                        "DebugIdentity")[0], 0)
Example #8
  def testAddingSummaryGraphAndRunMetadata(self):
    test_dir = self._CleanTestDir("basics")
    sw = writer.FileWriter(test_dir)

    sw.add_session_log(event_pb2.SessionLog(status=SessionLog.START), 1)
    sw.add_summary(
        summary_pb2.Summary(
            value=[summary_pb2.Summary.Value(
                tag="mee", simple_value=10.0)]),
        10)
    sw.add_summary(
        summary_pb2.Summary(
            value=[summary_pb2.Summary.Value(
                tag="boo", simple_value=20.0)]),
        20)
    with ops.Graph().as_default() as g:
      constant_op.constant([0], name="zero")
    sw.add_graph(g, global_step=30)

    run_metadata = config_pb2.RunMetadata()
    device_stats = run_metadata.step_stats.dev_stats.add()
    device_stats.device = "test"
    sw.add_run_metadata(run_metadata, "test run", global_step=40)
    sw.close()
    rr = self._EventsReader(test_dir)

    # The first event should list the file_version.
    ev = next(rr)
    self._assertRecent(ev.wall_time)
    self.assertEquals("brain.Event:2", ev.file_version)

    # The next event should be the START message.
    ev = next(rr)
    self._assertRecent(ev.wall_time)
    self.assertEquals(1, ev.step)
    self.assertEquals(SessionLog.START, ev.session_log.status)

    # The next event should have the value 'mee=10.0'.
    ev = next(rr)
    self._assertRecent(ev.wall_time)
    self.assertEquals(10, ev.step)
    self.assertProtoEquals("""
      value { tag: 'mee' simple_value: 10.0 }
      """, ev.summary)

    # The next event should have the value 'boo=20.0'.
    ev = next(rr)
    self._assertRecent(ev.wall_time)
    self.assertEquals(20, ev.step)
    self.assertProtoEquals("""
      value { tag: 'boo' simple_value: 20.0 }
      """, ev.summary)

    # The next event should have the graph_def.
    ev = next(rr)
    self._assertRecent(ev.wall_time)
    self.assertEquals(30, ev.step)
    ev_graph = graph_pb2.GraphDef()
    ev_graph.ParseFromString(ev.graph_def)
    self.assertProtoEquals(g.as_graph_def(add_shapes=True), ev_graph)

    # The next event should have metadata for the run.
    ev = next(rr)
    self._assertRecent(ev.wall_time)
    self.assertEquals(40, ev.step)
    self.assertEquals("test run", ev.tagged_run_metadata.tag)
    parsed_run_metadata = config_pb2.RunMetadata()
    parsed_run_metadata.ParseFromString(ev.tagged_run_metadata.run_metadata)
    self.assertProtoEquals(run_metadata, parsed_run_metadata)

    # We should be done.
    self.assertRaises(StopIteration, lambda: next(rr))
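
`_EventsReader` is a fixture helper not shown in this example. One plausible implementation, assuming a single events file per test directory and TF's `summary_iterator`:

import glob
import os

from tensorflow.python.summary import summary_iterator

def _EventsReader(self, test_dir):
    event_paths = glob.glob(os.path.join(test_dir, "event*"))
    # A single FileWriter was used, so expect exactly one events file.
    return summary_iterator.summary_iterator(event_paths[0])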
Example #9
    def test_matmul_biasadd_gelu_fusion(self, mode):
        """Test MatMul+BiasAdd+Gelu fusion."""
        self._maybe_skip(mode)
        is_bf16_supported = _pywrap_utils.IsBF16SupportedByOneDNNOnThisCPU()
        run_options = config_pb2.RunOptions(output_partition_graphs=True)
        metadata = config_pb2.RunMetadata()

        m, n, k = (3, 3, 4)  # Matrix dimensions
        for precision in ('float32', 'bfloat16'):
            for approximate in (False, True):
                # Gelu exact (approximate=False) is not supported with bfloat16
                # precision since no support for Erf with bfloat16 data type.
                # TODO(intel-tf): Enable gelu exact with bfloat16, when Erf op is
                # supported with bfloat16.
                if precision == 'bfloat16':
                    if not (approximate and is_bf16_supported):
                        continue

                # Create MatMul + BiasAdd + Gelu graph
                ops.reset_default_graph()
                x = _input([m, k])
                w = _weight([k, n])
                b = _bias([n])
                if precision == 'bfloat16':
                    x = math_ops.cast(x, dtypes.bfloat16)
                    w = math_ops.cast(w, dtypes.bfloat16)
                    b = math_ops.cast(b, dtypes.bfloat16)
                y = math_ops.matmul(x, w)
                z = nn.bias_add(y, b)
                out = nn.gelu(z, approximate=approximate)

                # Compute reference value.
                config = _get_config(remapping_on=False)
                with session.Session(config=config) as sess:
                    sess.run(variables.global_variables_initializer())
                    output_val_ref = sess.run(out,
                                              options=run_options,
                                              run_metadata=metadata)
                # Compute output with fusion.
                config = _get_config(remapping_on=True)
                with session.Session(config=config) as sess:
                    sess.run(variables.global_variables_initializer())
                    output_val = sess.run(out,
                                          options=run_options,
                                          run_metadata=metadata)
                    graph = metadata.partition_graphs[0]

                # Graph should contain fused op.
                found_fused_op = False
                gelu_type = b'GeluApproximate' if approximate else b'GeluExact'
                for node in graph.node:
                    if node.op in ('_MklNativeFusedMatMul', '_MklFusedMatMul'):
                        fused_ops = node.attr['fused_ops'].list.s
                        found_fused_op = (len(fused_ops) == 2 and
                                          fused_ops[0] == b'BiasAdd' and
                                          fused_ops[1] == gelu_type)
                        break
                self.assertTrue(found_fused_op)

                # Computed output value should be close to reference value.
                tol = 1e-5 if precision == 'float32' else 1e-2
                self.assertAllClose(output_val_ref,
                                    output_val,
                                    atol=tol,
                                    rtol=tol)
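
`_get_config()` is referenced but not defined here. A hypothetical reconstruction that toggles Grappler's remapper, the pass responsible for the MatMul+BiasAdd+Gelu fusion (the original's exact options may differ):

from tensorflow.core.protobuf import rewriter_config_pb2

def _get_config(remapping_on=False):
    remapping = (rewriter_config_pb2.RewriterConfig.ON if remapping_on
                 else rewriter_config_pb2.RewriterConfig.OFF)
    rewrite_options = rewriter_config_pb2.RewriterConfig(remapping=remapping)
    graph_options = config_pb2.GraphOptions(rewrite_options=rewrite_options)
    return config_pb2.ConfigProto(graph_options=graph_options)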
Example #10
    def run(self, fetches, feed_dict=None, options=None, run_metadata=None):
        """Wrapper around Session.run() that inserts tensor watch options.

    Args:
      fetches: Same as the fetches arg to regular Session.run()
      feed_dict: Same as the feed_dict arg to regular Session.run()
      options: Same as the options arg to regular Session.run()
      run_metadata: Same as the run_metadata to regular Session.run()

    Returns:
      Simply forwards the output of the wrapped Session.run() call.

    Raises:
      ValueError: On invalid OnRunStartAction value.
    """

        self._run_call_count += 1

        # Invoke on-run-start callback and obtain response.
        run_start_resp = self.on_run_start(
            OnRunStartRequest(fetches, feed_dict, options, run_metadata,
                              self._run_call_count))
        _check_type(run_start_resp, OnRunStartResponse)

        if run_start_resp.action == OnRunStartAction.DEBUG_RUN:
            # Decorate RunOptions to fill in debugger tensor watch specifications.
            decorated_run_options = options or config_pb2.RunOptions()
            run_metadata = run_metadata or config_pb2.RunMetadata()

            self._decorate_run_options(decorated_run_options,
                                       run_start_resp.debug_urls)

            # Invoke the run() method of the wrapped Session.
            retvals = self._sess.run(fetches,
                                     feed_dict=feed_dict,
                                     options=decorated_run_options,
                                     run_metadata=run_metadata)

            # Prepare arg for the on-run-end callback.
            run_end_req = OnRunEndRequest(run_start_resp.action,
                                          run_metadata=run_metadata)
        elif run_start_resp.action == OnRunStartAction.NON_DEBUG_RUN:
            # Invoke run() method of the wrapped session.
            retvals = self._sess.run(fetches,
                                     feed_dict=feed_dict,
                                     options=options,
                                     run_metadata=run_metadata)

            # Prepare arg for the on-run-end callback.
            run_end_req = OnRunEndRequest(run_start_resp.action)
        elif run_start_resp.action == OnRunStartAction.INVOKE_STEPPER:
            # TODO(cais): Implement stepper loop.
            raise NotImplementedError(
                "OnRunStartAction INVOKE_STEPPER has not been implemented.")
        else:
            raise ValueError("Invalid OnRunStartAction value: %s" %
                             run_start_resp.action)

        # Invoke on-run-end callback and obtain response.
        run_end_resp = self.on_run_end(run_end_req)
        _check_type(run_end_resp, OnRunEndResponse)
        # Currently run_end_resp is only a placeholder. No action is taken on it.

        return retvals
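
`_check_type` is a small guard used throughout the wrapper; a plausible definition:

def _check_type(obj, expected_type):
    # Raise TypeError if obj is not an instance of the expected type.
    if not isinstance(obj, expected_type):
        raise TypeError("Expected type %s; got type %s" %
                        (expected_type, type(obj)))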
Example #11
    @classmethod
    def setUpClass(cls):
        cls._dump_root = tempfile.mkdtemp()

        cls._is_gpu_available = test.is_gpu_available()
        if cls._is_gpu_available:
            cls._main_device = "/job:localhost/replica:0/task:0/gpu:0"
        else:
            cls._main_device = "/job:localhost/replica:0/task:0/cpu:0"

        with session.Session() as sess:
            u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
            v_init_val = np.array([[2.0], [-1.0]])

            u_name = "simple_mul_add/u"
            v_name = "simple_mul_add/v"

            u_init = constant_op.constant(u_init_val, shape=[2, 2])
            u = variables.Variable(u_init, name=u_name)
            v_init = constant_op.constant(v_init_val, shape=[2, 1])
            v = variables.Variable(v_init, name=v_name)

            w = math_ops.matmul(u, v, name="simple_mul_add/matmul")

            x = math_ops.add(w, w, name="simple_mul_add/add")

            u.initializer.run()
            v.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls="file://%s" % cls._dump_root)

            # Invoke Session.run().
            run_metadata = config_pb2.RunMetadata()
            sess.run(x, options=run_options, run_metadata=run_metadata)

        cls._debug_dump = debug_data.DebugDumpDir(
            cls._dump_root, partition_graphs=run_metadata.partition_graphs)

        # Construct the analyzer.
        cls._analyzer = analyzer_cli.DebugAnalyzer(cls._debug_dump)

        # Construct the handler registry.
        cls._registry = debugger_cli_common.CommandHandlerRegistry()

        # Register command handlers.
        cls._registry.register_command_handler(
            "list_tensors",
            cls._analyzer.list_tensors,
            cls._analyzer.get_help("list_tensors"),
            prefix_aliases=["lt"])
        cls._registry.register_command_handler(
            "node_info",
            cls._analyzer.node_info,
            cls._analyzer.get_help("node_info"),
            prefix_aliases=["ni"])
        cls._registry.register_command_handler(
            "print_tensor",
            cls._analyzer.print_tensor,
            cls._analyzer.get_help("print_tensor"),
            prefix_aliases=["pt"])
    def testLazyCompilation(self):
        @function.Defun(compiled=True)
        def CompiledFunction(x):
            return math_ops.log(x)

        with session_lib.Session(config=NoRewriteSessionConfig()) as sess:
            x = array_ops.placeholder(dtypes.float32)
            y = CompiledFunction(x)

            # The very first run of the cluster is always compiled (non-lazily).
            run_metadata_for_first_run = config_pb2.RunMetadata()
            sess.run(y,
                     feed_dict={x: [2., 10., 19., 77., 100.]},
                     run_metadata=run_metadata_for_first_run,
                     options=config_pb2.RunOptions(
                         trace_level=config_pb2.RunOptions.FULL_TRACE))
            self.assertTrue(
                InLabels(RunMetadataLabels(run_metadata_for_first_run),
                         "_XlaCompile"))
            self.assertTrue(
                InLabels(RunMetadataLabels(run_metadata_for_first_run),
                         "_XlaRun"))

            run_metadata_before_warmup = config_pb2.RunMetadata()
            sess.run(y,
                     feed_dict={x: [2., 10.]},
                     run_metadata=run_metadata_before_warmup,
                     options=config_pb2.RunOptions(
                         trace_level=config_pb2.RunOptions.FULL_TRACE))
            self.assertTrue(
                InLabels(RunMetadataLabels(run_metadata_before_warmup),
                         "_XlaCompile"))
            self.assertFalse(
                InLabels(RunMetadataLabels(run_metadata_before_warmup),
                         "_XlaRun"))

            # We compile when we see the same shape a second time.

            run_metadata_after_warmup = config_pb2.RunMetadata()
            sess.run(y,
                     feed_dict={x: [2., 10.]},
                     run_metadata=run_metadata_after_warmup,
                     options=config_pb2.RunOptions(
                         trace_level=config_pb2.RunOptions.FULL_TRACE))
            self.assertTrue(
                InLabels(RunMetadataLabels(run_metadata_after_warmup),
                         "_XlaCompile"))
            self.assertTrue(
                InLabels(RunMetadataLabels(run_metadata_after_warmup),
                         "_XlaRun"))

            run_metadata_for_new_shape = config_pb2.RunMetadata()
            sess.run(y,
                     feed_dict={x: [2., 10., 12.]},
                     run_metadata=run_metadata_for_new_shape,
                     options=config_pb2.RunOptions(
                         trace_level=config_pb2.RunOptions.FULL_TRACE))
            self.assertTrue(
                InLabels(RunMetadataLabels(run_metadata_for_new_shape),
                         "_XlaCompile"))
            self.assertFalse(
                InLabels(RunMetadataLabels(run_metadata_for_new_shape),
                         "_XlaRun"))
Example #13
    def testDumpToFileWhileLoop(self):
        with session.Session() as sess:
            num_iter = 10

            # "u" is the Variable being updated in the loop.
            u_name = "testDumpToFileWhileLoop/u"
            u_namespace = u_name.split("/")[0]

            u_init_val = np.array(11.0)
            u_init = constant_op.constant(u_init_val)
            u = variables.Variable(u_init, name=u_name)

            # "v" is the increment.
            v_name = "testDumpToFileWhileLoop/v"
            v_namespace = v_name.split("/")[0]

            v_init_val = np.array(2.0)
            v_init = constant_op.constant(v_init_val)
            v = variables.Variable(v_init, name=v_name)

            u.initializer.run()
            v.initializer.run()

            i = constant_op.constant(0, name="testDumpToFileWhileLoop/i")

            def cond(i):
                return math_ops.less(i, num_iter)

            def body(i):
                new_u = state_ops.assign_add(u, v)
                new_i = math_ops.add(i, 1)
                op = control_flow_ops.group(new_u)
                new_i = control_flow_ops.with_dependencies([op], new_i)
                return [new_i]

            loop = control_flow_ops.while_loop(cond,
                                               body, [i],
                                               parallel_iterations=1)

            # Create RunOptions for debug-watching tensors
            run_options = config_pb2.RunOptions()
            debug_url = "file://%s" % self.dump_root_

            # Add debug tensor watch for u.
            self._addDebugTensorWatch(run_options,
                                      u_name,
                                      0,
                                      debug_urls=[debug_url])
            # Add debug tensor watch for v.
            self._addDebugTensorWatch(run_options,
                                      "%s/read" % v_name,
                                      0,
                                      debug_urls=[debug_url])
            # Add debug tensor watch for while/Identity.
            self._addDebugTensorWatch(run_options,
                                      "while/Identity",
                                      0,
                                      debug_urls=[debug_url])

            run_metadata = config_pb2.RunMetadata()

            r = sess.run(loop, options=run_options, run_metadata=run_metadata)

            self.assertEqual(num_iter, r)

            u_val_final = sess.run(u)
            self.assertAllClose(u_init_val + num_iter * v_init_val,
                                u_val_final)

            # Verify dump files
            self.assertTrue(os.path.isdir(self.dump_root_))

            self.assertTrue(
                os.path.isdir(os.path.join(self.dump_root_, u_namespace)))
            self.assertTrue(
                os.path.isdir(os.path.join(self.dump_root_, v_namespace, "v")))

            # Verify the dump file for tensor "u".
            dump_files = glob.glob(
                os.path.join(self.dump_root_, u_namespace, "u_0_*"))
            self.assertEqual(1, len(dump_files))
            dump_file = os.path.join(self.dump_root_, u_namespace,
                                     dump_files[0])
            self.assertTrue(os.path.isfile(dump_file))
            self._verifyTensorDumpFile(dump_file, "%s:0" % u_name,
                                       "DebugIdentity", 0, u_init_val)

            # Verify the dump file for tensor "v".
            dump_files = os.listdir(os.path.join(self.dump_root_, v_name))
            self.assertEqual(1, len(dump_files))
            self.assertTrue(dump_files[0].startswith("read_0_"))

            dump_file = os.path.join(self.dump_root_, v_name, dump_files[0])
            self._verifyTensorDumpFile(dump_file, "%s/read:0" % v_name,
                                       "DebugIdentity", 0, v_init_val)

            # Verify the dump files for tensor while/Identity
            while_identity_dump_files = sorted(
                os.listdir(os.path.join(self.dump_root_, "while")))
            self.assertEqual(num_iter, len(while_identity_dump_files))

            # Verify the content of the individual dump files.
            for k in xrange(len(while_identity_dump_files)):
                dump_file_path = os.path.join(self.dump_root_, "while",
                                              while_identity_dump_files[k])
                self._verifyTensorDumpFile(dump_file_path, "while/Identity:0",
                                           "DebugIdentity", 0, np.array(k))
Example #14
  def _run_with_debugging(self,
                          run_start_resp,
                          fetches,
                          feed_dict,
                          options,
                          run_metadata,
                          callable_runner,
                          callable_runner_args,
                          callable_options):
    """Perform a session.run() or callable with debugging."""
    # Decorate RunOptions to fill in debugger tensor watch specifications.
    decorated_run_options = None
    if callable_options:
      callable_options_id = id(callable_options)
      if callable_options_id not in self._cached_callables_from_options:
        # Make a copy of callable_options to avoid mutating it.
        new_callable_options = config_pb2.CallableOptions()
        new_callable_options.CopyFrom(callable_options)
        decorated_run_options = new_callable_options.run_options
    else:
      decorated_run_options = options or config_pb2.RunOptions()

    run_metadata = run_metadata or config_pb2.RunMetadata()

    if decorated_run_options:
      self._decorate_run_options_for_debug(
          decorated_run_options,
          run_start_resp.debug_urls,
          debug_ops=run_start_resp.debug_ops,
          node_name_regex_allowlist=(run_start_resp.node_name_regex_allowlist),
          op_type_regex_allowlist=run_start_resp.op_type_regex_allowlist,
          tensor_dtype_regex_allowlist=(
              run_start_resp.tensor_dtype_regex_allowlist),
          tolerate_debug_op_creation_failures=(
              run_start_resp.tolerate_debug_op_creation_failures))

    # Invoke the run() method of the wrapped Session. Catch any TensorFlow
    # runtime errors.
    tf_error = None
    try:
      if callable_runner:
        retvals = callable_runner(*callable_runner_args,
                                  options=decorated_run_options,
                                  run_metadata=run_metadata)
      elif callable_options:
        # pylint:disable=protected-access
        if callable_options_id in self._cached_callables_from_options:
          callable_object = self._cached_callables_from_options[
              callable_options_id]
        else:
          callable_object = self._sess._make_callable_from_options(
              new_callable_options)
          self._cached_callables_from_options[
              callable_options_id] = callable_object
        # pylint:enable=protected-access
        retvals = callable_object(
            *callable_runner_args, run_metadata=run_metadata)
      else:
        retvals = self._sess.run(fetches,
                                 feed_dict=feed_dict,
                                 options=decorated_run_options,
                                 run_metadata=run_metadata)
    except errors.OpError as op_error:
      if self._pass_through_operrors:
        raise op_error
      tf_error = op_error
      retvals = op_error

    return retvals, OnRunEndRequest(
        run_start_resp.action,
        run_metadata=run_metadata,
        client_graph_def=self._sess.graph.as_graph_def(),
        tf_error=tf_error)
Example #15
    def testMultiStepProfile(self):
        ops.reset_default_graph()
        opts = builder.time_and_memory()

        with session.Session() as sess:
            r1, r2, r3 = lib.BuildSplitableModel()
            sess.run(variables.global_variables_initializer())

            profiler = model_analyzer.Profiler(sess.graph)
            pb0 = profiler.profile_name_scope(opts)

            run_meta = config_pb2.RunMetadata()
            _ = sess.run(r1,
                         options=config_pb2.RunOptions(
                             trace_level=config_pb2.RunOptions.FULL_TRACE),
                         run_metadata=run_meta)
            profiler.add_step(1, run_meta)
            pb1 = profiler.profile_name_scope(opts)

            self.assertNotEqual(lib.SearchTFProfNode(pb1, 'DW'), None)
            self.assertEqual(lib.SearchTFProfNode(pb1, 'DW2'), None)
            self.assertEqual(lib.SearchTFProfNode(pb1, 'add'), None)

            run_meta2 = config_pb2.RunMetadata()
            _ = sess.run(r2,
                         options=config_pb2.RunOptions(
                             trace_level=config_pb2.RunOptions.FULL_TRACE),
                         run_metadata=run_meta2)
            profiler.add_step(2, run_meta2)
            pb2 = profiler.profile_name_scope(opts)

            self.assertNotEqual(lib.SearchTFProfNode(pb2, 'DW'), None)
            self.assertNotEqual(lib.SearchTFProfNode(pb2, 'DW2'), None)
            self.assertEqual(lib.SearchTFProfNode(pb2, 'add'), None)

            run_meta3 = config_pb2.RunMetadata()
            _ = sess.run(r3,
                         options=config_pb2.RunOptions(
                             trace_level=config_pb2.RunOptions.FULL_TRACE),
                         run_metadata=run_meta3)
            profiler.add_step(3, run_meta3)
            pb3 = profiler.profile_name_scope(opts)

            self.assertNotEqual(lib.SearchTFProfNode(pb3, 'DW'), None)
            self.assertNotEqual(lib.SearchTFProfNode(pb3, 'DW2'), None)
            self.assertNotEqual(lib.SearchTFProfNode(pb3, 'add'), None)

            self.assertEqual(lib.SearchTFProfNode(pb0, 'Conv2D'), None)
            self.assertGreater(
                lib.SearchTFProfNode(pb1, 'Conv2D').exec_micros, 0)
            self.assertEqual(lib.SearchTFProfNode(pb1, 'Conv2D_1'), None)
            self.assertGreater(
                lib.SearchTFProfNode(pb2, 'Conv2D_1').exec_micros, 0)
            self.assertEqual(lib.SearchTFProfNode(pb2, 'add'), None)
            self.assertGreater(lib.SearchTFProfNode(pb3, 'add').exec_micros, 0)

            advice_pb = profiler.advise(model_analyzer.ALL_ADVICE)
            self.assertTrue(
                'AcceleratorUtilizationChecker' in advice_pb.checkers)
            self.assertTrue('ExpensiveOperationChecker' in advice_pb.checkers)
            self.assertTrue('OperationChecker' in advice_pb.checkers)

            checker = advice_pb.checkers['AcceleratorUtilizationChecker']
            if test.is_gpu_available():
                self.assertGreater(len(checker.reports), 0)
            else:
                self.assertEqual(len(checker.reports), 0)
            checker = advice_pb.checkers['ExpensiveOperationChecker']
            self.assertGreater(len(checker.reports), 0)
Example #16
def _profiled_run(self,
                  fetches,
                  feed_dict=None,
                  options=None,
                  run_metadata=None):
    """Overwrites the session.run()."""
    # pylint: disable=protected-access
    # Count the session steps.
    with self.profile_context._new_step() as state:
        step, locked = state
        # Profile only when we hold the lock and this step is not on the
        # fast (no-profiling) path; otherwise fall through to the return below.
        if locked and not self.profile_context._is_fast_path(step):
            # Maybe trace this step.
            if self.profile_context._should_trace(step, self.graph, fetches):
                if self.profile_context._debug:
                    sys.stderr.write('debug: tracing step: %d\n' % step)
                # Enable tracing, perform auto profiling or auto dump.
                if not run_metadata:
                    run_metadata = config_pb2.RunMetadata()

                if not options:
                    options = config_pb2.RunOptions(
                        trace_level=config_pb2.RunOptions.FULL_TRACE)
                    old_trace_level = options.trace_level
                else:
                    old_trace_level = options.trace_level
                    options.trace_level = config_pb2.RunOptions.FULL_TRACE

                ret = self._profiler_run_internal(fetches, feed_dict, options,
                                                  run_metadata)
                if self.profile_context._debug:
                    self.profile_context._dump_file(run_metadata,
                                                    'run_meta_%d' % step)

                self.profile_context.profiler._graph = self.graph
                self.profile_context.profiler.add_step(step, run_metadata)
                options.trace_level = old_trace_level
            else:
                ret = self._profiler_run_internal(fetches, feed_dict, options)

            # Maybe dump profile.
            self.profile_context._maybe_dump(step)

            # Maybe profile:
            to_profiles = self.profile_context._profile_candidates()
            for to_prof in to_profiles:
                cmd, opts, _ = to_prof
                saved_views = self.profile_context._views.setdefault(cmd, {})
                if self.profile_context._debug:
                    sys.stderr.write('debug: profiling %s step: %d\n' %
                                     (cmd, step))
                if cmd == 'graph':
                    saved_views[step] = (
                        self.profile_context.profiler.profile_graph(opts))
                elif cmd == 'scope':
                    saved_views[step] = (
                        self.profile_context.profiler.profile_name_scope(opts))
                elif cmd == 'op':
                    saved_views[step] = (
                        self.profile_context.profiler.profile_operations(opts))
                elif cmd == 'code':
                    saved_views[step] = (
                        self.profile_context.profiler.profile_python(opts))
                else:
                    raise ValueError('Unknown cmd: %s\n' % cmd)
            return ret
    # Fast no lock path.
    return self._profiler_run_internal(fetches, feed_dict, options,
                                       run_metadata)
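
For context, a wrapper like this is typically installed by monkey-patching the Session class so every subsequent run() call goes through it. A sketch of how a ProfileContext might wire it up (names beyond `_profiled_run` are assumptions):

# Keep the original run() reachable under the name the wrapper calls,
# then swap the profiled version in for all Session instances.
session.BaseSession._profiler_run_internal = session.BaseSession.run
session.BaseSession.run = _profiled_run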
Example #17
    def testProfileBasic(self):
        ops.reset_default_graph()
        outfile = os.path.join(test.get_temp_dir(), 'dump')
        opts = (builder(
            builder.trainable_variables_parameter()).with_file_output(
                outfile).with_accounted_types(['.*']).select([
                    'params', 'float_ops', 'micros', 'bytes', 'device',
                    'op_types', 'occurrence'
                ]).build())

        # Test the output without run_meta.
        sess = session.Session()
        r = lib.BuildFullModel()
        sess.run(variables.global_variables_initializer())

        profiler = model_analyzer.Profiler(sess.graph)
        profiler.profile_name_scope(opts)
        with gfile.Open(outfile, 'r') as f:
            profiler_str = f.read()

        model_analyzer.profile(sess.graph, cmd='scope', options=opts)
        with gfile.Open(outfile, 'r') as f:
            pma_str = f.read()
        self.assertEqual(pma_str, profiler_str)

        # Test the output with run_meta.
        run_meta = config_pb2.RunMetadata()
        _ = sess.run(r,
                     options=config_pb2.RunOptions(
                         trace_level=config_pb2.RunOptions.FULL_TRACE),
                     run_metadata=run_meta)

        profiler.add_step(1, run_meta)
        profiler.profile_graph(opts)
        with gfile.Open(outfile, 'r') as f:
            profiler_str = f.read()

        model_analyzer.profile(sess.graph,
                               cmd='graph',
                               run_meta=run_meta,
                               options=opts)
        with gfile.Open(outfile, 'r') as f:
            pma_str = f.read()
        self.assertEqual(pma_str, profiler_str)

        profiler.profile_python(opts)
        with gfile.Open(outfile, 'r') as f:
            profiler_str = f.read()

        model_analyzer.profile(sess.graph,
                               cmd='code',
                               run_meta=run_meta,
                               options=opts)
        with gfile.Open(outfile, 'r') as f:
            pma_str = f.read()
        self.assertEqual(pma_str, profiler_str)

        profiler.profile_operations(opts)
        with gfile.Open(outfile, 'r') as f:
            profiler_str = f.read()

        model_analyzer.profile(sess.graph,
                               cmd='op',
                               run_meta=run_meta,
                               options=opts)
        with gfile.Open(outfile, 'r') as f:
            pma_str = f.read()
        self.assertEqual(pma_str, profiler_str)

        model_analyzer.profile(sess.graph,
                               cmd='scope',
                               run_meta=run_meta,
                               options=opts)
        with gfile.Open(outfile, 'r') as f:
            pma_str = f.read()
        self.assertNotEqual(pma_str, profiler_str)

        opts2 = opts.copy()
        opts2['select'] = ['params', 'float_ops']
        profiler.profile_name_scope(opts2)
        with gfile.Open(outfile, 'r') as f:
            profiler_str = f.read()

        model_analyzer.profile(sess.graph,
                               cmd='scope',
                               run_meta=run_meta,
                               options=opts2)
        with gfile.Open(outfile, 'r') as f:
            pma_str = f.read()
        self.assertEqual(pma_str, profiler_str)
Example #18
    def run(self,
            fetches,
            feed_dict=None,
            options=None,
            run_metadata=None,
            callable_runner=None,
            callable_runner_args=None,
            callable_options=None):
        """Wrapper around Session.run() that inserts tensor watch options.

    Args:
      fetches: Same as the `fetches` arg to regular `Session.run()`.
      feed_dict: Same as the `feed_dict` arg to regular `Session.run()`.
      options: Same as the `options` arg to regular `Session.run()`.
      run_metadata: Same as the `run_metadata` arg to regular `Session.run()`.
      callable_runner: A `callable` returned by `Session.make_callable()`.
        If not `None`, `fetches` and `feed_dict` must both be `None`.
        Mutually exclusive with `callable_options`.
      callable_runner_args: An optional list of arguments to `callable_runner`
        or for `callable_options`.
      callable_options: An instance of `config_pb2.CallableOptions`, to be
        used with `Session._make_callable_from_options()`. Mutually exclusive
        with `callable_runner`.

    Returns:
      Simply forwards the output of the wrapped `Session.run()` call.

    Raises:
      ValueError: On invalid `OnRunStartAction` value. Or if `callable_runner`
        is not `None` and either or both of `fetches` and `feed_dict` is `None`.
    """
        if callable_runner and callable_options:
            raise ValueError(
                "callable_runner and callable_options are mutually exclusive, but "
                "are both specified in this call to BaseDebugWrapperSession.run()."
            )

        if callable_runner and (fetches or feed_dict):
            raise ValueError(
                "callable_runner and fetches/feed_dict are mutually exclusive, "
                "but are used simultaneously.")
        elif callable_options and (fetches or feed_dict):
            raise ValueError(
                "callable_options and fetches/feed_dict are mutually exclusive, "
                "but are used simultaneously.")

        self.increment_run_call_count()
        empty_fetches = not nest.flatten(fetches)
        if empty_fetches:
            tf_logging.info(
                "Due to empty fetches, tfdbg Session wrapper is letting a "
                "Session.run pass through without any debugging actions.")
        if self._is_disabled_thread() or empty_fetches:
            if callable_runner:
                return callable_runner(*callable_runner_args)
            elif callable_options:
                # pylint:disable=protected-access
                return self._sess._make_callable_from_options(
                    callable_options)(*callable_runner_args)
                # pylint:enable=protected-access
            else:
                return self._sess.run(fetches,
                                      feed_dict=feed_dict,
                                      options=options,
                                      run_metadata=run_metadata)

        # Invoke on-run-start callback and obtain response.
        run_start_resp = self.on_run_start(
            OnRunStartRequest(fetches,
                              feed_dict,
                              options,
                              run_metadata,
                              self._run_call_count,
                              is_callable_runner=bool(callable_runner)))
        _check_type(run_start_resp, OnRunStartResponse)

        if run_start_resp.action == OnRunStartAction.DEBUG_RUN:
            # Decorate RunOptions to fill in debugger tensor watch specifications.
            decorated_run_options = None
            if callable_options:
                callable_options_id = id(callable_options)
                if callable_options_id not in self._cached_callables_from_options:
                    # Make a copy of callable_options to avoid mutating it.
                    new_callable_options = config_pb2.CallableOptions()
                    new_callable_options.CopyFrom(callable_options)
                    decorated_run_options = new_callable_options.run_options
            else:
                decorated_run_options = options or config_pb2.RunOptions()

            run_metadata = run_metadata or config_pb2.RunMetadata()

            if decorated_run_options:
                self._decorate_run_options_for_debug(
                    decorated_run_options,
                    run_start_resp.debug_urls,
                    debug_ops=run_start_resp.debug_ops,
                    node_name_regex_whitelist=(
                        run_start_resp.node_name_regex_whitelist),
                    op_type_regex_whitelist=(
                        run_start_resp.op_type_regex_whitelist),
                    tensor_dtype_regex_whitelist=(
                        run_start_resp.tensor_dtype_regex_whitelist),
                    tolerate_debug_op_creation_failures=(
                        run_start_resp.tolerate_debug_op_creation_failures))

            # Invoke the run() method of the wrapped Session. Catch any TensorFlow
            # runtime errors.
            tf_error = None
            try:
                if callable_runner:
                    retvals = callable_runner(*callable_runner_args,
                                              options=decorated_run_options,
                                              run_metadata=run_metadata)
                elif callable_options:
                    # pylint:disable=protected-access
                    if callable_options_id in self._cached_callables_from_options:
                        callable_object = self._cached_callables_from_options[
                            callable_options_id]
                    else:
                        callable_object = self._sess._make_callable_from_options(
                            new_callable_options)
                        self._cached_callables_from_options[
                            callable_options_id] = callable_object
                    # pylint:enable=protected-access
                    retvals = callable_object(*callable_runner_args,
                                              run_metadata=run_metadata)
                else:
                    retvals = self._sess.run(fetches,
                                             feed_dict=feed_dict,
                                             options=decorated_run_options,
                                             run_metadata=run_metadata)
            except errors.OpError as op_error:
                if self._pass_through_operrors:
                    raise op_error
                tf_error = op_error
                retvals = op_error

            run_end_req = OnRunEndRequest(
                run_start_resp.action,
                run_metadata=run_metadata,
                client_graph_def=self._sess.graph.as_graph_def(),
                tf_error=tf_error)

        elif run_start_resp.action == OnRunStartAction.PROFILE_RUN:
            decorated_run_options = options or config_pb2.RunOptions()
            run_metadata = run_metadata or config_pb2.RunMetadata()
            self._decorate_run_options_for_profile(decorated_run_options)
            if callable_runner:
                retvals = callable_runner(*callable_runner_args,
                                          options=decorated_run_options,
                                          run_metadata=run_metadata)
            else:
                retvals = self._sess.run(fetches,
                                         feed_dict=feed_dict,
                                         options=decorated_run_options,
                                         run_metadata=run_metadata)
            run_end_req = OnRunEndRequest(
                run_start_resp.action,
                run_metadata=run_metadata,
                client_graph_def=self._sess.graph.as_graph_def())
        elif (run_start_resp.action == OnRunStartAction.NON_DEBUG_RUN
              or run_start_resp.action == OnRunStartAction.INVOKE_STEPPER):
            if callable_runner:
                raise NotImplementedError(
                    "Stepper mode is not implemented for callables created by "
                    "Session.make_callable().")

            if run_start_resp.action == OnRunStartAction.INVOKE_STEPPER:
                with stepper.NodeStepper(self._sess, fetches,
                                         feed_dict) as node_stepper:
                    retvals = self.invoke_node_stepper(
                        node_stepper, restore_variable_values_on_exit=True)

            else:
                # Invoke run() method of the wrapped session. (The else is
                # needed so a stepper run's return value is not discarded by
                # a second, redundant execution of the graph.)
                retvals = self._sess.run(fetches,
                                         feed_dict=feed_dict,
                                         options=options,
                                         run_metadata=run_metadata)

            # Prepare arg for the on-run-end callback.
            run_end_req = OnRunEndRequest(run_start_resp.action)
        else:
            raise ValueError("Invalid OnRunStartAction value: %s" %
                             run_start_resp.action)

        # Invoke on-run-end callback and obtain response.
        run_end_resp = self.on_run_end(run_end_req)
        _check_type(run_end_resp, OnRunEndResponse)
        # Currently run_end_resp is only a placeholder. No action is taken on it.

        return retvals
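
For orientation, here is a minimal usage sketch of how a wrapper like this might be driven, including the `callable_runner` path. `MyDebugWrapperSession` stands in for any concrete subclass of `BaseDebugWrapperSession` (a real subclass must also implement hooks such as `on_session_init`), and `accept_options=True` is assumed so the debug run can pass `options`/`run_metadata` into the callable.

import tensorflow as tf

with tf.Session() as sess:
    x = tf.placeholder(tf.float32, shape=[2])
    y = x * 2.0
    # Hypothetical concrete subclass of BaseDebugWrapperSession.
    wrapped = MyDebugWrapperSession(sess)

    # Path 1: plain fetches/feed_dict go through the wrapper directly.
    print(wrapped.run(y, feed_dict={x: [1.0, 2.0]}))

    # Path 2: a callable made by the wrapped session; fetches and feed_dict
    # must then be None, per the docstring above.
    runner = sess.make_callable(y, feed_list=[x], accept_options=True)
    print(wrapped.run(None,
                      callable_runner=runner,
                      callable_runner_args=[[1.0, 2.0]]))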
Example #19
  def testAllowsDifferentWatchesOnDifferentRuns(self):
    """Test watching different tensors on different runs of the same graph."""

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      u_init_val = [[5.0, 3.0], [-1.0, 0.0]]
      v_init_val = [[2.0], [-1.0]]

      # Use node names with overlapping namespace (i.e., parent directory) to
      # test concurrent, non-racing directory creation.
      u_name = "diff_Watch/u"
      v_name = "diff_Watch/v"

      u_init = constant_op.constant(u_init_val, shape=[2, 2])
      u = variables.Variable(u_init, name=u_name)
      v_init = constant_op.constant(v_init_val, shape=[2, 1])
      v = variables.Variable(v_init, name=v_name)

      w = math_ops.matmul(u, v, name="diff_Watch/matmul")

      u.initializer.run()
      v.initializer.run()

      for i in range(2):
        run_options = config_pb2.RunOptions(output_partition_graphs=True)

        run_dump_root = self._debug_dump_dir(run_number=i)
        debug_urls = self._debug_urls(run_number=i)

        if i == 0:
          # First debug run: Add debug tensor watch for u.
          debug_utils.add_debug_tensor_watch(
              run_options, "%s/read" % u_name, 0, debug_urls=debug_urls)
        else:
          # Second debug run: Add debug tensor watch for v.
          debug_utils.add_debug_tensor_watch(
              run_options, "%s/read" % v_name, 0, debug_urls=debug_urls)

        run_metadata = config_pb2.RunMetadata()

        # Invoke Session.run().
        sess.run(w, options=run_options, run_metadata=run_metadata)

        self.assertEqual(self._expected_partition_graph_count,
                         len(run_metadata.partition_graphs))

        dump = debug_data.DebugDumpDir(
            run_dump_root, partition_graphs=run_metadata.partition_graphs)
        self.assertTrue(dump.loaded_partition_graphs())

        # Each run should have generated only one dumped tensor, not two.
        self.assertEqual(1, dump.size)

        if i == 0:
          self.assertAllClose([u_init_val],
                              dump.get_tensors("%s/read" % u_name, 0,
                                               "DebugIdentity"))
          self.assertGreaterEqual(
              dump.get_rel_timestamps("%s/read" % u_name, 0,
                                      "DebugIdentity")[0], 0)
        else:
          self.assertAllClose([v_init_val],
                              dump.get_tensors("%s/read" % v_name, 0,
                                               "DebugIdentity"))
          self.assertGreaterEqual(
              dump.get_rel_timestamps("%s/read" % v_name, 0,
                                      "DebugIdentity")[0], 0)
Example #20
    def run_op_benchmark(self,
                         sess,
                         op_or_tensor,
                         feed_dict=None,
                         burn_iters=2,
                         min_iters=10,
                         store_trace=False,
                         store_memory_usage=True,
                         name=None,
                         extras=None,
                         mbs=0):
        """Run an op or tensor in the given session.  Report the results.

    Args:
      sess: `Session` object to use for timing.
      op_or_tensor: `Operation` or `Tensor` to benchmark.
      feed_dict: A `dict` of values to feed for each op iteration (see the
        `feed_dict` parameter of `Session.run`).
      burn_iters: Number of burn-in iterations to run.
      min_iters: Minimum number of iterations to use for timing.
      store_trace: Boolean, whether to run an extra untimed iteration and
        store the trace of iteration in the benchmark report.
        The trace will be stored as a string in Google Chrome trace format
        in the extras field "full_trace_chrome_format".
      store_memory_usage: Boolean, whether to run an extra
        untimed iteration, calculate memory usage, and store that in extras
        fields.
      name: (optional) Override the BenchmarkEntry name with `name`.
        Otherwise it is inferred from the top-level method name.
      extras: (optional) Dict mapping string keys to additional benchmark info.
        Values may be either floats or values that are convertible to strings.
      mbs: (optional) The number of megabytes moved by this op, used to
        calculate the ops throughput.

    Returns:
      A `dict` containing the key-value pairs that were passed to
      `report_benchmark`.
    """
        store_memory_usage &= _benchmark_tests_can_log_memory()

        for _ in range(burn_iters):
            sess.run(op_or_tensor, feed_dict=feed_dict)

        deltas = [None] * min_iters

        for i in range(min_iters):
            start_time = time.time()
            sess.run(op_or_tensor, feed_dict=feed_dict)
            end_time = time.time()
            delta = end_time - start_time
            deltas[i] = delta

        extras = extras if extras is not None else {}
        if store_trace or store_memory_usage:
            run_options = config_pb2.RunOptions(
                trace_level=config_pb2.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()
            sess.run(op_or_tensor,
                     feed_dict=feed_dict,
                     options=run_options,
                     run_metadata=run_metadata)
            tl = timeline.Timeline(run_metadata.step_stats)

            if store_trace:
                extras["full_trace_chrome_format"] = (
                    tl.generate_chrome_trace_format())

            if store_memory_usage:
                step_stats_analysis = tl.analyze_step_stats(show_memory=True)
                allocator_maximums = step_stats_analysis.allocator_maximums
                for k, v in allocator_maximums.items():
                    extras["allocator_maximum_num_bytes_%s" % k] = v.num_bytes

        def _median(x):
            if not x:
                return -1
            s = sorted(x)
            l = len(x)
            lm1 = l - 1
            return (s[l // 2] + s[lm1 // 2]) / 2.0

        median_delta = _median(deltas)

        benchmark_values = {
            "iters": min_iters,
            "wall_time": median_delta,
            "extras": extras,
            "name": name,
            "throughput": mbs / median_delta
        }
        self.report_benchmark(**benchmark_values)
        return benchmark_values
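
A sketch of how `run_op_benchmark()` is typically called from a `tf.test.Benchmark` subclass; the graph and names here are illustrative, not taken from the example above:

import tensorflow as tf

class MatMulBenchmark(tf.test.Benchmark):

    def benchmarkMatMul(self):
        with tf.Graph().as_default(), tf.Session() as sess:
            a = tf.random_normal([256, 256])
            product = tf.matmul(a, a)
            # Reports the median wall time over min_iters timed iterations,
            # after burn_iters warm-up runs; the entry name is inferred from
            # this method's name since `name` is not passed.
            self.run_op_benchmark(sess, product, min_iters=10)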
    def testDumpToFileWhileLoop(self):
        with session.Session() as sess:
            num_iter = 10

            # "u" is the Variable being updated in the loop.
            u_name = "testDumpToFileWhileLoop/u"
            u_namespace = u_name.split("/")[0]

            u_init_val = np.array(11.0)
            u_init = constant_op.constant(u_init_val)
            u = variables.Variable(u_init, name=u_name)

            # "v" is the increment.
            v_name = "testDumpToFileWhileLoop/v"
            v_namespace = v_name.split("/")[0]

            v_init_val = np.array(2.0)
            v_init = constant_op.constant(v_init_val)
            v = variables.Variable(v_init, name=v_name)

            u.initializer.run()
            v.initializer.run()

            i = constant_op.constant(0, name="testDumpToFileWhileLoop/i")

            def cond(i):
                return math_ops.less(i, num_iter)

            def body(i):
                new_u = state_ops.assign_add(u, v)
                new_i = math_ops.add(i, 1)
                op = control_flow_ops.group(new_u)
                new_i = control_flow_ops.with_dependencies([op], new_i)
                return [new_i]

            loop = control_flow_ops.while_loop(cond,
                                               body, [i],
                                               parallel_iterations=1)

            # Create RunOptions for debug-watching tensors
            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_urls = self._debug_urls()

            # Add debug tensor watch for u.
            debug_utils.add_debug_tensor_watch(run_options,
                                               u_name,
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for v.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % v_name,
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for while/Identity.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "while/Identity",
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for while/Add/y.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "while/Add/y",
                                               0,
                                               debug_urls=debug_urls)

            run_metadata = config_pb2.RunMetadata()
            r = sess.run(loop, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))

            self.assertEqual(num_iter, r)

            u_val_final = sess.run(u)
            self.assertAllClose(u_init_val + num_iter * v_init_val,
                                u_val_final)

            # Verify dump files
            self.assertTrue(os.path.isdir(self._dump_root))

            self.assertTrue(
                os.path.isdir(os.path.join(self._dump_root, u_namespace)))
            self.assertTrue(
                os.path.isdir(os.path.join(self._dump_root, v_namespace, "v")))

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            # Expected dumped tensors: u, v/read, 10 iterations of while/Identity,
            # and 10 iterations of while/Add/y.
            self.assertEqual(1 + 1 + num_iter + num_iter, dump.size)

            # Verify tensor values.
            self.assertAllClose([u_init_val],
                                dump.get_tensors(u_name, 0, "DebugIdentity"))
            self.assertAllClose([v_init_val],
                                dump.get_tensors("%s/read" % v_name, 0,
                                                 "DebugIdentity"))

            while_id_tensors = dump.get_tensors("while/Identity", 0,
                                                "DebugIdentity")
            self.assertEqual(10, len(while_id_tensors))
            for k in xrange(len(while_id_tensors)):
                self.assertAllClose(np.array(k), while_id_tensors[k])

            # Verify ascending timestamps from the while loops.
            while_id_rel_timestamps = dump.get_rel_timestamps(
                "while/Identity", 0, "DebugIdentity")
            self.assertEqual(10, len(while_id_rel_timestamps))
            prev_rel_time = 0
            for rel_time in while_id_rel_timestamps:
                self.assertGreaterEqual(rel_time, prev_rel_time)
                prev_rel_time = rel_time

            # Test querying debug watch keys from node name.
            watch_keys = dump.debug_watch_keys("while/Identity")
            self.assertEqual(["while/Identity:0:DebugIdentity"], watch_keys)

            # Test querying debug datum instances from debug watch key.
            self.assertEqual(10, len(dump.watch_key_to_data(watch_keys[0])))
            self.assertEqual([], dump.watch_key_to_data("foo"))
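
The directory assertions above reflect tfdbg's on-disk layout: each watched node is dumped under a subdirectory tree mirroring its name scope, with file names encoding the node name, output slot, debug op, and timestamp. A stdlib-only way to eyeball a dump root (the path is an assumed stand-in for whatever `self._dump_root` was):

import os

for dirpath, _, filenames in os.walk("/tmp/tfdbg_dump_root"):  # assumed root
    for filename in filenames:
        print(os.path.join(dirpath, filename))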
Example #22
    def _GenerateTestData(self):
        """Generates the test data directory.

    The test data has a single run named run1 which contains:
     - a histogram
     - an image at timestamp and step 0
     - scalar events containing the value i at step 10 * i and wall time
         100 * i, for i in [1, _SCALAR_COUNT].
     - a graph definition

    Returns:
      temp_dir: The directory the test data is generated under.
    """
        temp_dir = tempfile.mkdtemp(prefix=self.get_temp_dir())
        self.addCleanup(shutil.rmtree, temp_dir)
        run1_path = os.path.join(temp_dir, 'run1')
        os.makedirs(run1_path)
        writer = writer_lib.FileWriter(run1_path)

        histogram_value = summary_pb2.HistogramProto(min=0,
                                                     max=2,
                                                     num=3,
                                                     sum=6,
                                                     sum_squares=5,
                                                     bucket_limit=[0, 1, 2],
                                                     bucket=[1, 1, 1])
        # Add a simple graph event.
        graph_def = graph_pb2.GraphDef()
        node1 = graph_def.node.add()
        node1.name = 'a'
        node2 = graph_def.node.add()
        node2.name = 'b'
        node2.attr['very_large_attr'].s = b'a' * 2048  # 2 KB attribute

        meta_graph_def = meta_graph_pb2.MetaGraphDef(graph_def=graph_def)

        if self._only_use_meta_graph:
            writer.add_meta_graph(meta_graph_def)
        else:
            writer.add_graph(graph_def)

        # Add a simple run metadata event.
        run_metadata = config_pb2.RunMetadata()
        device_stats = run_metadata.step_stats.dev_stats.add()
        device_stats.device = 'test device'
        writer.add_run_metadata(run_metadata, 'test run')

        # 1x1 transparent GIF.
        encoded_image = base64.b64decode(
            'R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7')
        image_value = summary_pb2.Summary.Image(
            height=1,
            width=1,
            colorspace=1,
            encoded_image_string=encoded_image)

        audio_value = summary_pb2.Summary.Audio(sample_rate=44100,
                                                length_frames=22050,
                                                num_channels=2,
                                                encoded_audio_string=b'',
                                                content_type='audio/wav')
        writer.add_event(
            event_pb2.Event(
                wall_time=0,
                step=0,
                summary=summary_pb2.Summary(value=[
                    summary_pb2.Summary.Value(tag='histogram',
                                              histo=histogram_value),
                    summary_pb2.Summary.Value(tag='image', image=image_value),
                    summary_pb2.Summary.Value(tag='audio', audio=audio_value)
                ])))

        # Write `_SCALAR_COUNT` simple scalar values.
        for i in xrange(1, self._SCALAR_COUNT + 1):
            writer.add_event(
                event_pb2.Event(
                    # We use different values for wall time, step, and the value so we
                    # can tell them apart.
                    wall_time=100 * i,
                    step=10 * i,
                    summary=summary_pb2.Summary(value=[
                        summary_pb2.Summary.Value(tag='simple_values',
                                                  simple_value=i)
                    ])))
        writer.flush()
        writer.close()

        return temp_dir
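
To sanity-check what `_GenerateTestData()` wrote, the event files can be read back with `tf.train.summary_iterator` (a sketch; `temp_dir` is the directory returned above):

import glob
import os

import tensorflow as tf

event_file = glob.glob(
    os.path.join(temp_dir, 'run1', 'events.out.tfevents.*'))[0]
for event in tf.train.summary_iterator(event_file):
    for value in event.summary.value:
        if value.tag == 'simple_values':
            # Written as value i at step 10 * i and wall time 100 * i.
            print(event.wall_time, event.step, value.simple_value)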
    def testDumpGraphStructureLookup(self):
        # TODO(cais): Separate this test into multiple test methods.

        with session.Session() as sess:
            u_name = "testDumpGraphStructureLookup/u"
            v_name = "testDumpGraphStructureLookup/v"
            w_name = "testDumpGraphStructureLookup/w"

            u_init = constant_op.constant([2.0, 4.0])
            u = variables.Variable(u_init, name=u_name)
            v = math_ops.add(u, u, name=v_name)
            w = math_ops.add(v, v, name=w_name)

            u.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            sess.run(w, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))
            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            u_read_name = u_name + "/read"

            # Test node name list lookup of the DebugDumpDir object.
            node_names = dump.nodes()
            self.assertTrue(u_name in node_names)
            self.assertTrue(u_read_name in node_names)

            # Test querying node attributes.
            u_attr = dump.node_attributes(u_name)
            self.assertEqual(dtypes.float32, u_attr["dtype"].type)
            self.assertEqual(1, len(u_attr["shape"].shape.dim))
            self.assertEqual(2, u_attr["shape"].shape.dim[0].size)

            with self.assertRaisesRegexp(ValueError,
                                         "No node named \"foo\" exists"):
                dump.node_attributes("foo")

            # Test querying the debug watch keys with node names.
            self.assertEqual(["%s:0:DebugIdentity" % u_name],
                             dump.debug_watch_keys(u_name))
            self.assertEqual(["%s:0:DebugIdentity" % v_name],
                             dump.debug_watch_keys(v_name))
            self.assertEqual(["%s:0:DebugIdentity" % w_name],
                             dump.debug_watch_keys(w_name))
            self.assertEqual([], dump.debug_watch_keys("foo"))

            # Test querying debug datum instances from debug watch.
            u_data = dump.watch_key_to_data(dump.debug_watch_keys(u_name)[0])
            self.assertEqual(1, len(u_data))
            self.assertEqual(u_name, u_data[0].node_name)
            self.assertEqual(0, u_data[0].output_slot)
            self.assertEqual("DebugIdentity", u_data[0].debug_op)
            self.assertGreaterEqual(u_data[0].timestamp, 0)

            self.assertEqual([], dump.watch_key_to_data("foo"))

            # Test the inputs lookup of the DebugDumpDir object.
            self.assertEqual([], dump.node_inputs(u_name))
            self.assertEqual([u_name], dump.node_inputs(u_read_name))
            self.assertEqual([u_read_name] * 2, dump.node_inputs(v_name))
            self.assertEqual([v_name] * 2, dump.node_inputs(w_name))

            self.assertEqual([], dump.node_inputs(u_name, is_control=True))
            self.assertEqual([], dump.node_inputs(u_read_name,
                                                  is_control=True))
            self.assertEqual([], dump.node_inputs(v_name, is_control=True))
            self.assertEqual([], dump.node_inputs(w_name, is_control=True))

            # Test the outputs recipient lookup of the DebugDumpDir object.
            self.assertTrue(u_read_name in dump.node_recipients(u_name))
            self.assertEqual(2,
                             dump.node_recipients(u_read_name).count(v_name))
            self.assertEqual(2, dump.node_recipients(v_name).count(w_name))

            self.assertEqual([], dump.node_recipients(u_name, is_control=True))
            self.assertEqual([],
                             dump.node_recipients(u_read_name,
                                                  is_control=True))
            self.assertEqual([], dump.node_recipients(v_name, is_control=True))
            self.assertEqual([], dump.node_recipients(w_name, is_control=True))

            # Test errors raised on invalid node names.
            with self.assertRaisesRegexp(ValueError,
                                         "does not exist in partition graphs"):
                dump.node_inputs(u_name + "foo")

            with self.assertRaisesRegexp(ValueError,
                                         "does not exist in partition graphs"):
                dump.node_recipients(u_name + "foo")

            # Test transitive_inputs().
            self.assertEqual([], dump.transitive_inputs(u_name))
            self.assertEqual([u_name], dump.transitive_inputs(u_read_name))
            self.assertEqual(set([u_name, u_read_name]),
                             set(dump.transitive_inputs(v_name)))
            self.assertEqual(set([u_name, u_read_name, v_name]),
                             set(dump.transitive_inputs(w_name)))

            with self.assertRaisesRegexp(ValueError,
                                         "does not exist in partition graphs"):
                dump.transitive_inputs(u_name + "foo")

            # Test num_devices().
            self.assertEqual(self._expected_num_devices, len(dump.devices()))

            # Test node_device().
            self.assertEqual(self._main_device, dump.node_device(u_name))

            with self.assertRaisesRegexp(ValueError,
                                         "does not exist in partition graphs"):
                dump.node_device(u_name + "foo")

            # Test node_exists().
            self.assertTrue(dump.node_exists(u_name))
            self.assertTrue(dump.node_exists(u_name + "/read"))
            self.assertFalse(dump.node_exists(u_name + "/read" + "/foo"))

            # Test node_op_type().
            self.assertEqual("Variable", dump.node_op_type(u_name))
            self.assertEqual("Identity", dump.node_op_type(u_name + "/read"))
            self.assertEqual("Add", dump.node_op_type(v_name))
            self.assertEqual("Add", dump.node_op_type(w_name))

            with self.assertRaisesRegexp(ValueError,
                                         "does not exist in partition graphs"):
                dump.node_op_type(u_name + "foo")

            # Now load the dump again, without the partition graphs, so we can
            # check the errors raised when no partition graphs are loaded.
            dump = debug_data.DebugDumpDir(self._dump_root, validate=False)

            with self.assertRaisesRegexp(
                    RuntimeError, "No partition graphs have been loaded"):
                dump.partition_graphs()
            self.assertFalse(dump.loaded_partition_graphs())

            with self.assertRaisesRegexp(
                    RuntimeError,
                    "Node inputs are not loaded from partition graphs yet"):
                dump.node_inputs(u_name)

            with self.assertRaisesRegexp(
                    RuntimeError, "No partition graphs have been loaded"):
                dump.nodes()

            with self.assertRaisesRegexp(
                    RuntimeError,
                    "Node recipients are not loaded from partition graphs yet"
            ):
                dump.node_recipients(u_name)

            with self.assertRaisesRegexp(
                    RuntimeError,
                    "Node inputs are not loaded from partition graphs yet"):
                dump.transitive_inputs(u_name)

            with self.assertRaisesRegexp(
                    RuntimeError,
                    "Devices are not loaded from partition graphs yet"):
                dump.devices()

            with self.assertRaisesRegexp(
                    RuntimeError,
                    "Node devices are not loaded from partition graphs yet"):
                dump.node_device(u_name)

            with self.assertRaisesRegexp(
                    RuntimeError,
                    "Node op types are not loaded from partition graphs yet"):
                dump.node_op_type(u_name)
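
`transitive_inputs()` as exercised above is essentially a breadth-first walk over `node_inputs()`. A minimal reimplementation sketch against any loaded `DebugDumpDir` (ignoring control edges, which the tests above check separately via `is_control=True`):

import collections

def transitive_inputs_sketch(dump, node_name):
    """Collects all direct and indirect data inputs of `node_name`."""
    visited = set()
    queue = collections.deque(dump.node_inputs(node_name))
    while queue:
        inp = queue.popleft()
        if inp in visited:
            continue
        visited.add(inp)
        queue.extend(dump.node_inputs(inp))
    return visited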
    def testValidProfile(self):
        output_dir = test.get_temp_dir()
        run_metadata = config_pb2.RunMetadata()

        node1 = step_stats_pb2.NodeExecStats(node_name='Add/123',
                                             op_start_rel_micros=3,
                                             op_end_rel_micros=5,
                                             all_end_rel_micros=4)

        run_metadata = config_pb2.RunMetadata()
        device1 = run_metadata.step_stats.dev_stats.add()
        device1.device = 'deviceA'
        device1.node_stats.extend([node1])

        graph = test.mock.MagicMock()
        op1 = test.mock.MagicMock()
        op1.name = 'Add/123'
        op1.traceback = [('a/b/file1', 10, 'apply_op', 'abc'),
                         ('a/c/file2', 12, 'my_op', 'def')]
        op1.type = 'add'
        graph.get_operations.return_value = [op1]

        expected_proto = """sample_type {
  type: 5
  unit: 5
}
sample_type {
  type: 6
  unit: 7
}
sample_type {
  type: 8
  unit: 7
}
sample {
  value: 1
  value: 4
  value: 2
  label {
    key: 1
    str: 2
  }
  label {
    key: 3
    str: 4
  }
}
string_table: ""
string_table: "node_name"
string_table: "Add/123"
string_table: "op_type"
string_table: "add"
string_table: "count"
string_table: "all_time"
string_table: "nanoseconds"
string_table: "op_time"
string_table: "Device 1 of 1: deviceA"
comment: 9
"""
        # Test with protos
        profiles = pprof_profiler.get_profiles(graph, run_metadata)
        self.assertEqual(1, len(profiles))
        self.assertTrue('deviceA' in profiles)
        self.assertEqual(expected_proto, str(profiles['deviceA']))
        # Test with files
        profile_files = pprof_profiler.profile(graph, run_metadata, output_dir)
        self.assertEqual(1, len(profile_files))
        with gzip.open(profile_files[0]) as profile_file:
            profile_contents = profile_file.read()
            profile = profile_pb2.Profile()
            profile.ParseFromString(profile_contents)
            self.assertEqual(expected_proto, str(profile))
    def testWatchingUnconnectedOutputTensor(self):
        """Watch an output slot not emitting any edges.

    (Not even control edges from the node.)
    """

        with session.Session() as sess:
            x_init = constant_op.constant([2, 2, 3, 5, 5])
            x = variables.Variable(x_init, name="unconnected/x")

            # The UniqueOp (tf.unique) has two output slots. Use only slot 0 in the
            # graph. Let the debugger watch the unused slot 1.
            unique_x, _ = array_ops.unique(x, name="unconnected/unique_x")
            y = math_ops.add(unique_x, [0, 1, 2], name="unconnected/y")

            x.initializer.run()

            # Verify that only slot 0 of unique_x has recipients, while slot 1 of the
            # same node does not have recipients.
            unique_x_slot_0_recipients = []
            unique_x_slot_1_recipients = []
            for op in sess.graph.get_operations():
                for inp in op.inputs:
                    if inp.name == "unconnected/unique_x:0":
                        unique_x_slot_0_recipients.append(op.name)
                    elif inp.name == "unconnected/unique_x:1":
                        unique_x_slot_1_recipients.append(op.name)

            self.assertEqual(["unconnected/y"], unique_x_slot_0_recipients)
            self.assertEqual([], unique_x_slot_1_recipients)

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            result = sess.run(y,
                              options=run_options,
                              run_metadata=run_metadata)
            self.assertAllClose([2, 4, 7], result)

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            # Assert that the connected slot (slot 0) is dumped properly.
            unique_x_slot_0_dumps = dump.watch_key_to_data(
                "unconnected/unique_x:0:DebugIdentity")
            self.assertEqual(1, len(unique_x_slot_0_dumps))
            self.assertEqual("unconnected/unique_x",
                             unique_x_slot_0_dumps[0].node_name)
            self.assertEqual(0, unique_x_slot_0_dumps[0].output_slot)
            self.assertAllClose([2, 3, 5],
                                unique_x_slot_0_dumps[0].get_tensor())

            # Assert that the unconnected slot (slot 1) is dumped properly.
            unique_x_slot_1_dumps = dump.watch_key_to_data(
                "unconnected/unique_x:1:DebugIdentity")
            self.assertEqual(1, len(unique_x_slot_1_dumps))
            self.assertEqual("unconnected/unique_x",
                             unique_x_slot_1_dumps[0].node_name)
            self.assertEqual(1, unique_x_slot_1_dumps[0].output_slot)
            self.assertAllClose([0, 0, 1, 2, 2],
                                unique_x_slot_1_dumps[0].get_tensor())
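
The `[0, 0, 1, 2, 2]` value asserted for slot 1 follows from the semantics of the unique op: slot 0 carries the deduplicated values and slot 1 carries, for each input element, its index into those values. NumPy shows the same mapping (the TF op keeps first-occurrence order rather than sorting, which coincides here because the input is already sorted):

import numpy as np

values, indices = np.unique([2, 2, 3, 5, 5], return_inverse=True)
print(values)   # [2 3 5]
print(indices)  # [0 0 1 2 2]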
    def testClusterSpecPropagationThreeServersOneCluster(self):
        """Boots 3 servers, ensures appropriate communication across workers.

    Additionally, in this cluster, we ensure the master is not the 0-th worker.

    Note: this test only uses one session.
    """
        server1 = server_lib.Server.create_local_server()
        server2 = server_lib.Server.create_local_server()
        server3 = server_lib.Server.create_local_server()
        cluster_def = cluster_pb2.ClusterDef()
        job = cluster_def.job.add()
        job.name = 'worker'
        job.tasks[0] = server3.target[len('grpc://'):]
        job.tasks[1] = server2.target[len('grpc://'):]
        job.tasks[2] = server1.target[len('grpc://'):]
        config = config_pb2.ConfigProto(cluster_def=cluster_def)

        # Add ops to the devices in non-linear order.

        with ops.device('/job:worker/task:1'):
            feed1 = array_ops.placeholder(dtypes.float32, shape=(2))
            const1 = constant_op.constant(2.0)
            mul1 = const1 * feed1

        with ops.device('/job:worker/task:2'):
            feed2 = array_ops.placeholder(dtypes.float32, shape=(2))
            const2 = constant_op.constant(2.0)
            mul2 = const2 * feed2

        with ops.device('/job:worker/task:0'):
            feed0 = array_ops.placeholder(dtypes.float32, shape=(2))
            const0 = constant_op.constant(2.0)
            mul0 = const0 * feed0

        sum_op = mul0 + mul1 + mul2

        ones = np.ones([2])
        run_options = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)
        run_metadata = config_pb2.RunMetadata()

        # Run!
        with session.Session(server1.target, config=config) as sess:
            output = sess.run(sum_op,
                              options=run_options,
                              run_metadata=run_metadata,
                              feed_dict={
                                  feed1: ones,
                                  feed2: ones,
                                  feed0: ones
                              })
            self.assertAllEqual(6 * ones, output)

            self.assertEqual(
                3,
                len([
                    dev_stats.device
                    for dev_stats in run_metadata.step_stats.dev_stats
                    for node_stats in dev_stats.node_stats
                    if '/job:worker/replica:0/task:' in dev_stats.device
                    and node_stats.node_name.startswith('Const')
                ]), run_metadata)
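
The hand-built `ClusterDef` above can also be expressed through `tf.train.ClusterSpec`, which is the more common route; the task ordering mirrors the manual `job.tasks` mapping (note the master, `server1`, is deliberately task 2, not task 0):

import tensorflow as tf

cluster_spec = tf.train.ClusterSpec({
    'worker': [
        server3.target[len('grpc://'):],  # task 0
        server2.target[len('grpc://'):],  # task 1
        server1.target[len('grpc://'):],  # task 2
    ]
})
config = tf.ConfigProto(cluster_def=cluster_spec.as_cluster_def())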
Example #27
def train_step(sess, train_op, endpoint, batch, loggis, loss, global_step,
               number_of_steps, train_step_kwargs):
    """Function that takes a gradient step and specifies whether to stop.

  Args:
    sess: The current session.
    train_op: An `Operation` that evaluates the gradients and returns the
      total loss.
    endpoint: The network's endpoint tensors (only referenced by the
      commented-out debugging block below).
    batch: The input batch tensors (only referenced by the commented-out
      debugging block below).
    loggis: The logits tensors (only referenced by the commented-out
      debugging block below).
    loss: The loss tensor (only referenced by the commented-out debugging
      block below).
    global_step: A `Tensor` representing the global training step.
    number_of_steps: The total number of training steps, used for logging.
    train_step_kwargs: A dictionary of keyword arguments.

  Returns:
    The total loss and a boolean indicating whether or not to stop training.

  Raises:
    ValueError: if 'should_trace' is in `train_step_kwargs` but `logdir` is not.
  """
    start_time = time.time()

    trace_run_options = None
    run_metadata = None
    if 'should_trace' in train_step_kwargs:
        if 'logdir' not in train_step_kwargs:
            raise ValueError(
                'logdir must be present in train_step_kwargs when '
                'should_trace is present')
        if sess.run(train_step_kwargs['should_trace']):
            trace_run_options = config_pb2.RunOptions(
                trace_level=config_pb2.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()
    # print "Loggitss and loss call started..."
    # loggiss, losss,batchh,endpointt = sess.run([loggis, loss,batch,endpoint],
    #                                       options=trace_run_options,
    #                                       run_metadata=run_metadata)
    #
    # print(loggiss)
    # print(losss)
    # print "batchh..."
    # print(batchh)
    # for item in batchh:
    #   print(item)
    #   print(item.shape)
    #
    # print "endpoint..."
    # for key,val in endpointt.iteritems():
    #   print(key)
    #   print(val.shape)
    #   print(val)
    # print "Loggitss and loss call ended..."
    total_loss, np_global_step = sess.run([train_op, global_step],
                                          options=trace_run_options,
                                          run_metadata=run_metadata)
    time_elapsed = time.time() - start_time

    # print "Lossssssssssssss"
    # print total_loss
    if run_metadata is not None:
        tl = timeline.Timeline(run_metadata.step_stats)
        trace = tl.generate_chrome_trace_format()
        trace_filename = os.path.join(train_step_kwargs['logdir'],
                                      'tf_trace-%d.json' % np_global_step)
        logging.info('Writing trace to %s', trace_filename)
        file_io.write_string_to_file(trace_filename, trace)
        if 'summary_writer' in train_step_kwargs:
            train_step_kwargs['summary_writer'].add_run_metadata(
                run_metadata, 'run_metadata-%d' % np_global_step)

    if 'should_log' in train_step_kwargs:
        if sess.run(train_step_kwargs['should_log']):
            logging.info('global step %d/%d : loss = %.4f (%.2f sec/step)',
                         np_global_step, number_of_steps, total_loss,
                         time_elapsed)

    # TODO(nsilberman): figure out why we can't put this into sess.run. The
    # issue right now is that the stop check depends on the global step. The
    # increment of global step often happens via the train op, which is
    # created using optimizer.apply_gradients.
    #
    # Since running `train_op` causes the global step to be incremented, one
    # would expect that using a control dependency would allow the
    # should_stop check to be run in the same session.run call:
    #
    #   with ops.control_dependencies([train_op]):
    #     should_stop_op = ...
    #
    # However, this actually seems not to work on certain platforms.
    if 'should_stop' in train_step_kwargs:
        should_stop = sess.run(train_step_kwargs['should_stop'])
    else:
        should_stop = False

    return total_loss, should_stop
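
A sketch of the kind of `train_step_kwargs` this function expects, mirroring what `slim.learning.train` builds; the thresholds and log directory are illustrative assumptions:

import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
train_step_kwargs = {
    'logdir': '/tmp/train_logs',                         # assumed path
    'should_stop': tf.greater_equal(global_step, 1000),  # stop after 1000 steps
    'should_log': tf.equal(tf.mod(global_step, 10), 0),  # log every 10 steps
    'should_trace': tf.equal(global_step, 100),          # trace one step
}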
Example #28
  def run(self,
          fetches,
          feed_dict=None,
          options=None,
          run_metadata=None,
          callable_runner=None,
          callable_runner_args=None):
    """Wrapper around Session.run() that inserts tensor watch options.

    Args:
      fetches: Same as the `fetches` arg to regular `Session.run()`.
      feed_dict: Same as the `feed_dict` arg to regular `Session.run()`.
      options: Same as the `options` arg to regular `Session.run()`.
      run_metadata: Same as the `run_metadata` arg to regular `Session.run()`.
      callable_runner: A `callable` returned by `Session.make_callable()`.
        If not `None`, `fetches` and `feed_dict` must both be `None`.
      callable_runner_args: An optional list of arguments to `callable_runner`.

    Returns:
      Simply forwards the output of the wrapped `Session.run()` call.

    Raises:
      ValueError: On invalid `OnRunStartAction` value. Or if `callable_runner`
        is not `None` and either or both of `fetches` and `feed_dict` is not
        `None`.
    """
    if not callable_runner:
      self.increment_run_call_count()
    else:
      if fetches or feed_dict:
        raise ValueError(
            "callable_runner and fetches/feed_dict are mutually exclusive, but "
            "are used simultaneously.")

    if self._is_disabled_thread():
      if callable_runner:
        return callable_runner(*callable_runner_args)
      else:
        return self._sess.run(fetches,
                              feed_dict=feed_dict,
                              options=options,
                              run_metadata=run_metadata)

    # Invoke on-run-start callback and obtain response.
    run_start_resp = self.on_run_start(
        OnRunStartRequest(fetches, feed_dict, options, run_metadata,
                          self._run_call_count,
                          is_callable_runner=bool(callable_runner)))
    _check_type(run_start_resp, OnRunStartResponse)

    if run_start_resp.action == OnRunStartAction.DEBUG_RUN:
      # Decorate RunOption to fill in debugger tensor watch specifications.
      decorated_run_options = options or config_pb2.RunOptions()
      run_metadata = run_metadata or config_pb2.RunMetadata()

      self._decorate_run_options_for_debug(
          decorated_run_options,
          run_start_resp.debug_urls,
          debug_ops=run_start_resp.debug_ops,
          node_name_regex_whitelist=run_start_resp.node_name_regex_whitelist,
          op_type_regex_whitelist=run_start_resp.op_type_regex_whitelist,
          tensor_dtype_regex_whitelist=(
              run_start_resp.tensor_dtype_regex_whitelist),
          tolerate_debug_op_creation_failures=(
              run_start_resp.tolerate_debug_op_creation_failures))

      # Invoke the run() method of the wrapped Session. Catch any TensorFlow
      # runtime errors.
      tf_error = None
      try:
        if callable_runner:
          retvals = callable_runner(*callable_runner_args,
                                    options=decorated_run_options,
                                    run_metadata=run_metadata)
        else:
          retvals = self._sess.run(fetches,
                                   feed_dict=feed_dict,
                                   options=decorated_run_options,
                                   run_metadata=run_metadata)
      except errors.OpError as op_error:
        if self._pass_through_operrors:
          raise op_error
        tf_error = op_error
        retvals = op_error

      run_end_req = OnRunEndRequest(
          run_start_resp.action,
          run_metadata=run_metadata,
          client_graph_def=self._sess.graph.as_graph_def(),
          tf_error=tf_error)

    elif run_start_resp.action == OnRunStartAction.PROFILE_RUN:
      decorated_run_options = options or config_pb2.RunOptions()
      run_metadata = run_metadata or config_pb2.RunMetadata()
      self._decorate_run_options_for_profile(decorated_run_options)
      if callable_runner:
        retvals = callable_runner(*callable_runner_args,
                                  options=decorated_run_options,
                                  run_metadata=run_metadata)
      else:
        retvals = self._sess.run(fetches,
                                 feed_dict=feed_dict,
                                 options=decorated_run_options,
                                 run_metadata=run_metadata)
      run_end_req = OnRunEndRequest(
          run_start_resp.action,
          run_metadata=run_metadata,
          client_graph_def=self._sess.graph.as_graph_def())
    elif (run_start_resp.action == OnRunStartAction.NON_DEBUG_RUN or
          run_start_resp.action == OnRunStartAction.INVOKE_STEPPER):
      if callable_runner:
        raise NotImplementedError(
            "Stepper mode is not implemented for callables created by "
            "Session.make_callable().")

      if run_start_resp.action == OnRunStartAction.INVOKE_STEPPER:
        with stepper.NodeStepper(
            self._sess, fetches, feed_dict) as node_stepper:
          retvals = self.invoke_node_stepper(
              node_stepper, restore_variable_values_on_exit=True)

      else:
        # Invoke run() method of the wrapped session. (The else is needed so
        # a stepper run's return value is not discarded by a second,
        # redundant execution of the graph.)
        retvals = self._sess.run(
            fetches,
            feed_dict=feed_dict,
            options=options,
            run_metadata=run_metadata)

      # Prepare arg for the on-run-end callback.
      run_end_req = OnRunEndRequest(run_start_resp.action)
    else:
      raise ValueError(
          "Invalid OnRunStartAction value: %s" % run_start_resp.action)

    # Invoke on-run-end callback and obtain response.
    run_end_resp = self.on_run_end(run_end_req)
    _check_type(run_end_resp, OnRunEndResponse)
    # Currently run_end_resp is only a placeholder. No action is taken on it.

    return retvals
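
For completeness, a sketch of a concrete subclass whose `on_run_start()` picks among the actions dispatched above. The every-other-run policy and dump URL are illustrative, and only the hooks needed to instantiate the class are stubbed in:

class EveryOtherRunDebugSession(BaseDebugWrapperSession):
    """Sketch: debug every other run() call; pass the rest through."""

    def on_session_init(self, request):
        return OnSessionInitResponse(OnSessionInitAction.PROCEED)

    def on_run_start(self, request):
        if request.run_call_count % 2 == 1:
            return OnRunStartResponse(OnRunStartAction.DEBUG_RUN,
                                      ["file:///tmp/tfdbg_dumps"])
        return OnRunStartResponse(OnRunStartAction.NON_DEBUG_RUN, [])

    def on_run_end(self, request):
        return OnRunEndResponse()

    def invoke_node_stepper(self, node_stepper,
                            restore_variable_values_on_exit=True):
        raise NotImplementedError("Stepper not supported in this sketch.")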
    def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenDebugNodes(self):
        with session.Session(config=no_rewrite_session_config()) as sess:
            v_1 = variables.Variable(50.0, name="v_1")
            v_2 = variables.Variable(-50.0, name="v_1")
            delta_1 = constant_op.constant(5.0, name="delta_1")
            delta_2 = constant_op.constant(-5.0, name="delta_2")
            inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
            inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")

            sess.run([v_1.initializer, v_2.initializer])

            run_metadata = config_pb2.RunMetadata()
            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=[
                                        "DebugIdentity(gated_grpc=true)",
                                        "DebugNumericSummary(gated_grpc=true)"
                                    ],
                                    debug_urls=[self._debug_server_url_1])

            for i in xrange(4):
                self._server_1.clear_data()

                if i % 2 == 0:
                    self._server_1.request_watch("delta_1", 0, "DebugIdentity")
                    self._server_1.request_watch("delta_2", 0, "DebugIdentity")
                    self._server_1.request_unwatch("delta_1", 0,
                                                   "DebugNumericSummary")
                    self._server_1.request_unwatch("delta_2", 0,
                                                   "DebugNumericSummary")
                else:
                    self._server_1.request_unwatch("delta_1", 0,
                                                   "DebugIdentity")
                    self._server_1.request_unwatch("delta_2", 0,
                                                   "DebugIdentity")
                    self._server_1.request_watch("delta_1", 0,
                                                 "DebugNumericSummary")
                    self._server_1.request_watch("delta_2", 0,
                                                 "DebugNumericSummary")

                sess.run([inc_v_1, inc_v_2],
                         options=run_options,
                         run_metadata=run_metadata)

                # Watched debug tensors are:
                #   Run 0: delta_[1,2]:0:DebugIdentity
                #   Run 1: delta_[1,2]:0:DebugNumericSummary
                #   Run 2: delta_[1,2]:0:DebugIdentity
                #   Run 3: delta_[1,2]:0:DebugNumericSummary
                self.assertEqual(2, len(self._server_1.debug_tensor_values))
                if i % 2 == 0:
                    self.assertAllClose(
                        [5.0],
                        self._server_1.debug_tensor_values[
                            "delta_1:0:DebugIdentity"])
                    self.assertAllClose(
                        [-5.0],
                        self._server_1.debug_tensor_values[
                            "delta_2:0:DebugIdentity"])
                else:
                    self.assertAllClose(
                        [[1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 5.0,
                          5.0, 0.0, 1.0, 0.0]],
                        self._server_1.debug_tensor_values[
                            "delta_1:0:DebugNumericSummary"])
                    self.assertAllClose(
                        [[1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, -5.0, -5.0,
                          -5.0, 0.0, 1.0, 0.0]],
                        self._server_1.debug_tensor_values[
                            "delta_2:0:DebugNumericSummary"])
Example #30
    def testToggleBreakpointsWorks(self):
        with session.Session(
                config=session_debug_testlib.no_rewrite_session_config()) as sess:
            v_1 = variables.VariableV1(50.0, name="v_1")
            v_2 = variables.VariableV1(-50.0, name="v_2")
            delta_1 = constant_op.constant(5.0, name="delta_1")
            delta_2 = constant_op.constant(-5.0, name="delta_2")
            inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
            inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")

            sess.run([v_1.initializer, v_2.initializer])

            run_metadata = config_pb2.RunMetadata()
            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(
                run_options,
                sess.graph,
                debug_ops=["DebugIdentity(gated_grpc=true)"],
                debug_urls=[self._debug_server_url_1])

            for i in xrange(4):
                self._server_1.clear_data()

                if i in (0, 2):
                    # Enable breakpoint at delta_[1,2]:0:DebugIdentity in runs 0 and 2.
                    self._server_1.request_watch("delta_1",
                                                 0,
                                                 "DebugIdentity",
                                                 breakpoint=True)
                    self._server_1.request_watch("delta_2",
                                                 0,
                                                 "DebugIdentity",
                                                 breakpoint=True)
                else:
                    # Disable the breakpoint in runs 1 and 3.
                    self._server_1.request_unwatch("delta_1", 0,
                                                   "DebugIdentity")
                    self._server_1.request_unwatch("delta_2", 0,
                                                   "DebugIdentity")

                output = sess.run([inc_v_1, inc_v_2],
                                  options=run_options,
                                  run_metadata=run_metadata)
                self.assertAllClose(
                    [50.0 + 5.0 * (i + 1), -50 - 5.0 * (i + 1)], output)

                if i in (0, 2):
                    # During runs 0 and 2, the server should have received the
                    # published debug tensors delta_[1,2]:0:DebugIdentity. The
                    # breakpoints should have been unblocked by EventReply
                    # responses from the server.
                    self.assertAllClose(
                        [5.0],
                        self._server_1.debug_tensor_values[
                            "delta_1:0:DebugIdentity"])
                    self.assertAllClose(
                        [-5.0],
                        self._server_1.debug_tensor_values[
                            "delta_2:0:DebugIdentity"])
                    # After the runs, the server should have properly
                    # registered the breakpoints due to the request_watch
                    # calls with breakpoint=True.
                    self.assertSetEqual(
                        {("delta_1", 0, "DebugIdentity"),
                         ("delta_2", 0, "DebugIdentity")},
                        self._server_1.breakpoints)
                else:
                    # After the end of runs 1 and 3, the server has received the requests
                    # to disable the breakpoint at delta:0:DebugIdentity.
                    self.assertSetEqual(set(), self._server_1.breakpoints)