Example #1
        for i in range(args.max_epoch):
            sess.run(iterator.initializer)
            _ = sess.run(inc_epoch_op)
            while True:
                try:
                    images_train, labels_train = sess.run(next_element)

                    feed_dict = {
                        inputs: images_train,
                        labels: labels_train,
                        phase_train_placeholder: True
                    }
                    start = time.time()
                    _, total_loss_val, inference_loss_val, reg_loss_val, _, acc_val = \
                        sess.run([train_op, total_loss, inference_loss, regularization_losses,
                                  inc_global_step_op, Accuracy_Op],
                                 feed_dict=feed_dict,
                                 options=config_pb2.RunOptions(report_tensor_allocations_upon_oom=True))
                    end = time.time()
                    pre_sec = args.train_batch_size / (end - start)

                    count += 1
                    # print training information
                    if count > 0 and count % args.show_info_interval == 0:
                        print(
                            'epoch %d, total_step %d, total loss is %.2f , inference loss is %.2f, reg_loss is %.2f, training accuracy is %.6f, time %.3f samples/sec'
                            % (i, count, total_loss_val, inference_loss_val,
                               np.sum(reg_loss_val), acc_val, pre_sec))

                    # save summary
                    if count > 0 and count % args.summary_interval == 0:
                        feed_dict = {
                            inputs: images_train,
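
A minimal sketch of the flag the training loop above passes to Session.run() (assuming TensorFlow 1.x; the graph here is illustrative): when a run fails with an out-of-memory error, report_tensor_allocations_upon_oom makes the raised exception message include a summary of live tensor allocations, which helps identify the offending tensors.

import tensorflow as tf
from tensorflow.core.protobuf import config_pb2

x = tf.random_normal([256, 256])
y = tf.matmul(x, x)

with tf.Session() as sess:
    run_options = config_pb2.RunOptions(
        report_tensor_allocations_upon_oom=True)
    # On OOM, the error raised by this call carries an allocation report.
    result = sess.run(y, options=run_options)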
Example #2
    def before_run(self, run_context):
        self._curr_iter += 1
        if self._start_iter < self._curr_iter <= self._end_iter:
            return tf.estimator.SessionRunArgs(
                None,
                options=config_pb2.RunOptions(
                    trace_level=config_pb2.RunOptions.FULL_TRACE))
        else:
            return None
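
A hedged sketch of a complete hook in the same spirit (assuming TensorFlow 1.x; TraceWindowHook and its constructor arguments are illustrative, not from the original snippet): before_run requests full tracing for a window of steps, and after_run consumes the resulting RunMetadata to write Chrome traces.

import os

import tensorflow as tf
from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import timeline


class TraceWindowHook(tf.estimator.SessionRunHook):

    def __init__(self, start_iter, end_iter, trace_dir):
        self._curr_iter = 0
        self._start_iter = start_iter
        self._end_iter = end_iter
        self._trace_dir = trace_dir

    def before_run(self, run_context):
        self._curr_iter += 1
        if self._start_iter < self._curr_iter <= self._end_iter:
            return tf.estimator.SessionRunArgs(
                None,
                options=config_pb2.RunOptions(
                    trace_level=config_pb2.RunOptions.FULL_TRACE))
        return None

    def after_run(self, run_context, run_values):
        # run_values.run_metadata is populated only for runs where before_run
        # requested tracing through RunOptions.
        if run_values.run_metadata and run_values.run_metadata.step_stats.dev_stats:
            tl = timeline.Timeline(run_values.run_metadata.step_stats)
            trace_path = os.path.join(self._trace_dir,
                                      "step_%d.json" % self._curr_iter)
            with open(trace_path, "w") as f:
                f.write(tl.generate_chrome_trace_format())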
Example #3
    def setUpClass(cls):
        cls._dump_root = tempfile.mkdtemp()

        cls._is_gpu_available = test.is_gpu_available()
        if cls._is_gpu_available:
            cls._main_device = "/job:localhost/replica:0/task:0/gpu:0"
        else:
            cls._main_device = "/job:localhost/replica:0/task:0/cpu:0"

        with session.Session() as sess:
            u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
            v_init_val = np.array([[2.0], [-1.0]])

            u_name = "simple_mul_add/u"
            v_name = "simple_mul_add/v"

            u_init = constant_op.constant(u_init_val, shape=[2, 2])
            u = variables.Variable(u_init, name=u_name)
            v_init = constant_op.constant(v_init_val, shape=[2, 1])
            v = variables.Variable(v_init, name=v_name)

            w = math_ops.matmul(u, v, name="simple_mul_add/matmul")

            x = math_ops.add(w, w, name="simple_mul_add/add")

            u.initializer.run()
            v.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls="file://%s" % cls._dump_root)

            # Invoke Session.run().
            run_metadata = config_pb2.RunMetadata()
            sess.run(x, options=run_options, run_metadata=run_metadata)

        cls._debug_dump = debug_data.DebugDumpDir(
            cls._dump_root, partition_graphs=run_metadata.partition_graphs)

        # Construct the analyzer.
        cls._analyzer = analyzer_cli.DebugAnalyzer(cls._debug_dump)

        # Construct the handler registry.
        cls._registry = debugger_cli_common.CommandHandlerRegistry()

        # Register command handlers.
        cls._registry.register_command_handler(
            "list_tensors",
            cls._analyzer.list_tensors,
            cls._analyzer.get_help("list_tensors"),
            prefix_aliases=["lt"])
        cls._registry.register_command_handler(
            "node_info",
            cls._analyzer.node_info,
            cls._analyzer.get_help("node_info"),
            prefix_aliases=["ni"])
        cls._registry.register_command_handler(
            "print_tensor",
            cls._analyzer.print_tensor,
            cls._analyzer.get_help("print_tensor"),
            prefix_aliases=["pt"])
Example #4
    def testArithmeticOptimizationActive(self):
        """Tests that tfdbg can dump the tensor from nodes created by Grappler."""
        with session.Session(
                config=_grappler_enabled_session_config()) as sess:
            u = variables.VariableV1([[1, 2], [3, 4]],
                                     name="u",
                                     dtype=dtypes.float32)
            # The next two ops should be optimized by Grappler into a single op:
            # either an AddN op or a Mul op.
            x = math_ops.add(u, u)
            x = math_ops.add(x, u)
            y = math_ops.multiply(x, u)

            sess.run(variables.global_variables_initializer())

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=[self._debug_url])

            run_metadata = config_pb2.RunMetadata()
            run_result = sess.run(y,
                                  options=run_options,
                                  run_metadata=run_metadata)
            self.assertAllClose(run_result, [[3, 12], [27, 48]])

            dump_data = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs,
                validate=True)

            original_node_names = set(
                [op.name for op in sess.graph.get_operations()])
            dumped_node_names = set(dump_data.nodes())
            grappler_created_node_names = dumped_node_names - original_node_names
            grappler_removed_node_names = original_node_names - dumped_node_names

            # Assert that Grappler should have replaced some of the nodes from the
            # original graph with new nodes.
            self.assertTrue(grappler_created_node_names)
            self.assertTrue(grappler_removed_node_names)

            # Iterate through the nodes created by Grappler. One of them should
            # be the result of replacing the original add ops with an AddN op
            # or a Mul op.
            found_optimized_node = False
            for grappler_node_name in grappler_created_node_names:
                node_op_type = dump_data.node_op_type(grappler_node_name)
                # Look for the node created by Grappler's arithmetic optimization.
                if ((test_util.IsMklEnabled()
                     and node_op_type in ("_MklAddN", "Mul"))
                        or (node_op_type in ("AddN", "Mul"))):
                    datum = dump_data.get_tensors(grappler_node_name, 0,
                                                  "DebugIdentity")
                    self.assertEqual(1, len(datum))
                    self.assertAllClose(datum[0], [[3, 6], [9, 12]])
                    found_optimized_node = True
                    break
            self.assertTrue(
                found_optimized_node,
                "Failed to find optimized node created by Grappler's arithmetic "
                "optimization.")
Example #5
    def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenDebugNodes(self):
        with session.Session(config=session_debug_testlib.
                             no_rewrite_session_config()) as sess:
            v_1 = variables.VariableV1(50.0, name="v_1")
            v_2 = variables.VariableV1(-50.0, name="v_1")
            delta_1 = constant_op.constant(5.0, name="delta_1")
            delta_2 = constant_op.constant(-5.0, name="delta_2")
            inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
            inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")

            sess.run([v_1.initializer, v_2.initializer])

            run_metadata = config_pb2.RunMetadata()
            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=[
                                        "DebugIdentity(gated_grpc=true)",
                                        "DebugNumericSummary(gated_grpc=true)"
                                    ],
                                    debug_urls=[self._debug_server_url_1])

            for i in xrange(4):
                self._server_1.clear_data()

                if i % 2 == 0:
                    self._server_1.request_watch("delta_1", 0, "DebugIdentity")
                    self._server_1.request_watch("delta_2", 0, "DebugIdentity")
                    self._server_1.request_unwatch("delta_1", 0,
                                                   "DebugNumericSummary")
                    self._server_1.request_unwatch("delta_2", 0,
                                                   "DebugNumericSummary")
                else:
                    self._server_1.request_unwatch("delta_1", 0,
                                                   "DebugIdentity")
                    self._server_1.request_unwatch("delta_2", 0,
                                                   "DebugIdentity")
                    self._server_1.request_watch("delta_1", 0,
                                                 "DebugNumericSummary")
                    self._server_1.request_watch("delta_2", 0,
                                                 "DebugNumericSummary")

                sess.run([inc_v_1, inc_v_2],
                         options=run_options,
                         run_metadata=run_metadata)

                # Watched debug tensors are:
                #   Run 0: delta_[1,2]:0:DebugIdentity
                #   Run 1: delta_[1,2]:0:DebugNumericSummary
                #   Run 2: delta_[1,2]:0:DebugIdentity
                #   Run 3: delta_[1,2]:0:DebugNumericSummary
                self.assertEqual(2, len(self._server_1.debug_tensor_values))
                if i % 2 == 0:
                    self.assertAllClose(
                        [5.0], self._server_1.
                        debug_tensor_values["delta_1:0:DebugIdentity"])
                    self.assertAllClose(
                        [-5.0], self._server_1.
                        debug_tensor_values["delta_2:0:DebugIdentity"])
                else:
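                    # DebugNumericSummary emits, for a scalar tensor, a
                    # 14-element vector (a hedged reading, decoded from the
                    # expected values below): [is_initialized, element_count,
                    # nan_count, neg_inf_count, neg_count, zero_count,
                    # pos_count, pos_inf_count, min, max, mean, variance,
                    # dtype_enum, rank].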
                    self.assertAllClose(
                        [[
                            1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 5.0, 5.0,
                            5.0, 0.0, 1.0, 0.0
                        ]], self._server_1.
                        debug_tensor_values["delta_1:0:DebugNumericSummary"])
                    self.assertAllClose(
                        [[
                            1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, -5.0, -5.0,
                            -5.0, 0.0, 1.0, 0.0
                        ]], self._server_1.
                        debug_tensor_values["delta_2:0:DebugNumericSummary"])
Example #6
def train_step(sess, train_op, global_step, train_step_kwargs):
    """Function that takes a gradient step and specifies whether to stop.

  Args:
    sess: The current session.
    train_op: An `Operation` that evaluates the gradients and returns the
      total loss.
    global_step: A `Tensor` representing the global training step.
    train_step_kwargs: A dictionary of keyword arguments.

  Returns:
    The total loss and a boolean indicating whether or not to stop training.

  Raises:
    ValueError: if 'should_trace' is in `train_step_kwargs` but `logdir` is not.
  """
    start_time = time.time()

    trace_run_options = None
    run_metadata = None
    if 'should_trace' in train_step_kwargs:
        if 'logdir' not in train_step_kwargs:
            raise ValueError(
                'logdir must be present in train_step_kwargs when '
                'should_trace is present')
        if sess.run(train_step_kwargs['should_trace']):
            trace_run_options = config_pb2.RunOptions(
                trace_level=config_pb2.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()

    total_loss, np_global_step = sess.run([train_op, global_step],
                                          options=trace_run_options,
                                          run_metadata=run_metadata)
    time_elapsed = time.time() - start_time

    if run_metadata is not None:
        tl = timeline.Timeline(run_metadata.step_stats)
        trace = tl.generate_chrome_trace_format()
        trace_filename = os.path.join(train_step_kwargs['logdir'],
                                      'tf_trace-%d.json' % np_global_step)
        logging.info('Writing trace to %s', trace_filename)
        file_io.write_string_to_file(trace_filename, trace)
        if 'summary_writer' in train_step_kwargs:
            train_step_kwargs['summary_writer'].add_run_metadata(
                run_metadata, 'run_metadata-%d' % np_global_step)

    if 'should_log' in train_step_kwargs:
        if sess.run(train_step_kwargs['should_log']):
            logging.info('global step %d: loss = %.4f (%.2f sec/step)',
                         np_global_step, total_loss, time_elapsed)

    # TODO(nsilberman): figure out why we can't put this into sess.run. The
    # issue right now is that the stop check depends on the global step. The
    # increment of global step often happens via the train op, which is
    # created using optimizer.apply_gradients.
    #
    # Since running `train_op` causes the global step to be incremented, one
    # would expect that using a control dependency would allow the
    # should_stop check to be run in the same session.run call:
    #
    #   with ops.control_dependencies([train_op]):
    #     should_stop_op = ...
    #
    # However, this actually seems not to work on certain platforms.
    if 'should_stop' in train_step_kwargs:
        should_stop = sess.run(train_step_kwargs['should_stop'])
    else:
        should_stop = False

    return total_loss, should_stop
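
A hedged usage sketch for train_step above (TF 1.x graph mode; the loss variable and optimizer are stand-ins for a real model): 'should_trace' and 'logdir' must be supplied together, and 'should_stop' ends the loop.

import tensorflow as tf

loss = tf.Variable(10.0, name="loss_proxy")  # stand-in for a model loss
global_step = tf.train.get_or_create_global_step()
update_op = tf.train.GradientDescentOptimizer(0.1).minimize(
    loss, global_step=global_step)
with tf.control_dependencies([update_op]):
    # Mimic slim's create_train_op: the train op evaluates to the total loss.
    train_op = tf.identity(loss)

train_step_kwargs = {
    'should_trace': tf.equal(global_step, 5),  # trace exactly step 5
    'should_log': tf.equal(tf.mod(global_step, 10), 0),
    'should_stop': tf.greater_equal(global_step, 20),
    'logdir': '/tmp/traces',  # assumed to exist before tracing fires
}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    should_stop = False
    while not should_stop:
        total_loss, should_stop = train_step(sess, train_op, global_step,
                                             train_step_kwargs)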
Example #7
    def testDumpCausalityCheck(self):
        with session.Session() as sess:
            u_name = "testDumpCausalityCheck/u"
            v_name = "testDumpCausalityCheck/v"
            w_name = "testDumpCausalityCheck/w"

            u_init = constant_op.constant([2.0, 4.0])
            u = variables.Variable(u_init, name=u_name)
            v = math_ops.add(u, u, name=v_name)
            w = math_ops.add(v, v, name=w_name)

            u.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            sess.run(w, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))

            # First, loading the original dump without supplying the
            # partition_graphs should not cause a RuntimeError; validation
            # occurs only when partition_graphs are loaded.
            debug_data.DebugDumpDir(self._dump_root)

            # Now, loading the original dump with partition graphs supplied should
            # succeed. The validation should pass quietly.
            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            # Get the dump file names and compute their timestamps.
            self.assertEqual(
                1, len(dump.get_tensor_file_paths(u_name, 0, "DebugIdentity")))
            u_file_path = dump.get_tensor_file_paths(u_name, 0,
                                                     "DebugIdentity")[0]

            self.assertEqual(
                1, len(dump.get_tensor_file_paths(v_name, 0, "DebugIdentity")))
            v_file_path = dump.get_tensor_file_paths(v_name, 0,
                                                     "DebugIdentity")[0]

            u_timestamp = int(u_file_path[u_file_path.rindex("_") + 1:])
            v_timestamp = int(v_file_path[v_file_path.rindex("_") + 1:])

            # Swap the time stamps
            new_u_file_path = u_file_path[:u_file_path.
                                          rindex("_")] + "_%d" % v_timestamp
            new_v_file_path = v_file_path[:v_file_path.
                                          rindex("_")] + "_%d" % u_timestamp

            os.rename(u_file_path, new_u_file_path)
            os.rename(v_file_path, new_v_file_path)

            # Load the dump directory again. Now a ValueError is expected to be
            # raised due to the timestamp swap.
            with self.assertRaisesRegexp(ValueError, "Causality violated"):
                dump = debug_data.DebugDumpDir(
                    self._dump_root,
                    partition_graphs=run_metadata.partition_graphs)

            # Loading the dump directory with kwarg "validate" set explicitly to
            # False should get rid of the error.
            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs,
                validate=False)
Example #8
    def before_run(self, run_context):
        if not self._session_wrapper:
            self._session_wrapper = local_cli_wrapper.LocalCLIDebugWrapperSession(
                run_context.session,
                ui_type=self._ui_type,
                dump_root=self._dump_root,
                thread_name_filter=self._thread_name_filter)

            # Register any tensor filters that were added prior to the
            # construction of the underlying LocalCLIDebugWrapperSession
            # object.
            for filter_name in self._pending_tensor_filters:
                self._session_wrapper.add_tensor_filter(
                    filter_name, self._pending_tensor_filters[filter_name])

        # Increment run call counter.
        self._session_wrapper.increment_run_call_count()

        # Adapt run_context to an instance of OnRunStartRequest for invoking
        # superclass on_run_start().
        on_run_start_request = framework.OnRunStartRequest(
            run_context.original_args.fetches,
            run_context.original_args.feed_dict, None, None,
            self._session_wrapper.run_call_count)

        on_run_start_response = self._session_wrapper.on_run_start(
            on_run_start_request)
        self._performed_action = on_run_start_response.action

        run_args = session_run_hook.SessionRunArgs(
            None, feed_dict=None, options=config_pb2.RunOptions())
        if self._performed_action == framework.OnRunStartAction.DEBUG_RUN:
            # pylint: disable=protected-access
            self._session_wrapper._decorate_run_options_for_debug(
                run_args.options,
                on_run_start_response.debug_urls,
                debug_ops=on_run_start_response.debug_ops,
                node_name_regex_whitelist=(
                    on_run_start_response.node_name_regex_whitelist),
                op_type_regex_whitelist=(
                    on_run_start_response.op_type_regex_whitelist),
                tensor_dtype_regex_whitelist=(
                    on_run_start_response.tensor_dtype_regex_whitelist),
                tolerate_debug_op_creation_failures=(
                    on_run_start_response.tolerate_debug_op_creation_failures))
            # pylint: enable=protected-access
        elif self._performed_action == framework.OnRunStartAction.PROFILE_RUN:
            # pylint: disable=protected-access
            self._session_wrapper._decorate_run_options_for_profile(
                run_args.options)
            # pylint: enable=protected-access
        elif self._performed_action == framework.OnRunStartAction.INVOKE_STEPPER:
            # The _finalized property must be set to False so that the NodeStepper
            # can insert ops for retrieving TensorHandles.
            # pylint: disable=protected-access
            run_context.session.graph._finalized = False
            # pylint: enable=protected-access

            with stepper.NodeStepper(
                    run_context.session, run_context.original_args.fetches,
                    run_context.original_args.feed_dict) as node_stepper:
                self._session_wrapper.invoke_node_stepper(
                    node_stepper, restore_variable_values_on_exit=True)

        return run_args
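
The before_run above is the core of tfdbg's LocalCLIDebugHook. A hedged usage sketch (TF 1.x Estimator API; estimator and train_input_fn are illustrative): attaching the hook routes every Session.run() issued by train() through the debug-wrapper flow shown above.

from tensorflow.python import debug as tf_debug

hook = tf_debug.LocalCLIDebugHook(ui_type="curses")
# Filters added before the wrapper session exists are queued in
# _pending_tensor_filters and registered lazily, as shown in before_run.
hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
estimator.train(input_fn=train_input_fn, hooks=[hook])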
Example #9
    def testFindNodesWithBadTensorValues(self):
        with session.Session() as sess:
            u_name = "testFindNodesWithBadTensorValues/u"
            v_name = "testFindNodesWithBadTensorValues/v"
            w_name = "testFindNodesWithBadTensorValues/w"
            x_name = "testFindNodesWithBadTensorValues/x"
            y_name = "testFindNodesWithBadTensorValues/y"
            z_name = "testFindNodesWithBadTensorValues/z"

            u_init = constant_op.constant([2.0, 4.0])
            u = variables.Variable(u_init, name=u_name)
            v_init = constant_op.constant([2.0, 1.0])
            v = variables.Variable(v_init, name=v_name)

            # Expected output: [0.0, 3.0]
            w = math_ops.sub(u, v, name=w_name)

            # Expected output: [inf, 1.3333]
            x = math_ops.div(u, w, name=x_name)

            # Expected output: [nan, 4.0]
            y = math_ops.mul(w, x, name=y_name)

            z = math_ops.mul(y, y, name=z_name)

            u.initializer.run()
            v.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            sess.run(z, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            def has_bad_value(_, tensor):
                return np.any(np.isnan(tensor)) or np.any(np.isinf(tensor))

            # Find all "offending tensors".
            bad_data = dump.find(has_bad_value)

            # Verify that the nodes with bad values are caught through running find
            # on the debug dump.
            self.assertEqual(3, len(bad_data))
            self.assertEqual(x_name, bad_data[0].node_name)
            self.assertEqual(y_name, bad_data[1].node_name)
            self.assertEqual(z_name, bad_data[2].node_name)

            # Test first_n kwarg of find(): Find the first offending tensor.
            first_bad_datum = dump.find(has_bad_value, first_n=1)

            self.assertEqual(1, len(first_bad_datum))
            self.assertEqual(x_name, first_bad_datum[0].node_name)
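
A hedged aside (tfdbg as of TF 1.x): debug_data ships a predicate equivalent to the has_bad_value filter defined above, so the same search can be written without a custom callback.

            # Same (datum, tensor) -> bool contract as has_bad_value.
            bad_data = dump.find(debug_data.has_inf_or_nan)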
Example #10
    def testDumpGraphStructureLookup(self):
        # TODO(cais): Separate this test into multiple test methods.

        with session.Session() as sess:
            u_name = "testDumpGraphStructureLookup/u"
            v_name = "testDumpGraphStructureLookup/v"
            w_name = "testDumpGraphStructureLookup/w"

            u_init = constant_op.constant([2.0, 4.0])
            u = variables.Variable(u_init, name=u_name)
            v = math_ops.add(u, u, name=v_name)
            w = math_ops.add(v, v, name=w_name)

            u.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            sess.run(w, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))
            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            u_read_name = u_name + "/read"

            # Test node name list lookup of the DebugDumpDir object.
            node_names = dump.nodes()
            self.assertTrue(u_name in node_names)
            self.assertTrue(u_read_name in node_names)

            # Test querying node attributes.
            u_attr = dump.node_attributes(u_name)
            self.assertEqual(dtypes.float32, u_attr["dtype"].type)
            self.assertEqual(1, len(u_attr["shape"].shape.dim))
            self.assertEqual(2, u_attr["shape"].shape.dim[0].size)

            with self.assertRaisesRegexp(ValueError,
                                         "No node named \"foo\" exists"):
                dump.node_attributes("foo")

            # Test querying the debug watch keys with node names.
            self.assertEqual(["%s:0:DebugIdentity" % u_name],
                             dump.debug_watch_keys(u_name))
            self.assertEqual(["%s:0:DebugIdentity" % v_name],
                             dump.debug_watch_keys(v_name))
            self.assertEqual(["%s:0:DebugIdentity" % w_name],
                             dump.debug_watch_keys(w_name))
            self.assertEqual([], dump.debug_watch_keys("foo"))

            # Test querying debug datum instances from debug watch.
            u_data = dump.watch_key_to_data(dump.debug_watch_keys(u_name)[0])
            self.assertEqual(1, len(u_data))
            self.assertEqual(u_name, u_data[0].node_name)
            self.assertEqual(0, u_data[0].output_slot)
            self.assertEqual("DebugIdentity", u_data[0].debug_op)
            self.assertGreaterEqual(u_data[0].timestamp, 0)

            self.assertEqual([], dump.watch_key_to_data("foo"))

            # Test the inputs lookup of the DebugDumpDir object.
            self.assertEqual([], dump.node_inputs(u_name))
            self.assertEqual([u_name], dump.node_inputs(u_read_name))
            self.assertEqual([u_read_name] * 2, dump.node_inputs(v_name))
            self.assertEqual([v_name] * 2, dump.node_inputs(w_name))

            self.assertEqual([], dump.node_inputs(u_name, is_control=True))
            self.assertEqual([], dump.node_inputs(u_read_name,
                                                  is_control=True))
            self.assertEqual([], dump.node_inputs(v_name, is_control=True))
            self.assertEqual([], dump.node_inputs(w_name, is_control=True))

            # Test the outputs recipient lookup of the DebugDumpDir object.
            self.assertTrue(u_read_name in dump.node_recipients(u_name))
            self.assertEqual(2,
                             dump.node_recipients(u_read_name).count(v_name))
            self.assertEqual(2, dump.node_recipients(v_name).count(w_name))

            self.assertEqual([], dump.node_recipients(u_name, is_control=True))
            self.assertEqual([],
                             dump.node_recipients(u_read_name,
                                                  is_control=True))
            self.assertEqual([], dump.node_recipients(v_name, is_control=True))
            self.assertEqual([], dump.node_recipients(w_name, is_control=True))

            # Test errors raised on invalid node names.
            with self.assertRaisesRegexp(ValueError,
                                         "does not exist in partition graphs"):
                dump.node_inputs(u_name + "foo")

            with self.assertRaisesRegexp(ValueError,
                                         "does not exist in partition graphs"):
                dump.node_recipients(u_name + "foo")

            # Test transitive_inputs().
            self.assertEqual([], dump.transitive_inputs(u_name))
            self.assertEqual([u_name], dump.transitive_inputs(u_read_name))
            self.assertEqual(set([u_name, u_read_name]),
                             set(dump.transitive_inputs(v_name)))
            self.assertEqual(set([u_name, u_read_name, v_name]),
                             set(dump.transitive_inputs(w_name)))

            with self.assertRaisesRegexp(ValueError,
                                         "does not exist in partition graphs"):
                dump.transitive_inputs(u_name + "foo")

            # Test num_devices().
            self.assertEqual(self._expected_num_devices, len(dump.devices()))

            # Test node_device().
            self.assertEqual(self._main_device, dump.node_device(u_name))

            with self.assertRaisesRegexp(ValueError,
                                         "does not exist in partition graphs"):
                dump.node_device(u_name + "foo")

            # Test node_exists().
            self.assertTrue(dump.node_exists(u_name))
            self.assertTrue(dump.node_exists(u_name + "/read"))
            self.assertFalse(dump.node_exists(u_name + "/read" + "/foo"))

            # Test node_op_type().
            self.assertEqual("Variable", dump.node_op_type(u_name))
            self.assertEqual("Identity", dump.node_op_type(u_name + "/read"))
            self.assertEqual("Add", dump.node_op_type(v_name))
            self.assertEqual("Add", dump.node_op_type(w_name))

            with self.assertRaisesRegexp(ValueError,
                                         "does not exist in partition graphs"):
                dump.node_op_type(u_name + "foo")
Example #11
    def testDumpToFileWhileLoop(self):
        with session.Session() as sess:
            num_iter = 10

            # "u" is the Variable being updated in the loop.
            u_name = "testDumpToFileWhileLoop/u"
            u_namespace = u_name.split("/")[0]

            u_init_val = np.array(11.0)
            u_init = constant_op.constant(u_init_val)
            u = variables.Variable(u_init, name=u_name)

            # "v" is the increment.
            v_name = "testDumpToFileWhileLoop/v"
            v_namespace = v_name.split("/")[0]

            v_init_val = np.array(2.0)
            v_init = constant_op.constant(v_init_val)
            v = variables.Variable(v_init, name=v_name)

            u.initializer.run()
            v.initializer.run()

            i = constant_op.constant(0, name="testDumpToFileWhileLoop/i")

            def cond(i):
                return math_ops.less(i, num_iter)

            def body(i):
                new_u = state_ops.assign_add(u, v)
                new_i = math_ops.add(i, 1)
                op = control_flow_ops.group(new_u)
                new_i = control_flow_ops.with_dependencies([op], new_i)
                return [new_i]

            loop = control_flow_ops.while_loop(cond,
                                               body, [i],
                                               parallel_iterations=1)

            # Create RunOptions for debug-watching tensors
            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_urls = self._debug_urls()

            # Add debug tensor watch for u.
            debug_utils.add_debug_tensor_watch(run_options,
                                               u_name,
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for v.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % v_name,
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for while/Identity.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "while/Identity",
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for while/Add/y.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "while/Add/y",
                                               0,
                                               debug_urls=debug_urls)

            run_metadata = config_pb2.RunMetadata()
            r = sess.run(loop, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))

            self.assertEqual(num_iter, r)

            u_val_final = sess.run(u)
            self.assertAllClose(u_init_val + num_iter * v_init_val,
                                u_val_final)

            # Verify dump files
            self.assertTrue(os.path.isdir(self._dump_root))

            self.assertTrue(
                os.path.isdir(os.path.join(self._dump_root, u_namespace)))
            self.assertTrue(
                os.path.isdir(os.path.join(self._dump_root, v_namespace, "v")))

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            # Expected dumped tensors: u, v/read, 10 iterations of while/Identity,
            # and 10 iterations of while/Add/y.
            self.assertEqual(1 + 1 + num_iter + num_iter, dump.size)

            # Verify tensor values.
            self.assertAllClose([u_init_val],
                                dump.get_tensors(u_name, 0, "DebugIdentity"))
            self.assertAllClose([v_init_val],
                                dump.get_tensors("%s/read" % v_name, 0,
                                                 "DebugIdentity"))

            while_id_tensors = dump.get_tensors("while/Identity", 0,
                                                "DebugIdentity")
            self.assertEqual(10, len(while_id_tensors))
            for k in xrange(len(while_id_tensors)):
                self.assertAllClose(np.array(k), while_id_tensors[k])

            # Verify ascending timestamps from the while loops.
            while_id_rel_timestamps = dump.get_rel_timestamps(
                "while/Identity", 0, "DebugIdentity")
            self.assertEqual(10, len(while_id_rel_timestamps))
            prev_rel_time = 0
            for rel_time in while_id_rel_timestamps:
                self.assertGreaterEqual(rel_time, prev_rel_time)
                prev_rel_time = rel_time

            # Test querying debug watch keys from node name.
            watch_keys = dump.debug_watch_keys("while/Identity")
            self.assertEqual(["while/Identity:0:DebugIdentity"], watch_keys)

            # Test querying debug datum instances from debug watch key.
            self.assertEqual(10, len(dump.watch_key_to_data(watch_keys[0])))
            self.assertEqual([], dump.watch_key_to_data("foo"))
Example #12
    def testDumpStringTensorsToFileSystem(self):
        with session.Session() as sess:
            str1_init_val = np.array(b"abc")
            str2_init_val = np.array(b"def")

            str1_init = constant_op.constant(str1_init_val)
            str2_init = constant_op.constant(str2_init_val)

            str1_name = "str1"
            str2_name = "str2"
            str1 = variables.Variable(str1_init, name=str1_name)
            str2 = variables.Variable(str2_init, name=str2_name)
            # Concatenate str1 and str2
            str_concat = math_ops.add(str1, str2, name="str_concat")

            str1.initializer.run()
            str2.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_urls = self._debug_urls()

            # Add debug tensor watch for u.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % str1_name,
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for v.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % str2_name,
                                               0,
                                               debug_urls=debug_urls)

            run_metadata = config_pb2.RunMetadata()
            sess.run(str_concat,
                     options=run_options,
                     run_metadata=run_metadata)

            # String ops are located on CPU.
            self.assertEqual(1, len(run_metadata.partition_graphs))

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            self.assertIn(str1_name, dump.nodes())
            self.assertIn(str2_name, dump.nodes())

            self.assertEqual(2, dump.size)

            self.assertEqual([str1_init_val],
                             dump.get_tensors("%s/read" % str1_name, 0,
                                              "DebugIdentity"))
            self.assertEqual([str2_init_val],
                             dump.get_tensors("%s/read" % str2_name, 0,
                                              "DebugIdentity"))

            self.assertGreaterEqual(
                dump.get_rel_timestamps("%s/read" % str1_name, 0,
                                        "DebugIdentity")[0], 0)
            self.assertGreaterEqual(
                dump.get_rel_timestamps("%s/read" % str2_name, 0,
                                        "DebugIdentity")[0], 0)
Example #13
def main():
    mkdir_('./tmp/')
    mkdir_('./weights/')
    mkdir_('./data/')
    zipfile.ZipFile('data.zip').extractall('./data')

    width, height = 500, 500
    '''
    batch  : number of images trained in one step
    lr     : learning rate used for training
    k      : size of the convolution kernel
    block  : number of blocks used in the network model
    ch     : number of channels in each layer
    patch  : size of the small patches extracted from each large image
    Epo    : number of epochs (how many passes over the training data)
    '''
    batch = 32
    lr = 1e-4
    k = 3
    block = 3
    ch = 64
    patch = 64
    Epo = 200

    # Loss type: choose among MSE, MAE, and CHA
    loss_type = 'MSE'

    NumImg = len(os.listdir('./data'))
    NumValid = int(0.2 * NumImg // 2)
    NumTrain = int(0.8 * NumImg // 2)

    tot, scat = [], []
    for i in range(NumImg // 2):
        img = imgload('./data/input%4.4d.raw' % i)
        lab = imgload('./data/label%4.4d.raw' % i)

        v_max = np.max(img)
        v_min = np.min(img)

        img, _, _ = MinMaxNorm(img, v_max, v_min, [width, height])
        lab, _, _ = MinMaxNorm(lab, v_max, v_min, [width, height])

        tot.extend(patch_ext(img, patch))
        scat.extend(patch_ext(lab, patch))

    NumTrain = int(0.8 * np.shape(tot)[0])
    NumValid = int(0.2 * np.shape(tot)[0])
    idx_train, idx_valid = [], []

    for i in range(NumTrain // batch):
        idx_train.append(i)
    for i in range(NumValid // batch):
        idx_valid.append(NumTrain // batch + i)

    x = tf.placeholder(tf.float32, shape=[batch, patch, patch])
    y = tf.placeholder(tf.float32, shape=[batch, patch, patch])

    x_ = tf.reshape(x, [batch, patch, patch, 1])
    y_ = tf.reshape(y, [batch, patch, patch, 1])

    ######### Network model #########

    net = Network()
    tensor = net.conv2d(x_, [k, k, ch], 'LReLU')

    tensor1 = tensor

    for i in range(block):
        tensor_ = net.conv2d(tensor, [k, k, ch], 'LReLU')
        tensor = net.conv2d(tensor_, [k, k, ch], 'LReLU')
        tensor = net.conv2d(tensor, [k, k, ch], 'LReLU')
        tensor = net.skip_connect(tensor, tensor_)

    tensor = net.skip_connect(tensor, tensor1)

    output = net.conv2d(tensor, [k, k, 1], 'Linear')

    #################################

    tot_loss = net.loss_cal(output, y_, loss_type)

    opt = tf.train.AdamOptimizer(lr).minimize(tot_loss)
    saver = tf.train.Saver(tf.global_variables())

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        start = time.time()
        mkdir_('./weights/error')
        for epoch in range(Epo):
            shuffle(idx_train)
            shuffle(idx_valid)
            E_t = open('./weights/error/E_t' + str(epoch) + '.txt', 'w')
            for idx in range(NumTrain // batch):
                Input = tot[idx_train[idx] * batch:(idx_train[idx] + 1) * batch]
                Label = scat[idx_train[idx] * batch:(idx_train[idx] + 1) * batch]
                _, l = sess.run([opt, tot_loss],
                                feed_dict={
                                    x: Input,
                                    y: Label
                                },
                                options=config_pb2.RunOptions(
                                    report_tensor_allocations_upon_oom=True))
                e = "%0.8f\n" % l
                E_t.write(e)
                end = time.time()
                print("[Epoch %2d (%6d/%d)] loss %.7f\t %.2f sec" %
                      (epoch, idx, NumTrain // batch, l, end - start))
            E_t.close()

            E_v = open('./weights/error/E_v' + str(epoch) + '.txt', 'w')
            for idx in range(NumValid // batch):
                Input = tot[idx_valid[idx] * batch:(idx_valid[idx] + 1) * batch]
                Label = scat[idx_valid[idx] * batch:(idx_valid[idx] + 1) * batch]
                l = sess.run(tot_loss, feed_dict={x: Input, y: Label})
                e = "%0.8f\n" % l
                E_v.write(e)
                end = time.time()
                print("[Epoch %2d (%6d/%d)] loss %.7f\t %.2f sec" %
                      (epoch, idx, NumValid // batch, l, end - start))
            E_v.close()
            saver.save(sess, './tmp/weights.ckpt')
            if os.path.isdir('./weights/' + str(epoch + 1)):
                shutil.rmtree('./weights/' + str(epoch + 1))
            shutil.copytree('./tmp', './weights/' + str(epoch + 1))
    tf.reset_default_graph()
    cuda.select_device(0)
    cuda.close()
Example #14
  def testMinOption(self):
    ops.reset_default_graph()

    def check_min(nodes, mm=0, mam=0, mcm=0, mb=0, mpb=0, mrb=0, mob=0):
      for n in nodes:
        if mm > 0:
          self.assertGreaterEqual(n.exec_micros, mm)
        if mam > 0:
          self.assertGreaterEqual(n.accelerator_exec_micros, mam)
        if mcm > 0:
          self.assertGreaterEqual(n.cpu_exec_micros, mcm)
        if mb > 0:
          self.assertGreaterEqual(n.requested_bytes, mb)
        if mpb > 0:
          self.assertGreaterEqual(n.peak_bytes, mpb)
        if mrb > 0:
          self.assertGreaterEqual(n.residual_bytes, mrb)
        if mob > 0:
          self.assertGreaterEqual(n.output_bytes, mob)
        check_min(n.children, mm, mam, mcm, mb, mpb, mrb, mob)

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildSmallModel()
      self.evaluate(variables.global_variables_initializer())
      run_meta = config_pb2.RunMetadata()
      _ = sess.run(x,
                   options=config_pb2.RunOptions(
                       trace_level=config_pb2.RunOptions.FULL_TRACE),
                   run_metadata=run_meta)

      min_val = random.randint(0, 10000)

      opts = builder(builder.time_and_memory(min_micros=min_val)
                    ).with_empty_output().build()
      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta=run_meta, options=opts)
      check_min(tfprof_node.children, mm=min_val)

      opts = builder(builder.time_and_memory(min_accelerator_micros=min_val)
                    ).with_empty_output().build()
      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta=run_meta, options=opts)
      check_min(tfprof_node.children, mam=min_val)

      opts = builder(builder.time_and_memory(min_cpu_micros=min_val)
                    ).with_empty_output().build()
      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta=run_meta, options=opts)
      check_min(tfprof_node.children, mcm=min_val)

      opts = builder(builder.time_and_memory(min_bytes=min_val)
                    ).with_empty_output().build()
      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta=run_meta, options=opts)
      check_min(tfprof_node.children, mb=min_val)

      opts = builder(builder.time_and_memory(min_peak_bytes=min_val)
                    ).with_empty_output().build()
      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta=run_meta, options=opts)
      check_min(tfprof_node.children, mpb=min_val)

      opts = builder(builder.time_and_memory(min_residual_bytes=min_val)
                    ).with_empty_output().build()
      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta=run_meta, options=opts)
      check_min(tfprof_node.children, mrb=min_val)

      opts = builder(builder.time_and_memory(min_output_bytes=min_val)
                    ).with_empty_output().build()
      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta=run_meta, options=opts)
      check_min(tfprof_node.children, mob=min_val)
Example #15
    def testWatchingUnconnectedOutputTensor(self):
        """Watch an output slot not emitting any edges.

    (Not even control edges from the node.)
    """

        with session.Session() as sess:
            x_init = constant_op.constant([2, 2, 3, 5, 5])
            x = variables.Variable(x_init, name="unconnected/x")

            # The UniqueOp (tf.unique) has two output slots. Use only slot 0 in the
            # graph. Let the debugger watch the unused slot 1.
            unique_x, _ = tf.unique(x, name="unconnected/unique_x")
            y = tf.add(unique_x, [0, 1, 2], name="unconnected/y")

            x.initializer.run()

            # Verify that only slot 0 of unique_x has recipients, while slot 1 of the
            # same node does not have recipients.
            unique_x_slot_0_recipients = []
            unique_x_slot_1_recipients = []
            for op in sess.graph.get_operations():
                for inp in op.inputs:
                    if inp.name == "unconnected/unique_x:0":
                        unique_x_slot_0_recipients.append(op.name)
                    elif inp.name == "unconnected/unique_x:1":
                        unique_x_slot_1_recipients.append(op.name)

            self.assertEqual(["unconnected/y"], unique_x_slot_0_recipients)
            self.assertEqual([], unique_x_slot_1_recipients)

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=self._debug_urls())

            run_metadata = config_pb2.RunMetadata()
            result = sess.run(y,
                              options=run_options,
                              run_metadata=run_metadata)
            self.assertAllClose([2, 4, 7], result)

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)

            # Assert that the connected slot (slot 0) is dumped properly.
            unique_x_slot_0_dumps = dump.watch_key_to_data(
                "unconnected/unique_x:0:DebugIdentity")
            self.assertEqual(1, len(unique_x_slot_0_dumps))
            self.assertEqual("unconnected/unique_x",
                             unique_x_slot_0_dumps[0].node_name)
            self.assertEqual(0, unique_x_slot_0_dumps[0].output_slot)
            self.assertAllClose([2, 3, 5],
                                unique_x_slot_0_dumps[0].get_tensor())

            # Assert that the unconnected slot (slot 1) is dumped properly.
            unique_x_slot_1_dumps = dump.watch_key_to_data(
                "unconnected/unique_x:1:DebugIdentity")
            self.assertEqual(1, len(unique_x_slot_1_dumps))
            self.assertEqual("unconnected/unique_x",
                             unique_x_slot_1_dumps[0].node_name)
            self.assertEqual(1, unique_x_slot_1_dumps[0].output_slot)
            self.assertAllClose([0, 0, 1, 2, 2],
                                unique_x_slot_1_dumps[0].get_tensor())
Example #16
    def run_op_benchmark(self,
                         sess,
                         op_or_tensor,
                         feed_dict=None,
                         burn_iters=2,
                         min_iters=10,
                         store_trace=False,
                         store_memory_usage=True,
                         name=None,
                         extras=None,
                         mbs=0):
        """Run an op or tensor in the given session.  Report the results.

    Args:
      sess: `Session` object to use for timing.
      op_or_tensor: `Operation` or `Tensor` to benchmark.
      feed_dict: A `dict` of values to feed for each op iteration (see the
        `feed_dict` parameter of `Session.run`).
      burn_iters: Number of burn-in iterations to run.
      min_iters: Minimum number of iterations to use for timing.
      store_trace: Boolean, whether to run an extra untimed iteration and
        store the trace of iteration in returned extras.
        The trace will be stored as a string in Google Chrome trace format
        in the extras field "full_trace_chrome_format". Note that trace
        will not be stored in test_log_pb2.TestResults proto.
      store_memory_usage: Boolean, whether to run an extra untimed iteration,
        calculate memory usage, and store that in extras fields.
      name: (optional) Override the BenchmarkEntry name with `name`.
        Otherwise it is inferred from the top-level method name.
      extras: (optional) Dict mapping string keys to additional benchmark info.
        Values may be either floats or values that are convertible to strings.
      mbs: (optional) The number of megabytes moved by this op, used to
        calculate the ops throughput.

    Returns:
      A `dict` containing the key-value pairs that were passed to
      `report_benchmark`. If the `store_trace` option is used, then
      `full_trace_chrome_format` will be included in the return dictionary even
      though it is not passed to `report_benchmark` with `extras`.
    """
        for _ in range(burn_iters):
            sess.run(op_or_tensor, feed_dict=feed_dict)

        deltas = [None] * min_iters

        for i in range(min_iters):
            start_time = time.time()
            sess.run(op_or_tensor, feed_dict=feed_dict)
            end_time = time.time()
            delta = end_time - start_time
            deltas[i] = delta

        extras = extras if extras is not None else {}
        unreported_extras = {}
        if store_trace or store_memory_usage:
            run_options = config_pb2.RunOptions(
                trace_level=config_pb2.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()
            sess.run(op_or_tensor,
                     feed_dict=feed_dict,
                     options=run_options,
                     run_metadata=run_metadata)
            tl = timeline.Timeline(run_metadata.step_stats)

            if store_trace:
                unreported_extras["full_trace_chrome_format"] = (
                    tl.generate_chrome_trace_format())

            if store_memory_usage:
                step_stats_analysis = tl.analyze_step_stats(show_memory=True)
                allocator_maximums = step_stats_analysis.allocator_maximums
                for k, v in allocator_maximums.items():
                    extras["allocator_maximum_num_bytes_%s" % k] = v.num_bytes

        def _median(x):
            if not x:
                return -1
            s = sorted(x)
            l = len(x)
            lm1 = l - 1
            return (s[l // 2] + s[lm1 // 2]) / 2.0

        median_delta = _median(deltas)

        benchmark_values = {
            "iters": min_iters,
            "wall_time": median_delta,
            "extras": extras,
            "name": name,
            "throughput": mbs / median_delta
        }
        self.report_benchmark(**benchmark_values)
        benchmark_values["extras"].update(unreported_extras)
        return benchmark_values
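
A hedged usage sketch for run_op_benchmark above (TF 1.x; the benchmark class, op, and mbs figure are illustrative):

import tensorflow as tf


class MatMulBenchmark(tf.test.Benchmark):

    def benchmarkMatMul(self):
        with tf.Session() as sess:
            x = tf.random_normal([512, 512])
            y = tf.matmul(x, x)
            results = self.run_op_benchmark(
                sess, y, burn_iters=5, min_iters=50,
                store_trace=True, store_memory_usage=True,
                name="matmul_512",
                mbs=2 * 512 * 512 * 4 / 1e6)  # two float32 inputs, ~2 MB
            # The Chrome trace is returned but excluded from the report:
            trace_json = results["extras"]["full_trace_chrome_format"]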
Example #17
    def testDumpToFileOverlappingParentDir(self):
        with session.Session() as sess:
            u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]])
            v_init_val = np.array([[2.0], [-1.0]])

            # Use node names with overlapping namespace (i.e., parent directory) to
            # test concurrent, non-racing directory creation.
            u_name = "testDumpToFile/u"
            v_name = "testDumpToFile/v"

            u_init = constant_op.constant(u_init_val, shape=[2, 2])
            u = variables.Variable(u_init, name=u_name)
            v_init = constant_op.constant(v_init_val, shape=[2, 1])
            v = variables.Variable(v_init, name=v_name)

            w = math_ops.matmul(u, v, name="testDumpToFile/matmul")

            u.initializer.run()
            v.initializer.run()

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_urls = "file://%s" % self._dump_root

            # Add debug tensor watch for u.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % u_name,
                                               0,
                                               debug_urls=debug_urls)
            # Add debug tensor watch for v.
            debug_utils.add_debug_tensor_watch(run_options,
                                               "%s/read" % v_name,
                                               0,
                                               debug_urls=debug_urls)

            run_metadata = config_pb2.RunMetadata()

            # Invoke Session.run().
            sess.run(w, options=run_options, run_metadata=run_metadata)

            self.assertEqual(self._expected_partition_graph_count,
                             len(run_metadata.partition_graphs))

            dump = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs)
            self.assertTrue(dump.loaded_partition_graphs())

            # Verify the dumped tensor values for u and v.
            self.assertEqual(2, dump.size)

            self.assertAllClose([u_init_val],
                                dump.get_tensors("%s/read" % u_name, 0,
                                                 "DebugIdentity"))
            self.assertAllClose([v_init_val],
                                dump.get_tensors("%s/read" % v_name, 0,
                                                 "DebugIdentity"))

            self.assertGreaterEqual(
                dump.get_rel_timestamps("%s/read" % u_name, 0,
                                        "DebugIdentity")[0], 0)
            self.assertGreaterEqual(
                dump.get_rel_timestamps("%s/read" % v_name, 0,
                                        "DebugIdentity")[0], 0)
Example #18
    def run_op_benchmark(self,
                         sess,
                         op_or_tensor,
                         feed_dict=None,
                         burn_iters=2,
                         min_iters=10,
                         store_trace=False,
                         name=None,
                         extras=None,
                         mbs=0):
        """Run an op or output in the given session.  Report the results.

    Args:
      sess: `Session` object to use for timing.
      op_or_tensor: `Operation` or `Output` to benchmark.
      feed_dict: A `dict` of values to feed for each op iteration (see the
        `feed_dict` parameter of `Session.run`).
      burn_iters: Number of burn-in iterations to run.
      min_iters: Minimum number of iterations to use for timing.
      store_trace: Boolean, whether to run an extra untimed iteration and
        store the trace of iteration in the benchmark report.
        The trace will be stored as a string in Google Chrome trace format
        in the extras field "full_trace_chrome_format".
      name: (optional) Override the BenchmarkEntry name with `name`.
        Otherwise it is inferred from the top-level method name.
      extras: (optional) Dict mapping string keys to additional benchmark info.
        Values may be either floats or values that are convertible to strings.
      mbs: (optional) The number of megabytes moved by this op, used to
        calculate the ops throughput.

    Returns:
      A `dict` containing the key-value pairs that were passed to
      `report_benchmark`.
    """
        for _ in range(burn_iters):
            sess.run(op_or_tensor, feed_dict=feed_dict)

        deltas = [None] * min_iters

        for i in range(min_iters):
            start_time = time.time()
            sess.run(op_or_tensor, feed_dict=feed_dict)
            end_time = time.time()
            delta = end_time - start_time
            deltas[i] = delta

        extras = extras if extras is not None else {}
        if store_trace:
            run_options = config_pb2.RunOptions(
                trace_level=config_pb2.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()
            sess.run(op_or_tensor,
                     feed_dict=feed_dict,
                     options=run_options,
                     run_metadata=run_metadata)
            tl = timeline.Timeline(run_metadata.step_stats)
            extras["full_trace_chrome_format"] = (
                tl.generate_chrome_trace_format())

        def _median(x):
            if not x:
                return -1
            s = sorted(x)
            l = len(x)
            lm1 = l - 1
            return (s[l // 2] + s[lm1 // 2]) / 2.0

        median_delta = _median(deltas)

        benchmark_values = {
            "iters": min_iters,
            "wall_time": median_delta,
            "extras": extras,
            "name": name,
            "throughput": mbs / median_delta
        }
        self.report_benchmark(**benchmark_values)
        return benchmark_values
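A hedged usage sketch of run_op_benchmark (assuming TensorFlow 1.x; the benchmark class and sizes below are illustrative): tf.test.Benchmark exposes this method, and store_trace=True attaches the Chrome-format trace to the report extras.

import tensorflow as tf

class MatmulBenchmark(tf.test.Benchmark):  # hypothetical benchmark class
    def benchmark_matmul_256(self):
        with tf.Graph().as_default(), tf.Session() as sess:
            a = tf.random_normal([256, 256], seed=0)
            prod = tf.matmul(a, a)
            # Reports the median wall time over min_iters runs, plus a trace.
            self.run_op_benchmark(sess, prod.op, burn_iters=2, min_iters=20,
                                  store_trace=True, name="matmul_256")

if __name__ == "__main__":
    MatmulBenchmark().benchmark_matmul_256()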
Example n. 19
    def _run_with_debugging(self, run_start_resp, fetches, feed_dict, options,
                            run_metadata, callable_runner,
                            callable_runner_args, callable_options):
        """Perform a session.run() or callable with debugging."""
        # Decorate RunOptions to fill in debugger tensor watch specifications.
        decorated_run_options = None
        if callable_options:
            callable_options_id = id(callable_options)
            if callable_options_id not in self._cached_callables_from_options:
                # Make a copy of callable_options to avoid mutating it.
                new_callable_options = config_pb2.CallableOptions()
                new_callable_options.CopyFrom(callable_options)
                decorated_run_options = new_callable_options.run_options
        else:
            decorated_run_options = options or config_pb2.RunOptions()

        run_metadata = run_metadata or config_pb2.RunMetadata()

        if decorated_run_options:
            self._decorate_run_options_for_debug(
                decorated_run_options,
                run_start_resp.debug_urls,
                debug_ops=run_start_resp.debug_ops,
                node_name_regex_whitelist=(
                    run_start_resp.node_name_regex_whitelist),
                op_type_regex_whitelist=run_start_resp.op_type_regex_whitelist,
                tensor_dtype_regex_whitelist=(
                    run_start_resp.tensor_dtype_regex_whitelist),
                tolerate_debug_op_creation_failures=(
                    run_start_resp.tolerate_debug_op_creation_failures))

        # Invoke the run() method of the wrapped Session. Catch any TensorFlow
        # runtime errors.
        tf_error = None
        try:
            if callable_runner:
                retvals = callable_runner(*callable_runner_args,
                                          options=decorated_run_options,
                                          run_metadata=run_metadata)
            elif callable_options:
                # pylint:disable=protected-access
                if callable_options_id in self._cached_callables_from_options:
                    callable_object = self._cached_callables_from_options[
                        callable_options_id]
                else:
                    callable_object = self._sess._make_callable_from_options(
                        new_callable_options)
                    self._cached_callables_from_options[
                        callable_options_id] = callable_object
                # pylint:enable=protected-access
                retvals = callable_object(*callable_runner_args,
                                          run_metadata=run_metadata)
            else:
                retvals = self._sess.run(fetches,
                                         feed_dict=feed_dict,
                                         options=decorated_run_options,
                                         run_metadata=run_metadata)
        except errors.OpError as op_error:
            if self._pass_through_operrors:
                raise op_error
            tf_error = op_error
            retvals = op_error

        return retvals, OnRunEndRequest(
            run_start_resp.action,
            run_metadata=run_metadata,
            client_graph_def=self._sess.graph.as_graph_def(),
            tf_error=tf_error)
Example n. 20
    def testToggleBreakpointsWorks(self):
        with session.Session(
                config=session_debug_testlib.no_rewrite_session_config()) as sess:
            v_1 = variables.VariableV1(50.0, name="v_1")
            v_2 = variables.VariableV1(-50.0, name="v_2")
            delta_1 = constant_op.constant(5.0, name="delta_1")
            delta_2 = constant_op.constant(-5.0, name="delta_2")
            inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1")
            inc_v_2 = state_ops.assign_add(v_2, delta_2, name="inc_v_2")

            sess.run([v_1.initializer, v_2.initializer])

            run_metadata = config_pb2.RunMetadata()
            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(
                run_options,
                sess.graph,
                debug_ops=["DebugIdentity(gated_grpc=true)"],
                debug_urls=[self._debug_server_url_1])

            for i in xrange(4):
                self._server_1.clear_data()

                if i in (0, 2):
                    # Enable breakpoint at delta_[1,2]:0:DebugIdentity in runs 0 and 2.
                    self._server_1.request_watch("delta_1",
                                                 0,
                                                 "DebugIdentity",
                                                 breakpoint=True)
                    self._server_1.request_watch("delta_2",
                                                 0,
                                                 "DebugIdentity",
                                                 breakpoint=True)
                else:
                    # Disable the breakpoint in runs 1 and 3.
                    self._server_1.request_unwatch("delta_1", 0,
                                                   "DebugIdentity")
                    self._server_1.request_unwatch("delta_2", 0,
                                                   "DebugIdentity")

                output = sess.run([inc_v_1, inc_v_2],
                                  options=run_options,
                                  run_metadata=run_metadata)
                self.assertAllClose(
                    [50.0 + 5.0 * (i + 1), -50 - 5.0 * (i + 1)], output)

                if i in (0, 2):
                    # During runs 0 and 2, the server should have received the published
                    # debug tensor delta:0:DebugIdentity. The breakpoint should have been
                    # unblocked by EventReply responses from the server.
                    self.assertAllClose(
                        [5.0], self._server_1.
                        debug_tensor_values["delta_1:0:DebugIdentity"])
                    self.assertAllClose(
                        [-5.0], self._server_1.
                        debug_tensor_values["delta_2:0:DebugIdentity"])
                    # After the runs, the server should have properly registered the
                    # breakpoints due to the request_watch calls.
                    self.assertSetEqual(
                        {("delta_1", 0, "DebugIdentity"),
                         ("delta_2", 0, "DebugIdentity")},
                        self._server_1.breakpoints)
                else:
                    # After the end of runs 1 and 3, the server has received the requests
                    # to disable the breakpoint at delta:0:DebugIdentity.
                    self.assertSetEqual(set(), self._server_1.breakpoints)
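The toggling above hinges on the (gated_grpc=true) attribute in the debug-op spec; a minimal sketch (assuming TensorFlow 1.x, a live sess, and a hypothetical debug-server address) of attaching gated watches to a whole graph:

# Gated DebugIdentity ops are attached to every watched tensor but stream
# nothing until the debug server issues request_watch for that tensor.
from tensorflow.core.protobuf import config_pb2
from tensorflow.python.debug.lib import debug_utils

run_options = config_pb2.RunOptions(output_partition_graphs=True)
debug_utils.watch_graph(
    run_options,
    sess.graph,  # assumes a live Session, as in the test above
    debug_ops=["DebugIdentity(gated_grpc=true)"],
    debug_urls=["grpc://localhost:6064"])  # hypothetical server address
# sess.run(..., options=run_options) then honors server-side
# request_watch / request_unwatch toggles for every watched tensor.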
Example n. 21
    def testDistributedRunWithGatedGrpcCommunicatesWithDebugServerCorrectly(
            self):
        graph = self._createGraph()
        with session.Session(config=self.session_config,
                             graph=graph,
                             target=self.server_target) as sess:
            sess.run(self.a.initializer)
            sess.run(self.b.initializer)

            run_options = config_pb2.RunOptions()
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    node_name_regex_whitelist=r"a",
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=[self.debug_server_url])

            # Test gated_grpc for an op located on the worker, i.e., on the same
            # host as where MasterSession is.
            # TODO(cais): gRPC gating of debug ops does not work on partition graphs
            # not located on MasterSession hosts (e.g., parameter servers) yet. Make
            # it work.
            debug_utils.watch_graph(
                run_options,
                sess.graph,
                node_name_regex_whitelist=r"p",
                debug_ops=["DebugIdentity(gated_grpc=True)"],
                debug_urls=[self.debug_server_url])

            for i in xrange(4):
                if i % 2 == 0:
                    self.debug_server.request_watch("p", 0, "DebugIdentity")
                else:
                    self.debug_server.request_unwatch("p", 0, "DebugIdentity")

                expected_p = (10.0 + 2.0 * (i + 1)) * (100.0 - 5.0 * (i + 1))
                self.assertAllClose(-expected_p,
                                    sess.run(self.q, options=run_options))

                self.assertEqual(
                    1, len(self.debug_server.core_metadata_json_strings))
                core_metadata = json.loads(
                    self.debug_server.core_metadata_json_strings[0])
                self.assertEqual([], core_metadata["input_names"])
                self.assertEqual(["q:0"], core_metadata["output_names"])
                self.assertEqual(i, core_metadata["executor_step_index"])

                if i == 0:
                    self.assertEqual(
                        1, len(self.debug_server.partition_graph_defs))

                # Tensor "a" is from a PS. It may take longer to arrive due to the fact
                # that the stream connection between the PS and the debug server is
                # persistent and not torn down at the end of each Session.run()
                self._pollingAssertDebugTensorValuesAllClose(
                    [10.0 + 2.0 * i], "a:0:DebugIdentity")

                # Due to the gRPC gating of the debug op for "p", the debug tensor
                # should be available only on even-indexed runs.
                if i % 2 == 0:
                    self.assertAllClose(
                        [expected_p], self.debug_server.
                        debug_tensor_values["p:0:DebugIdentity"])
                else:
                    self.assertNotIn("p:0:DebugIdentity",
                                     self.debug_server.debug_tensor_values)

                self.assertNotIn("b:0:DebugIdentity",
                                 self.debug_server.debug_tensor_values)
                self.debug_server.clear_data()
Example n. 22
    def testClusterSpecPropagationThreeServersOneCluster(self):
        """Boots 3 servers, ensures appropriate communication across workers.

    Additionally, in this cluster, we ensure the master is not the 0-th worker.

    Note: this test only uses one session.
    """
        server1 = server_lib.Server.create_local_server()
        server2 = server_lib.Server.create_local_server()
        server3 = server_lib.Server.create_local_server()
        cluster_def = cluster_pb2.ClusterDef()
        job = cluster_def.job.add()
        job.name = 'worker'
        job.tasks[0] = server3.target[len('grpc://'):]
        job.tasks[1] = server2.target[len('grpc://'):]
        job.tasks[2] = server1.target[len('grpc://'):]
        config = config_pb2.ConfigProto(cluster_def=cluster_def)

        # Add ops to the devices in non-linear order.

        with ops.device('/job:worker/task:1'):
            feed1 = array_ops.placeholder(dtypes.float32, shape=(2))
            const1 = constant_op.constant(2.0)
            mul1 = const1 * feed1

        with ops.device('/job:worker/task:2'):
            feed2 = array_ops.placeholder(dtypes.float32, shape=(2))
            const2 = constant_op.constant(2.0)
            mul2 = const2 * feed2

        with ops.device('/job:worker/task:0'):
            feed0 = array_ops.placeholder(dtypes.float32, shape=(2))
            const0 = constant_op.constant(2.0)
            mul0 = const0 * feed0

        sum_op = mul0 + mul1 + mul2

        ones = np.ones([2])
        run_options = config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE)
        run_metadata = config_pb2.RunMetadata()

        # Run!
        with session.Session(server1.target, config=config) as sess:
            output = sess.run(sum_op,
                              options=run_options,
                              run_metadata=run_metadata,
                              feed_dict={
                                  feed1: ones,
                                  feed2: ones,
                                  feed0: ones
                              })
            self.assertAllEqual(6 * ones, output)

            self.assertEqual(
                3,
                len([
                    dev_stats.device
                    for dev_stats in run_metadata.step_stats.dev_stats
                    for node_stats in dev_stats.node_stats
                    if '/job:worker/replica:0/task:' in dev_stats.device
                    and node_stats.node_name.startswith('Const')
                ]), run_metadata)
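With FULL_TRACE enabled as above, run_metadata.step_stats records which node ran on which device, which is exactly what the assertion counts; a minimal helper sketch (assuming TensorFlow 1.x):

def summarize_step_stats(run_metadata):
    """Prints (device, node, elapsed-micros) rows from a traced run."""
    for dev_stats in run_metadata.step_stats.dev_stats:
        for node_stats in dev_stats.node_stats:
            print(dev_stats.device, node_stats.node_name,
                  node_stats.all_end_rel_micros)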
Example n. 23
def train_step(sess, train_op, global_step, train_step_kwargs):
    start_time = time.time()

    trace_run_options = None
    run_metadata = None
    if 'should_trace' in train_step_kwargs:
        if 'logdir' not in train_step_kwargs:
            raise ValueError(
                'logdir must be present in train_step_kwargs when '
                'should_trace is present')
        if sess.run(train_step_kwargs['should_trace']):
            trace_run_options = config_pb2.RunOptions(
                trace_level=config_pb2.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()

    np_global_step = sess.run(global_step)

    if np_global_step % FLAGS.update_mask_step == 0:
        K.set_learning_phase(False)

        gdp_mask_taylor = train_step_kwargs['gdp_mask_taylor']
        network_filters = []
        mean_tvs = [np.zeros(g[1].shape[0]) for g in gdp_mask_taylor]

        for i in range(len(gdp_mask_taylor)):
            mask_index = sess.graph.get_tensor_by_name(
                gdp_mask_taylor[i][0].name.replace('mask', 'mask_index'))
            mask_value = sess.graph.get_tensor_by_name(
                gdp_mask_taylor[i][0].name.replace('mask', 'mask_value'))
            mask_update = sess.graph.get_operation_by_name(
                gdp_mask_taylor[i][0].name.replace('mask', 'mask_update')[:-2])
            for j in range(gdp_mask_taylor[i][0].shape[0]):
                sess.run(mask_update, feed_dict={mask_index: j, mask_value: 1})

        for i in range(FLAGS.taylor_step):
            tvs = sess.run([mt[1] for mt in gdp_mask_taylor])
            for g in range(len(gdp_mask_taylor)):
                mean_tvs[g] += tvs[g]

        for g in range(len(gdp_mask_taylor)):
            mean_tvs[g] /= FLAGS.taylor_step

        # Rank filters by the averaged Taylor value.
        for t, mt in enumerate(gdp_mask_taylor):
            for i in range(mean_tvs[t].shape[0]):
                ft = dict()
                ft['mask'] = mt[0]
                ft['index'] = i
                ft['tv'] = mean_tvs[t][i]
                network_filters.append(ft)

        network_filters_sorted = sorted(network_filters,
                                        key=lambda a: -a['tv'])

        for i, ft in enumerate(network_filters_sorted):
            mask_index = sess.graph.get_tensor_by_name(ft['mask'].name.replace(
                'mask', 'mask_index'))
            mask_value = sess.graph.get_tensor_by_name(ft['mask'].name.replace(
                'mask', 'mask_value'))
            mask_update = sess.graph.get_operation_by_name(
                ft['mask'].name.replace('mask', 'mask_update')[:-2])

            if i < int(FLAGS.beta * len(network_filters)):
                sess.run(mask_update,
                         feed_dict={
                             mask_index: ft['index'],
                             mask_value: 1
                         })
            else:
                sess.run(mask_update,
                         feed_dict={
                             mask_index: ft['index'],
                             mask_value: 0
                         })

        layer_compression_ratio = tf.get_collection('LAYER_COMPRESSION_RATIO')
        lrc = sess.run(layer_compression_ratio)
        for i in range(len(layer_compression_ratio)):
            print(layer_compression_ratio[i].name[:-2], lrc[i])

        K.set_learning_phase(True)

    total_loss, np_global_step = sess.run([train_op, global_step],
                                          options=trace_run_options,
                                          run_metadata=run_metadata)
    time_elapsed = time.time() - start_time

    if run_metadata is not None:
        tl = timeline.Timeline(run_metadata.step_stats)
        trace = tl.generate_chrome_trace_format()
        trace_filename = os.path.join(train_step_kwargs['logdir'],
                                      'tf_trace-%d.json' % np_global_step)
        logging.info('Writing trace to %s', trace_filename)
        file_io.write_string_to_file(trace_filename, trace)
        if 'summary_writer' in train_step_kwargs:
            train_step_kwargs['summary_writer'].add_run_metadata(
                run_metadata, 'run_metadata-%d' % np_global_step)

    if 'should_log' in train_step_kwargs:
        if sess.run(train_step_kwargs['should_log']):
            logging.info('global step %d: loss = %.4f (%.3f sec/step)',
                         np_global_step, total_loss, time_elapsed)

    if 'should_stop' in train_step_kwargs:
        should_stop = sess.run(train_step_kwargs['should_stop'])
    else:
        should_stop = False

    return total_loss, should_stop
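The 'should_trace'/'logdir' keys consumed above follow the slim-style train_step_kwargs convention; a hedged sketch of wiring them up (assuming TensorFlow 1.x; the interval and directory are illustrative):

import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
train_step_kwargs = {
    # Boolean tensor evaluated each step; True triggers a FULL_TRACE run.
    'should_trace': tf.equal(tf.mod(global_step, 100), 0),
    # Where the Chrome-format trace files are written (hypothetical path).
    'logdir': '/tmp/train_logs',
}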
Example n. 24
  def run(self, fetches, feed_dict=None, options=None, run_metadata=None):
    """Wrapper around Session.run() that inserts tensor watch options.

    Args:
      fetches: Same as the `fetches` arg to regular `Session.run()`.
      feed_dict: Same as the `feed_dict` arg to regular `Session.run()`.
      options: Same as the `options` arg to regular `Session.run()`.
      run_metadata: Same as the `run_metadata` arg to regular `Session.run()`.

    Returns:
      Simply forwards the output of the wrapped `Session.run()` call.

    Raises:
      ValueError: On invalid `OnRunStartAction` value.
    """

    self._run_call_count += 1

    # Invoke on-run-start callback and obtain response.
    run_start_resp = self.on_run_start(
        OnRunStartRequest(fetches, feed_dict, options, run_metadata,
                          self._run_call_count))
    _check_type(run_start_resp, OnRunStartResponse)

    if run_start_resp.action == OnRunStartAction.DEBUG_RUN:
      # Decorate RunOptions to fill in debugger tensor watch specifications.
      decorated_run_options = options or config_pb2.RunOptions()
      run_metadata = run_metadata or config_pb2.RunMetadata()

      self._decorate_run_options(
          decorated_run_options,
          run_start_resp.debug_urls,
          debug_ops=run_start_resp.debug_ops,
          node_name_regex_whitelist=run_start_resp.node_name_regex_whitelist,
          op_type_regex_whitelist=run_start_resp.op_type_regex_whitelist)

      # Invoke the run() method of the wrapped Session. Catch any TensorFlow
      # runtime errors.
      tf_error = None
      try:
        retvals = self._sess.run(fetches,
                                 feed_dict=feed_dict,
                                 options=decorated_run_options,
                                 run_metadata=run_metadata)
      except errors.OpError as op_error:
        tf_error = op_error
        retvals = op_error

      run_end_req = OnRunEndRequest(
          run_start_resp.action,
          run_metadata=run_metadata,
          client_graph_def=self._sess.graph.as_graph_def(),
          tf_error=tf_error)

    elif (run_start_resp.action == OnRunStartAction.NON_DEBUG_RUN or
          run_start_resp.action == OnRunStartAction.INVOKE_STEPPER):
      if run_start_resp.action == OnRunStartAction.INVOKE_STEPPER:
        with stepper.NodeStepper(
            self._sess, fetches, feed_dict) as node_stepper:
          retvals = self.invoke_node_stepper(
              node_stepper, restore_variable_values_on_exit=True)
      else:
        # Invoke run() method of the wrapped session.
        retvals = self._sess.run(
            fetches,
            feed_dict=feed_dict,
            options=options,
            run_metadata=run_metadata)

      # Prepare arg for the on-run-end callback.
      run_end_req = OnRunEndRequest(run_start_resp.action)
    else:
      raise ValueError(
          "Invalid OnRunStartAction value: %s" % run_start_resp.action)

    # Invoke on-run-end callback and obtain response.
    run_end_resp = self.on_run_end(run_end_req)
    _check_type(run_end_resp, OnRunEndResponse)
    # Currently run_end_resp is only a placeholder. No action is taken on it.

    return retvals
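A hedged usage sketch of the wrapper pattern above (assuming TensorFlow 1.x): concrete subclasses such as tf_debug.LocalCLIDebugWrapperSession supply the on-run-start/on-run-end callbacks and stand in for a raw Session.

import tensorflow as tf
from tensorflow.python import debug as tf_debug

sess = tf.Session()
# The wrapper's run() decorates RunOptions with debug watches before
# delegating to the real Session, as shown in the example above.
sess = tf_debug.LocalCLIDebugWrapperSession(sess)
# sess.run(fetches) would now launch the tfdbg CLI at on-run-start.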
Example n. 25
    def testAllowsDifferentWatchesOnDifferentRuns(self):
        """Test watching different tensors on different runs of the same graph."""

        with session.Session() as sess:
            u_init_val = [[5.0, 3.0], [-1.0, 0.0]]
            v_init_val = [[2.0], [-1.0]]

            # Use node names with overlapping namespace (i.e., parent directory) to
            # test concurrent, non-racing directory creation.
            u_name = "diff_Watch/u"
            v_name = "diff_Watch/v"

            u_init = constant_op.constant(u_init_val, shape=[2, 2])
            u = variables.Variable(u_init, name=u_name)
            v_init = constant_op.constant(v_init_val, shape=[2, 1])
            v = variables.Variable(v_init, name=v_name)

            w = math_ops.matmul(u, v, name="diff_Watch/matmul")

            u.initializer.run()
            v.initializer.run()

            for i in range(2):
                run_options = config_pb2.RunOptions(
                    output_partition_graphs=True)

                run_dump_root = self._debug_dump_dir(run_number=i)
                debug_urls = self._debug_urls(run_number=i)

                if i == 0:
                    # First debug run: Add debug tensor watch for u.
                    debug_utils.add_debug_tensor_watch(run_options,
                                                       "%s/read" % u_name,
                                                       0,
                                                       debug_urls=debug_urls)
                else:
                    # Second debug run: Add debug tensor watch for v.
                    debug_utils.add_debug_tensor_watch(run_options,
                                                       "%s/read" % v_name,
                                                       0,
                                                       debug_urls=debug_urls)

                run_metadata = config_pb2.RunMetadata()

                # Invoke Session.run().
                sess.run(w, options=run_options, run_metadata=run_metadata)

                self.assertEqual(self._expected_partition_graph_count,
                                 len(run_metadata.partition_graphs))

                dump = debug_data.DebugDumpDir(
                    run_dump_root,
                    partition_graphs=run_metadata.partition_graphs)
                self.assertTrue(dump.loaded_partition_graphs())

                # Each run should have generated only one dumped tensor, not two.
                self.assertEqual(1, dump.size)

                if i == 0:
                    self.assertAllClose([u_init_val],
                                        dump.get_tensors(
                                            "%s/read" % u_name, 0,
                                            "DebugIdentity"))
                    self.assertGreaterEqual(
                        dump.get_rel_timestamps("%s/read" % u_name, 0,
                                                "DebugIdentity")[0], 0)
                else:
                    self.assertAllClose([v_init_val],
                                        dump.get_tensors(
                                            "%s/read" % v_name, 0,
                                            "DebugIdentity"))
                    self.assertGreaterEqual(
                        dump.get_rel_timestamps("%s/read" % v_name, 0,
                                                "DebugIdentity")[0], 0)