def testDebugMakeCallableFromOptionsWithCustomOptionsAndMetadataWorks(self):
    """Callable built from options fills a caller-supplied RunMetadata.

    The graph computes callable_b = 2 * callable_a = 4 * variable_1, i.e.
    42.0 for the initial value 10.5. The test requests FULL_TRACE through
    the CallableOptions, passes its own RunMetadata to the callable and
    checks both the fetched value and the single resulting debug dump.
    """
    variable_1 = variables.VariableV1(
        10.5, dtype=dtypes.float32, name="variable_1")
    doubled = math_ops.add(variable_1, variable_1, "callable_a")
    math_ops.add(doubled, doubled, "callable_b")
    self.sess.run(variable_1.initializer)

    wrapped_sess = LocalCLIDebuggerWrapperSessionForTest(
        [["run"], ["run"]], self.sess, dump_root=self._tmp_dir)

    options = config_pb2.CallableOptions()
    options.fetch.append("callable_b")
    options.run_options.trace_level = config_pb2.RunOptions.FULL_TRACE
    sess_callable = wrapped_sess._make_callable_from_options(options)

    # Invoke the callable with our own RunMetadata instance ...
    custom_metadata = config_pb2.RunMetadata()
    outputs = sess_callable(run_metadata=custom_metadata)

    # ... and make sure step_stats ended up in that very instance.
    self.assertTrue(custom_metadata.step_stats)
    self.assertAllClose(np.array(42.0, dtype=np.float32), outputs[0])

    dumps = wrapped_sess.observers["debug_dumps"]
    self.assertEqual(1, len(dumps))
    dumped_nodes = [datum.node_name for datum in dumps[0].dumped_tensor_data]
    self.assertItemsEqual(
        ["callable_a", "callable_b", "variable_1", "variable_1/read"],
        dumped_nodes)
def testDebuggingMakeCallableFromOptionsWithZeroFeedWorks(self):
    """A feed-less callable built from options is debugged on every call.

    The callable is invoked twice; each invocation must yield 42.0
    (4 * 10.5) and produce its own debug dump covering the four graph nodes.
    """
    variable_1 = variables.VariableV1(
        10.5, dtype=dtypes.float32, name="variable_1")
    doubled = math_ops.add(variable_1, variable_1, "callable_a")
    math_ops.add(doubled, doubled, "callable_b")
    self.sess.run(variable_1.initializer)

    wrapped_sess = LocalCLIDebuggerWrapperSessionForTest(
        [["run"]] * 3, self.sess, dump_root=self._tmp_dir)

    options = config_pb2.CallableOptions()
    options.fetch.append("callable_b")
    sess_callable = wrapped_sess._make_callable_from_options(options)

    for _ in range(2):
        outputs = sess_callable()
        self.assertAllClose(np.array(42.0, dtype=np.float32), outputs[0])

    dumps = wrapped_sess.observers["debug_dumps"]
    self.assertEqual(2, len(dumps))
    for dump in dumps:
        dumped_nodes = [datum.node_name for datum in dump.dumped_tensor_data]
        self.assertItemsEqual(
            ["callable_a", "callable_b", "variable_1", "variable_1/read"],
            dumped_nodes)
def testDebuggingMakeCallableFromOptionsWithTwoFeedsWorks(self):
    """A two-feed callable built from options is debugged on every call.

    The graph computes callable_b = 2 * (callable_ph1 + callable_ph2); with
    the fed values 5.0 and 16.0 that is 42.0. Two invocations must produce
    two debug dumps, each containing both add nodes.
    """
    ph1 = array_ops.placeholder(dtypes.float32, name="callable_ph1")
    ph2 = array_ops.placeholder(dtypes.float32, name="callable_ph2")
    summed = math_ops.add(ph1, ph2, "callable_a")
    math_ops.add(summed, summed, "callable_b")

    wrapped_sess = LocalCLIDebuggerWrapperSessionForTest(
        [["run"]] * 3, self.sess, dump_root=self._tmp_dir)

    options = config_pb2.CallableOptions()
    options.feed.append("callable_ph1")
    options.feed.append("callable_ph2")
    options.fetch.append("callable_b")
    sess_callable = wrapped_sess._make_callable_from_options(options)

    first_input = np.array(5.0, dtype=np.float32)
    second_input = np.array(16.0, dtype=np.float32)
    for _ in range(2):
        outputs = sess_callable(first_input, second_input)
        self.assertAllClose(np.array(42.0, dtype=np.float32), outputs[0])

    dumps = wrapped_sess.observers["debug_dumps"]
    self.assertEqual(2, len(dumps))
    for dump in dumps:
        dumped_nodes = [datum.node_name for datum in dump.dumped_tensor_data]
        self.assertIn("callable_a", dumped_nodes)
        self.assertIn("callable_b", dumped_nodes)
def benchmarkMapAndBatchDense(self):
    """Measures the performance of parallelized batching.

    One dataset graph is built up front, parameterised by two placeholders
    (element shape and batch size). Every (shape, batch_size) combination is
    then timed in a fresh session and reported via `report_benchmark`.
    """
    shapes = [(), (10,), (10, 10), (10, 10, 10), (224, 224, 3)]
    batch_size_values = [1, 32, 64, 128, 1024]

    shape_placeholder = array_ops.placeholder(dtypes.int64, shape=[None])
    batch_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])

    dataset = dataset_ops.Dataset.range(1000000000)
    dense_value = random_ops.random_normal(shape=shape_placeholder)
    dataset = dataset.apply(
        batching.map_and_batch(lambda _: dense_value, batch_size_placeholder))
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()

    for shape in shapes:
        for batch_size in batch_size_values:
            with session.Session() as sess:
                init_feeds = {
                    shape_placeholder: shape,
                    batch_size_placeholder: batch_size,
                }
                sess.run(iterator.initializer, feed_dict=init_feeds)

                # A C++ callable keeps Python overhead out of the timings.
                callable_opts = config_pb2.CallableOptions()
                callable_opts.target.append(next_element.op.name)
                # pylint: disable=protected-access
                op_callable = sess._make_callable_from_options(callable_opts)
                # pylint: enable=protected-access

                # Five untimed steps to warm up the session caches.
                for _ in range(5):
                    op_callable()

                # Time rounds of 100 calls: at least five rounds and at
                # least five seconds overall.
                durations = []
                benchmark_begin = time.time()
                while (len(durations) < 5 or
                       time.time() - benchmark_begin < 5.0):
                    round_begin = time.time()
                    for _ in range(100):
                        op_callable()
                    round_end = time.time()
                    durations.append(round_end - round_begin)
                del op_callable

            median_wall_time = np.median(durations) / 100.0
            iters = len(durations) * 100

            print("Map and batch dense dataset shape: %r batch_size: %d "
                  "wall time: %f (%d iters)" %
                  (shape, batch_size, median_wall_time, iters))
            benchmark_name = (
                "benchmark_batch_dense_dataset_nnz_%d_batch_size_%d" %
                (np.prod(shape), batch_size))
            self.report_benchmark(
                iters=iters, wall_time=median_wall_time, name=benchmark_name)
def make_callable(sess, feed=None, target=None, fetch=None):
    """Build a feed_dict-style runner on top of a session callable.

    The names of the given graph elements are registered as feeds, targets
    and fetches of a `CallableOptions` message, from which a callable is
    created through `sess._make_callable_from_options`.

    Args:
        sess: Session object providing `_make_callable_from_options`.
        feed: Optional iterable of `tf.Tensor`/`tf.Operation` objects to feed.
        target: Optional iterable of `tf.Tensor`/`tf.Operation` objects to run
            without fetching.
        fetch: Optional iterable of `tf.Tensor`/`tf.Operation` objects to
            fetch.

    Returns:
        A function `run_callable(feed_dict)` that maps each element of `feed`
        to its value in `feed_dict` and returns the result of the underlying
        callable.

    Raises:
        ValueError: If an element of `feed`, `target` or `fetch` is neither a
            `tf.Tensor` nor a `tf.Operation`.
    """
    # `None` defaults instead of shared mutable lists; materialise once so
    # that generators work and the feed order is frozen for `run_callable`.
    feed = [] if feed is None else list(feed)
    target = [] if target is None else list(target)
    fetch = [] if fetch is None else list(fetch)

    def name_list_append(src, dist):
        for element in src:
            if not isinstance(element, (tf.Tensor, tf.Operation)):
                raise ValueError("element must be Tensor or Operation")
            dist.append(element.name)

    callable_opts = config_pb2.CallableOptions()
    name_list_append(feed, callable_opts.feed)
    name_list_append(target, callable_opts.target)
    name_list_append(fetch, callable_opts.fetch)
    # pylint: disable=protected-access
    callable_object = sess._make_callable_from_options(callable_opts)
    # pylint: enable=protected-access

    def run_callable(feed_dict):
        """Run the callable; `feed_dict` must hold a value per `feed` entry."""
        feed_values = []
        # Positional arguments must follow the order in which the feeds were
        # registered in `callable_opts.feed`, not the dict's own order.
        for key in feed:
            value = feed_dict[key]
            if not isinstance(value, tf.Tensor):
                # Coerce plain Python/NumPy values to the feed's dtype.
                key_type = dtypes_module.as_dtype(key.dtype)
                value = np.asarray(value, dtype=key_type.as_numpy_dtype)
            feed_values.append(value)
        return callable_object(*feed_values)

    return run_callable
def benchmark_collective(self):
    """Measures the performance of local CPU collective execution.

    For each device count and 1-D tensor length, one all-reduce op is built
    per CPU device inside a fresh session, the ops are run together through a
    C++ callable, and the median per-call wall time is reported.
    """
    shapes = [(10,), (1000,), (1000000,)]
    devices = [2, 4, 8]
    collective_key_counter = 0

    for group_size in devices:
        # Group and instance key share one counter value per device count.
        group_key = instance_key = collective_key_counter
        collective_key_counter += 1

        for shape in shapes:
            config = config_pb2.ConfigProto(device_count={"CPU": group_size})
            with session.Session(config=config) as sess:
                # A C++ callable keeps Python overhead out of the timings.
                callable_opts = config_pb2.CallableOptions()
                reduce_ops = []
                for device in range(group_size):
                    with ops.device("CPU:{}".format(device)):
                        operand = constant_op.constant(
                            np.multiply(range(shape[0]), 1.0))
                        reduced = collective_ops.all_reduce(
                            operand, group_size, group_key, instance_key,
                            "Add", "Div")
                        reduce_ops.append(reduced)
                        callable_opts.target.append(reduced.name)
                # pylint: disable=protected-access
                op_callable = sess._make_callable_from_options(callable_opts)
                # pylint: enable=protected-access

                # Five untimed steps: warm up the session caches and let
                # collective param resolution happen before measuring.
                for _ in range(5):
                    op_callable()

                # Time rounds of 100 calls: at least five rounds and at
                # least five seconds overall.
                durations = []
                benchmark_begin = time.time()
                while (len(durations) < 5 or
                       time.time() - benchmark_begin < 5.0):
                    round_begin = time.time()
                    for _ in range(100):
                        op_callable()
                    round_end = time.time()
                    durations.append(round_end - round_begin)
                del op_callable

            median_wall_time = np.median(durations) / 100.0
            iters = len(durations) * 100
            benchmark_name = "num_elements_{}_num_devices_{}".format(
                np.prod(shape), group_size)
            self.report_benchmark(
                iters=iters, wall_time=median_wall_time, name=benchmark_name)
def _make_callable(self, feed_arrays, feed_symbols, symbol_vals, session):
    """Builds and caches the session callable that runs the graph.

    Arguments:
        feed_arrays: List of input tensors to be fed Numpy arrays at runtime.
        feed_symbols: List of input tensors to be fed symbolic tensors at
            runtime.
        symbol_vals: List of symbolic tensors to be fed to `feed_symbols`.
        session: Session to use to generate the callable.

    The callable is not returned; it is stored on `self._callable_fn`
    together with the arguments it was generated from.
    """
    callable_opts = config_pb2.CallableOptions()

    # Feeds supplied as external data: explicit arrays first, then the keys
    # of `self.feed_dict` in sorted order.
    for array_input in feed_arrays:
        callable_opts.feed.append(array_input.name)
    if self.feed_dict:
        for feed_key in sorted(self.feed_dict.keys()):
            callable_opts.feed.append(feed_key.name)

    # Feeds supplied symbolically: wire each data tensor to its placeholder.
    for placeholder, data_tensor in zip(feed_symbols, symbol_vals):
        connection = callable_opts.tensor_connection.add()
        if placeholder.dtype != data_tensor.dtype:
            data_tensor = math_ops.cast(data_tensor, placeholder.dtype)
        source = ops._as_graph_element(data_tensor)
        if source is None:
            source = data_tensor
        connection.from_tensor = source.name
        connection.to_tensor = placeholder.name

    # Fetches: model outputs followed by the extra fetches.
    for fetched in self.outputs + self.fetches:
        callable_opts.fetch.append(fetched.name)

    # The update op is run as a target (nothing is fetched from it).
    if self.updates_op:
        callable_opts.target.append(self.updates_op.name)

    if self.run_options:
        callable_opts.run_options.CopyFrom(self.run_options)

    # Remember the callable and the parameters it was built from, so that
    # later mismatches can be detected and the callable refreshed.
    self._callable_fn = session._make_callable_from_options(callable_opts)
    self._feed_arrays = feed_arrays
    self._feed_symbols = feed_symbols
    self._symbol_vals = symbol_vals
    self._fetches = list(self.fetches)
    self._session = session
def _run_with_profiling(self, run_start_resp, fetches, feed_dict, options,
                        run_metadata, callable_runner, callable_runner_args,
                        callable_options):
    """Perform a session.run() or callable with profiling.

    Args:
        run_start_resp: Response to the on-run-start callback; its `action`
            is forwarded to the returned `OnRunEndRequest`.
        fetches: Fetches for `Session.run()`; used only when neither
            `callable_runner` nor `callable_options` is given.
        feed_dict: Feed dict for `Session.run()`; same restriction.
        options: Optional `RunOptions` to decorate for profiling. Ignored
            when `callable_options` is given.
        run_metadata: Optional `RunMetadata`; a fresh one is created if
            falsy.
        callable_runner: Optional callable to invoke instead of
            `Session.run()`.
        callable_runner_args: Positional arguments for `callable_runner` or
            for the callable created from `callable_options`.
        callable_options: Optional `CallableOptions` from which a callable
            is created for this run.

    Returns:
        A tuple `(retvals, run_end_request)` holding the values returned by
        the run and the `OnRunEndRequest` describing it.
    """
    if callable_options:
        # Always decorate a private copy so the caller's message is never
        # mutated. The copy must not depend on the callable cache: that
        # cache is neither read nor written in this method, and skipping
        # the copy for cached ids would leave both the copy and the run
        # options undefined below.
        new_callable_options = config_pb2.CallableOptions()
        new_callable_options.CopyFrom(callable_options)
        decorated_run_options = new_callable_options.run_options
    else:
        decorated_run_options = options or config_pb2.RunOptions()
    self._decorate_run_options_for_profile(decorated_run_options)

    run_metadata = run_metadata or config_pb2.RunMetadata()
    if callable_runner:
        retvals = callable_runner(*callable_runner_args,
                                  options=decorated_run_options,
                                  run_metadata=run_metadata)
    elif callable_options:
        # The decorated run options travel inside `new_callable_options`.
        # pylint:disable=protected-access
        callable_object = self._sess._make_callable_from_options(
            new_callable_options)
        # pylint:enable=protected-access
        retvals = callable_object(
            *callable_runner_args, run_metadata=run_metadata)
    else:
        retvals = self._sess.run(fetches,
                                 feed_dict=feed_dict,
                                 options=decorated_run_options,
                                 run_metadata=run_metadata)
    return retvals, OnRunEndRequest(
        run_start_resp.action,
        run_metadata=run_metadata,
        client_graph_def=self._sess.graph.as_graph_def())
def remap_callable_options(self, callable_options):
    """Build a remapped copy of a `CallableOptions` message.

    Args:
        callable_options: A `CallableOptions` protocol buffer message
            describing the computation that will be performed by the
            callable.

    Returns:
        A tuple `(new_callable_options, callable_arg_fns)`: the remapped
        `CallableOptions` and, per original feed, the second value returned
        by `self._remap_feed` for it.

    Raises:
        NotImplementedError: If `callable_options` contains tensor
            connections.
    """
    remapped = config_pb2.CallableOptions()
    arg_fns = []

    # External-data feeds: one original feed may expand to several names.
    for feed_name in callable_options.feed:
        new_feeds, arg_fn = self._remap_feed(feed_name)
        remapped.feed.extend([item.name for item in new_feeds])
        arg_fns.append(arg_fn)

    # Symbolic connections are not supported.
    # TODO: remap `from_tensor`/`to_tensor` of every tensor connection into
    # the new callable options instead of rejecting them.
    if len(callable_options.tensor_connection) > 0:
        raise NotImplementedError(
            'AutoDist will support feeding symbolic connections later.')

    # Fetches.
    for fetch_name in callable_options.fetch:
        new_fetches, _ = self._remap_fetch(fetch_name)
        remapped.fetch.extend([item.name for item in new_fetches])

    # Targets (updates) are strings; empty entries are skipped.
    for target_name in callable_options.target:
        if not target_name:
            continue
        new_targets, _ = self._remap_fetch(target_name)
        remapped.target.extend([item.name for item in new_targets])

    # Run options are carried over unchanged.
    remapped.run_options.CopyFrom(callable_options.run_options)

    return remapped, arg_fns
def _make_callable(self, feed_arrays, feed_symbols, symbol_values,
                   all_fetches):
    """Build and cache the session callable for the given feeds and fetches.

    Args:
        feed_arrays: Input tensors whose values are fed as external data.
        feed_symbols: Input tensors that are fed from other tensors.
        symbol_values: Tensors connected to `feed_symbols`, pairwise.
        all_fetches: Fetch structure handed to `_FetchHandler`.

    The callable is not returned; it is stored on `self._callable_fn`
    together with the feed arguments it was generated from.
    """
    callable_opts = config_pb2.CallableOptions()

    # External-data feeds: explicit arrays first, then the keys of
    # `self.feed_dict` in sorted order.
    for x in feed_arrays:
        callable_opts.feed.append(x.name)
    if self.feed_dict:
        for key in sorted(self.feed_dict.keys()):
            callable_opts.feed.append(key.name)

    # Symbolic feeds: connect each value tensor to its placeholder, casting
    # when the dtypes differ.
    for x, y in zip(feed_symbols, symbol_values):
        connection = callable_opts.tensor_connection.add()
        if x.dtype != y.dtype:
            y = tf.cast(y, x.dtype)
        from_tensor = tf_ops._as_graph_element(y)
        if from_tensor is None:
            from_tensor = y
        connection.from_tensor = from_tensor.name
        connection.to_tensor = x.name

    # Fetches are taken from the fetch handler built for `all_fetches`.
    self._all_fetches = all_fetches
    self._fetch_handler = _FetchHandler(
        graph=self.graph or tf.get_default_graph(),
        fetches=self._all_fetches,
        feeds={})
    for x in self._fetch_handler.fetches():
        callable_opts.fetch.append(x.name)

    # NOTE(review): assumes `self.updates_ops` is always a single op with a
    # `name` attribute (never None) -- confirm against the class.
    callable_opts.target.append(self.updates_ops.name)

    if self.run_options:
        callable_opts.run_options.CopyFrom(self.run_options)

    callable_fn = self.tf_sess._make_callable_from_options(callable_opts)

    # Cache the callable and the feed parameters it was built from.
    self._callable_fn = callable_fn
    self._feed_arrays = feed_arrays
    self._feed_symbols = feed_symbols
    self._symbol_values = symbol_values
def _run_with_debugging(self, run_start_resp, fetches, feed_dict, options,
                        run_metadata, callable_runner, callable_runner_args,
                        callable_options):
    """Perform a session.run() or callable with debugging.

    Run options are decorated with the debug settings carried by
    `run_start_resp`. For `callable_options`, the decoration is applied to a
    private copy, and the callable created from that copy is cached under
    `id(callable_options)`; a cache hit skips both the copy and the
    decoration.

    Returns:
        A tuple `(retvals, run_end_request)`. If an `errors.OpError` is
        raised by the run and `self._pass_through_operrors` is falsy, the
        error object itself is returned as `retvals` and recorded in the
        request.
    """
    callable_cache = self._cached_callables_from_options

    watch_options = None
    if callable_options:
        options_key = id(callable_options)
        if options_key not in callable_cache:
            # Work on a copy so the caller's message is not mutated.
            options_copy = config_pb2.CallableOptions()
            options_copy.CopyFrom(callable_options)
            watch_options = options_copy.run_options
    else:
        watch_options = options or config_pb2.RunOptions()

    run_metadata = run_metadata or config_pb2.RunMetadata()

    # Nothing to decorate when a cached callable is going to be reused.
    if watch_options:
        self._decorate_run_options_for_debug(
            watch_options,
            run_start_resp.debug_urls,
            debug_ops=run_start_resp.debug_ops,
            node_name_regex_whitelist=(
                run_start_resp.node_name_regex_whitelist),
            op_type_regex_whitelist=run_start_resp.op_type_regex_whitelist,
            tensor_dtype_regex_whitelist=(
                run_start_resp.tensor_dtype_regex_whitelist),
            tolerate_debug_op_creation_failures=(
                run_start_resp.tolerate_debug_op_creation_failures))

    # Run through the wrapped session, intercepting TensorFlow runtime
    # errors.
    tf_error = None
    try:
        if callable_runner:
            retvals = callable_runner(*callable_runner_args,
                                      options=watch_options,
                                      run_metadata=run_metadata)
        elif callable_options:
            # pylint:disable=protected-access
            if options_key in callable_cache:
                callable_object = callable_cache[options_key]
            else:
                callable_object = self._sess._make_callable_from_options(
                    options_copy)
                callable_cache[options_key] = callable_object
            # pylint:enable=protected-access
            retvals = callable_object(*callable_runner_args,
                                      run_metadata=run_metadata)
        else:
            retvals = self._sess.run(fetches,
                                     feed_dict=feed_dict,
                                     options=watch_options,
                                     run_metadata=run_metadata)
    except errors.OpError as op_error:
        if self._pass_through_operrors:
            raise op_error
        tf_error = op_error
        retvals = op_error

    run_end_req = OnRunEndRequest(
        run_start_resp.action,
        run_metadata=run_metadata,
        client_graph_def=self._sess.graph.as_graph_def(),
        tf_error=tf_error)
    return retvals, run_end_req
def run(self,
        fetches,
        feed_dict=None,
        options=None,
        run_metadata=None,
        callable_runner=None,
        callable_runner_args=None,
        callable_options=None):
    """Wrapper around Session.run() that inserts tensor watch options.

    Args:
        fetches: Same as the `fetches` arg to regular `Session.run()`.
        feed_dict: Same as the `feed_dict` arg to regular `Session.run()`.
        options: Same as the `options` arg to regular `Session.run()`.
        run_metadata: Same as the `run_metadata` arg to regular
            `Session.run()`.
        callable_runner: A `callable` returned by `Session.make_callable()`.
            If not `None`, `fetches` and `feed_dict` must both be `None`.
            Mutually exclusive with `callable_options`.
        callable_runner_args: An optional list of arguments to
            `callable_runner` or for `callable_options`. `None` is treated
            as an empty argument list.
        callable_options: An instance of `config_pb2.CallableOptions`, to be
            used with `Session._make_callable_from_options()`. Mutually
            exclusive with `callable_runner`.

    Returns:
        Simply forwards the output of the wrapped `Session.run()` call.

    Raises:
        ValueError: On invalid `OnRunStartAction` value; if both
            `callable_runner` and `callable_options` are given; or if either
            of them is combined with `fetches` or `feed_dict`.
        NotImplementedError: If `callable_runner` is given and the
            on-run-start response asks for a non-debug or stepper run.
    """
    if callable_runner and callable_options:
        raise ValueError(
            "callable_runner and callable_options are mutually exclusive, but "
            "are both specified in this call to BaseDebugWrapperSession.run()."
        )

    if callable_runner and (fetches or feed_dict):
        raise ValueError(
            "callable_runner and fetches/feed_dict are mutually exclusive, "
            "but are used simultaneously.")
    elif callable_options and (fetches or feed_dict):
        raise ValueError(
            "callable_options and fetches/feed_dict are mutually exclusive, "
            "but are used simultaneously.")

    # The argument list is documented as optional but is star-expanded
    # below; normalise the `None` default so that does not raise TypeError.
    if callable_runner_args is None:
        callable_runner_args = []

    self.increment_run_call_count()

    empty_fetches = not nest.flatten(fetches)
    if empty_fetches:
        tf_logging.info(
            "Due to empty fetches, tfdbg Session wrapper is letting a "
            "Session.run pass through without any debugging actions.")
    if self._is_disabled_thread() or empty_fetches:
        # Pass-through: run without callbacks or option decoration.
        if callable_runner:
            return callable_runner(*callable_runner_args)
        elif callable_options:
            # pylint:disable=protected-access
            return self._sess._make_callable_from_options(
                callable_options)(*callable_runner_args)
            # pylint:enable=protected-access
        else:
            return self._sess.run(fetches,
                                  feed_dict=feed_dict,
                                  options=options,
                                  run_metadata=run_metadata)

    # Invoke on-run-start callback and obtain response.
    run_start_resp = self.on_run_start(
        OnRunStartRequest(fetches, feed_dict, options, run_metadata,
                          self._run_call_count,
                          is_callable_runner=bool(callable_runner)))
    _check_type(run_start_resp, OnRunStartResponse)

    if run_start_resp.action == OnRunStartAction.DEBUG_RUN:
        # Decorate RunOption to fill in debugger tensor watch specifications.
        decorated_run_options = None
        if callable_options:
            callable_options_id = id(callable_options)
            if callable_options_id not in self._cached_callables_from_options:
                # Make a copy of callable_options to avoid mutating it.
                new_callable_options = config_pb2.CallableOptions()
                new_callable_options.CopyFrom(callable_options)
                decorated_run_options = new_callable_options.run_options
        else:
            decorated_run_options = options or config_pb2.RunOptions()

        run_metadata = run_metadata or config_pb2.RunMetadata()

        # `decorated_run_options` is None only when a cached callable will
        # be reused, in which case there is nothing to decorate.
        if decorated_run_options:
            self._decorate_run_options_for_debug(
                decorated_run_options,
                run_start_resp.debug_urls,
                debug_ops=run_start_resp.debug_ops,
                node_name_regex_whitelist=(
                    run_start_resp.node_name_regex_whitelist),
                op_type_regex_whitelist=(
                    run_start_resp.op_type_regex_whitelist),
                tensor_dtype_regex_whitelist=(
                    run_start_resp.tensor_dtype_regex_whitelist),
                tolerate_debug_op_creation_failures=(
                    run_start_resp.tolerate_debug_op_creation_failures))

        # Invoke the run() method of the wrapped Session. Catch any
        # TensorFlow runtime errors.
        tf_error = None
        try:
            if callable_runner:
                retvals = callable_runner(*callable_runner_args,
                                          options=decorated_run_options,
                                          run_metadata=run_metadata)
            elif callable_options:
                # pylint:disable=protected-access
                if callable_options_id in self._cached_callables_from_options:
                    callable_object = self._cached_callables_from_options[
                        callable_options_id]
                else:
                    callable_object = self._sess._make_callable_from_options(
                        new_callable_options)
                    self._cached_callables_from_options[
                        callable_options_id] = callable_object
                # pylint:enable=protected-access
                retvals = callable_object(*callable_runner_args,
                                          run_metadata=run_metadata)
            else:
                retvals = self._sess.run(fetches,
                                         feed_dict=feed_dict,
                                         options=decorated_run_options,
                                         run_metadata=run_metadata)
        except errors.OpError as op_error:
            if self._pass_through_operrors:
                raise op_error
            # Hand the error to the on-run-end callback and return it as
            # the run's value instead of raising.
            tf_error = op_error
            retvals = op_error

        run_end_req = OnRunEndRequest(
            run_start_resp.action,
            run_metadata=run_metadata,
            client_graph_def=self._sess.graph.as_graph_def(),
            tf_error=tf_error)

    elif run_start_resp.action == OnRunStartAction.PROFILE_RUN:
        # NOTE(review): `callable_options` is not consulted in this branch;
        # only `callable_runner` and plain `Session.run()` are handled.
        decorated_run_options = options or config_pb2.RunOptions()
        run_metadata = run_metadata or config_pb2.RunMetadata()
        self._decorate_run_options_for_profile(decorated_run_options)
        if callable_runner:
            retvals = callable_runner(*callable_runner_args,
                                      options=decorated_run_options,
                                      run_metadata=run_metadata)
        else:
            retvals = self._sess.run(fetches,
                                     feed_dict=feed_dict,
                                     options=decorated_run_options,
                                     run_metadata=run_metadata)
        run_end_req = OnRunEndRequest(
            run_start_resp.action,
            run_metadata=run_metadata,
            client_graph_def=self._sess.graph.as_graph_def())
    elif (run_start_resp.action == OnRunStartAction.NON_DEBUG_RUN or
          run_start_resp.action == OnRunStartAction.INVOKE_STEPPER):
        if callable_runner:
            raise NotImplementedError(
                "Stepper mode is not implemented for callables created by "
                "Session.make_callable().")

        if run_start_resp.action == OnRunStartAction.INVOKE_STEPPER:
            with stepper.NodeStepper(
                    self._sess, fetches, feed_dict) as node_stepper:
                retvals = self.invoke_node_stepper(
                    node_stepper, restore_variable_values_on_exit=True)

        # Invoke run() method of the wrapped session.
        # NOTE(review): this also runs after the stepper and replaces the
        # value returned by `invoke_node_stepper` -- confirm this is intended.
        retvals = self._sess.run(fetches,
                                 feed_dict=feed_dict,
                                 options=options,
                                 run_metadata=run_metadata)

        # Prepare arg for the on-run-end callback.
        run_end_req = OnRunEndRequest(run_start_resp.action)
    else:
        raise ValueError("Invalid OnRunStartAction value: %s" %
                         run_start_resp.action)

    # Invoke on-run-end callback and obtain response.
    run_end_resp = self.on_run_end(run_end_req)
    _check_type(run_end_resp, OnRunEndResponse)
    # Currently run_end_resp is only a placeholder. No action is taken on it.

    return retvals