def receive_on_queue(queue, timeout=None):
    """
    Block until an object becomes available on the given queue and return it,
    transparently decompressing it if the sender compressed it.

    Args:
        queue: The queue to read from.
        timeout (float):
                How long to wait for data before giving up,
                or None to wait indefinitely.

    Returns:
        object: The object received from the queue.
    """
    G_LOGGER.extra_verbose("Waiting for data to become available on queue")
    received = queue.get(block=True, timeout=timeout)
    # Senders compress over-sized payloads; undo that here if needed.
    if is_compressed(received):
        received = decompress(received)
    G_LOGGER.ultra_verbose("Received {:} on queue".format(received))
    return received
def mark_outputs(network, outputs):
    """
    Mark the specified outputs as network outputs.

    Args:
        network (trt.INetworkDefinition): The network in which to mark outputs.
        outputs (Sequence[str]): The names of tensors to mark as outputs.
    """
    requested = set(outputs)

    seen_tensor_names = []
    for layer in network:
        for out_index in range(layer.num_outputs):
            out_tensor = layer.get_output(out_index)
            seen_tensor_names.append(out_tensor.name)
            # Clear all old outputs
            if out_tensor.is_network_output:
                network.unmark_output(out_tensor)

            if out_tensor.name in requested and not out_tensor.is_network_output:
                G_LOGGER.ultra_verbose("Marking {:} as an output".format(out_tensor.name))
                network.mark_output(out_tensor)

    not_found = requested - set(_get_network_outputs(network))
    check_outputs_not_found(not_found, seen_tensor_names)
def send_on_queue(queue, obj):
    """
    Send an object over a queue, compressing it first if it exceeds the
    maximum size that can be sent in a single piece.

    Args:
        queue: The queue to send the object over.
        obj (object): The object to send.

    Raises:
        AssertionError: If the object is still too large after compression.
    """
    if sys.getsizeof(obj) > PIPE_MAX_SEND_BYTES:
        G_LOGGER.warning(
            "Object size ({:} bytes) exceeds maximum size that can be sent over queues ({:} bytes). "
            "Attempting to compress - this may take some time. If this does not work or you want to avoid "
            "the compression overhead, you should disable subprocesses by omitting the --use-subprocess flag, "
            "or by setting use_subprocess=False in Comparator.run().".format(
                sys.getsizeof(obj), PIPE_MAX_SEND_BYTES))
        obj = compress(obj)

    # Do not use a bare `assert` here: asserts are stripped when Python runs with -O,
    # which would silently let over-sized objects through and fail later in queue.put().
    # Raising AssertionError explicitly keeps the exception type callers may expect.
    if sys.getsizeof(obj) > PIPE_MAX_SEND_BYTES:
        raise AssertionError(
            "Object is still too large to send over queue after compression "
            "({:} bytes > {:} bytes)".format(sys.getsizeof(obj), PIPE_MAX_SEND_BYTES))

    G_LOGGER.ultra_verbose("Sending: {:} on queue".format(obj))
    queue.put(obj)
def execute_runner(runner, loader_cache):
    """
    Activate a runner and run inference over every feed_dict supplied by the
    data loader cache, collecting outputs and per-iteration runtimes.

    NOTE(review): ``warm_up`` is a free variable here — presumably captured from an
    enclosing scope that defines the number of warm-up runs. Confirm it is in scope
    wherever this function is defined.

    Args:
        runner: The runner to execute. Used as a context manager, so it is
            activated on entry and deactivated on exit.
        loader_cache: Supplies feed_dicts, both by index (for warm-up) and by
            iteration (for timed runs).

    Returns:
        List[IterationResult]: One result per iteration, holding deep-copied
        outputs, the inference runtime, and the runner's name.
    """
    with runner as active_runner:
        input_metadata = active_runner.get_input_metadata()
        G_LOGGER.info("{:35}\n---- Model Input(s) ----\n{:}".format(active_runner.name, input_metadata),
                      mode=LogMode.ONCE)

        # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
        # based on the provided input_metadata.
        loader_cache.set_input_metadata(input_metadata)

        if warm_up:
            G_LOGGER.start("{:35} | Running {:} warm-up run(s)".format(active_runner.name, warm_up))
            try:
                feed_dict = loader_cache[0]
            except IndexError:
                G_LOGGER.warning("{:} warm-up run(s) were requested, but data loader did not supply any data. "
                                 "Skipping warm-up run(s)".format(warm_up))
            else:
                G_LOGGER.ultra_verbose("Warm-up Input Buffers:\n{:}".format(util.indent_block(feed_dict)))
                # First do a few warm-up runs, and don't time them.
                for _ in range(warm_up):
                    active_runner.infer(feed_dict=feed_dict)
                G_LOGGER.finish("{:35} | Finished {:} warm-up run(s)".format(active_runner.name, warm_up))

        # Then, actual iterations.
        # NOTE(review): `index` is pre-initialized so the summary below does not raise
        # if the loader yields nothing — but in that case it still reports 1 iteration.
        index = 0
        iteration_results = []

        total_runtime = 0
        for index, feed_dict in enumerate(loader_cache):
            # Lambdas defer the (potentially expensive) formatting until the logger
            # actually emits at this verbosity.
            G_LOGGER.extra_verbose(lambda: "{:35} | Feeding inputs:\n{:}".format(active_runner.name,
                                                                                 util.indent_block(feed_dict)))
            outputs = active_runner.infer(feed_dict=feed_dict)

            runtime = active_runner.last_inference_time()
            total_runtime += runtime
            # Without a deep copy here, outputs will always reference the output of the last run
            iteration_results.append(IterationResult(outputs=copy.deepcopy(outputs), runtime=runtime,
                                                     runner_name=active_runner.name))

            G_LOGGER.info(lambda: "{:35}\n---- Model Output(s) ----\n{:}".format(
                active_runner.name, TensorMetadata().from_feed_dict(outputs)), mode=LogMode.ONCE)
            G_LOGGER.extra_verbose(lambda: "{:35} | Inference Time: {:.3f} ms | Received outputs:\n{:}".format(
                active_runner.name, runtime * 1000.0, util.indent_block(outputs)))

        total_runtime_ms = total_runtime * 1000.0
        G_LOGGER.finish("{:35} | Completed {:} iteration(s) in {:.4g} ms | Average inference time: {:.4g} ms.".format(
            active_runner.name, index + 1, total_runtime_ms, total_runtime_ms / float(index + 1)))
        return iteration_results
def call_impl(self):
    """
    Returns:
        Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
    """
    # self._graph may be a loader callable or a concrete (graph, output_names) pair.
    (graph, output_names), _ = util.invoke_if_callable(self._graph)

    with tf.Session(graph=graph) as sess:
        sess.run(tf.initializers.global_variables())
        sess.run(tf.initializers.local_variables())

        graphdef = sess.graph.as_graph_def()
        removed = tf.graph_util.remove_training_nodes(graphdef)
        G_LOGGER.ultra_verbose("Removed nodes: {:}".format(removed))

        # Rewrite variable-assignment ops into their stateless equivalents so the
        # graph can be frozen. NOTE(review): this looks like the common workaround
        # for freezing graphs containing RefSwitch/Assign* ops (e.g. batch norm
        # moving averages) — confirm against the model types this loader supports.
        for node in graphdef.node:
            if node.op == "RefSwitch":
                node.op = "Switch"
                for index in range(len(node.input)):
                    # Redirect moving-average inputs to the variable's read tensor.
                    if "moving_" in node.input[index]:
                        node.input[index] = node.input[index] + "/read"
            elif node.op == "AssignSub":
                node.op = "Sub"
                if "use_locking" in node.attr:
                    del node.attr["use_locking"]
            elif node.op == "AssignAdd":
                node.op = "Add"
                if "use_locking" in node.attr:
                    del node.attr["use_locking"]
            elif node.op == "Assign":
                node.op = "Identity"
                if "use_locking" in node.attr:
                    del node.attr["use_locking"]
                if "validate_shape" in node.attr:
                    del node.attr["validate_shape"]
                if len(node.input) == 2:
                    # input0: ref: Should be from a Variable node. May be uninitialized.
                    # input1: value: The value to be assigned to the variable.
                    node.input[0] = node.input[1]
                    del node.input[1]

        # Strip port information from outputs
        output_names = [name.split(":")[0] for name in output_names]
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess, graphdef, output_names)
        output_graph_def = self.constfold(output_graph_def, output_names)
        return graph_from_frozen(output_graph_def)
def find_in_dict(name, mapping, index=None):
    """
    Attempts to partially match keys in a dictionary. Checks for exact matches and
    substring matches, falling back to index based matching.

    Args:
        name (str): The key to search for.
        mapping (dict): The dictionary to search in.
        index (int): An index to fall back to if the key could not be found by name.

    Returns:
        str: The key found in the dict, or None if it could not be found.
    """
    G_LOGGER.ultra_verbose("Searching for key: {:}. Fallback index is set to {:}".format(name, index))
    if name in mapping:
        return name

    # Case-insensitive substring match in either direction.
    # Hoist name.lower() out of the loop and lowercase each key only once.
    name_lower = name.lower()
    for key in mapping:
        key_lower = key.lower()
        if name_lower in key_lower or key_lower in name_lower:
            return key

    # Positional fallback, only when the index is within range.
    if index is not None and 0 <= index < len(mapping):
        return list(mapping.keys())[index]
    return None
def get_active_profile_bindings(context):
    """
    Gets the start and end binding indices for the active optimization profile.

    Args:
        context (trt.IExecutionContext): The context where the profile is currently set.
            The engine is accessed via ``context.engine``.

    Returns:
        Tuple[int, int]: The start and end binding indices, in that order
    """
    active_profile = context.active_optimization_profile
    bindings_per_profile = get_bindings_per_profile(context.engine)
    # Each profile owns a contiguous run of bindings, so the active profile's
    # range is [active_profile * bindings_per_profile, start + bindings_per_profile).
    start_binding = bindings_per_profile * active_profile
    end_binding = start_binding + bindings_per_profile
    G_LOGGER.ultra_verbose(
        "Total # of Profiles: {:}, Bindings Per Profile: {:}, Active Profile: {:}, "
        "Start Binding: {:}, End Binding: {:}".format(
            context.engine.num_optimization_profiles, bindings_per_profile, active_profile,
            start_binding, end_binding))
    return start_binding, end_binding