def mark_outputs(network, outputs):
    """
    Mark the specified outputs as network outputs.

    Args:
        network (trt.INetworkDefinition): The network in which to mark outputs.
        outputs (Sequence[str]): The names of tensors to mark as outputs.
    """
    desired_outputs = set(outputs)

    tensor_names = []
    for layer in network:
        for out_idx in range(layer.num_outputs):
            tensor = layer.get_output(out_idx)
            tensor_names.append(tensor.name)
            # Unmark every previously-marked output so that afterwards only the
            # requested tensors remain marked.
            if tensor.is_network_output:
                network.unmark_output(tensor)
            if tensor.name in desired_outputs and not tensor.is_network_output:
                G_LOGGER.ultra_verbose("Marking {:} as an output".format(tensor.name))
                network.mark_output(tensor)

    # Report any requested outputs that do not exist in the network, listing
    # every tensor name we saw to help the user correct the request.
    not_found = desired_outputs - set(_get_network_outputs(network))
    check_outputs_not_found(not_found, tensor_names)
def __call__(self):
    """
    Deserializes an engine from a buffer.

    Returns:
        trt.ICudaEngine: The deserialized engine.
    """
    buffer, _ = misc.try_call(self._serialized_engine)

    trt.init_libnvinfer_plugins(trt_util.TRT_LOGGER, "")
    with trt.Runtime(trt_util.TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(buffer)
        if not engine:
            # BUG FIX: emit the diagnostic BEFORE the critical error.
            # G_LOGGER.critical aborts by raising (standard Polygraphy logger
            # behavior — confirm against the logger implementation), so any
            # logging placed after it would never execute.
            G_LOGGER.ultra_verbose(lambda: "Note: serialized_engine was: {:}".format(buffer))
            G_LOGGER.critical("Could not load engine")
        return engine
def __call__(self):
    """
    Freezes a TensorFlow graph, and folds constants.

    Returns:
        Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
    """
    (graph, output_names), _ = misc.try_call(self._graph)

    with tf.Session(graph=graph) as sess:
        sess.run(tf.initializers.global_variables())
        sess.run(tf.initializers.local_variables())

        graphdef = sess.graph.as_graph_def()
        # BUG FIX: remove_training_nodes returns a NEW GraphDef — it does not
        # modify its argument in place. The original assigned the result to a
        # throwaway name, so training nodes were never actually removed.
        graphdef = tf.graph_util.remove_training_nodes(graphdef)
        G_LOGGER.ultra_verbose("Graph after removing training nodes: {:}".format(graphdef))

        # Rewrite variable-assignment ops into inference-friendly equivalents
        # so convert_variables_to_constants below can fold them.
        for node in graphdef.node:
            if node.op == 'RefSwitch':
                node.op = 'Switch'
                # Redirect moving-average inputs to their '/read' snapshots.
                for index in range(len(node.input)):
                    if 'moving_' in node.input[index]:
                        node.input[index] = node.input[index] + '/read'
            elif node.op == 'AssignSub':
                node.op = 'Sub'
                if 'use_locking' in node.attr:
                    del node.attr['use_locking']
            elif node.op == 'AssignAdd':
                node.op = 'Add'
                if 'use_locking' in node.attr:
                    del node.attr['use_locking']
            elif node.op == 'Assign':
                node.op = 'Identity'
                if 'use_locking' in node.attr:
                    del node.attr['use_locking']
                if 'validate_shape' in node.attr:
                    del node.attr['validate_shape']
                if len(node.input) == 2:
                    # input0: ref: Should be from a Variable node. May be uninitialized.
                    # input1: value: The value to be assigned to the variable.
                    node.input[0] = node.input[1]
                    del node.input[1]

        # Strip port information from outputs
        output_names = [name.split(":")[0] for name in output_names]
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess, graphdef, output_names)
        output_graph_def = self.constfold(output_graph_def, output_names)
        return func.invoke(GraphFromFrozen(output_graph_def))
def get_active_profile_bindings(context):
    """
    Gets the start and end binding indices for the active optimization profile.

    Args:
        context (trt.IExecutionContext): The context where the profile is currently set.
            The engine is accessed via ``context.engine``.

    Returns:
        Tuple[int, int]: The start and end binding indices, in that order
    """
    # DOC FIX: the original docstring documented a nonexistent `engine` parameter;
    # the function takes only `context` and reads the engine from it.
    active_profile = context.active_optimization_profile
    bindings_per_profile = get_bindings_per_profile(context.engine)

    # Bindings are laid out contiguously, one full set per profile, so the
    # active profile's slice is a simple offset computation.
    start_binding = bindings_per_profile * active_profile
    end_binding = start_binding + bindings_per_profile

    G_LOGGER.ultra_verbose("Total # of Profiles: {:}, Bindings Per Profile: {:}, Active Profile: {:}, "
                           "Start Binding: {:}, End Binding: {:}".format(
                               context.engine.num_optimization_profiles, bindings_per_profile,
                               active_profile, start_binding, end_binding))
    return start_binding, end_binding
def execute_runner(runner, loader_cache):
    """
    Activate ``runner``, run inference over every feed_dict supplied by
    ``loader_cache``, and collect one IterationResult per iteration.

    Args:
        runner: A runner usable as a context manager, providing
            get_input_metadata(), infer(), and last_inference_time().
        loader_cache: A DataLoaderCache supplying feed_dicts (indexable and iterable).

    Returns:
        List[IterationResult]: One result per data-loader iteration, each holding
        a deep copy of the runner's outputs plus its inference time.
    """
    # NOTE(review): `warm_up` is a free variable here — presumably captured from an
    # enclosing scope (e.g. a closure in the comparator's run method) that is not
    # visible in this chunk; confirm where it is bound.
    with runner as active_runner:
        input_metadata = active_runner.get_input_metadata()
        G_LOGGER.info("Runner: {:40} | Input Metadata: {:}".format(active_runner.name, input_metadata),
                      mode=LogMode.ONCE)
        # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
        # based on the provided input_metadata.
        loader_cache.set_input_metadata(input_metadata)

        if warm_up:
            G_LOGGER.start("Runner: {:40} | Running {:} warm-up runs".format(active_runner.name, warm_up))
            try:
                # Warm-up reuses the first feed_dict for every warm-up run.
                feed_dict = loader_cache[0]
            except IndexError:
                G_LOGGER.warning("{:} warm-up runs were requested, but data loader did not supply any data. "
                                 "Skipping warm-up runs".format(warm_up))
            else:
                G_LOGGER.ultra_verbose("Warm-up Input Buffers:\n{:}".format(misc.indent_block(feed_dict)))
                # First do a few warm-up runs, and don't time them.
                for i in range(warm_up):
                    active_runner.infer(feed_dict=feed_dict)

        # Then, actual iterations.
        index = 0  # Initialized here so the final log below is defined even if the loader yields nothing.
        iteration_results = []
        output_metadata = TensorMetadata()
        for index, feed_dict in enumerate(loader_cache):
            # NOTE(review): these lambdas capture loop variables by reference; this
            # assumes G_LOGGER invokes them immediately at log time — confirm.
            G_LOGGER.extra_verbose(lambda: "Runner: {:40} | Feeding inputs:\n{:}".format(
                active_runner.name, misc.indent_block(feed_dict)))
            outputs = active_runner.infer(feed_dict=feed_dict)
            runtime = active_runner.last_inference_time()
            # Without a deep copy here, outputs will always reference the output of the last run
            iteration_results.append(IterationResult(outputs=copy.deepcopy(outputs),
                                                     runtime=runtime,
                                                     runner_name=active_runner.name))
            if index == 0:
                # Output metadata is recorded from the first iteration only.
                for name, out in outputs.items():
                    output_metadata.add(name, out.dtype, out.shape)
                G_LOGGER.info("Runner: {:40} | Output Metadata: {:}".format(active_runner.name, output_metadata),
                              mode=LogMode.ONCE)
            G_LOGGER.extra_verbose(lambda: "Runner: {:40} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                                   .format(active_runner.name, runtime * 1000.0, misc.indent_block(outputs)))
        G_LOGGER.finish("Runner: {:40} | Completed {:} iterations.".format(active_runner.name, index + 1))
    return iteration_results