Example 1
def mark_outputs(network, outputs):
    """
    Mark the specified outputs as network outputs.

    Args:
        network (trt.INetworkDefinition): The network in which to mark outputs.
        outputs (Sequence[str]): The names of tensors to mark as outputs.
    """
    outputs = set(outputs)
    all_outputs = []
    for layer in network:
        for index in range(layer.num_outputs):
            tensor = layer.get_output(index)
            all_outputs.append(tensor.name)
            # Clear all old outputs
            if tensor.is_network_output:
                network.unmark_output(tensor)

            if tensor.name in outputs:
                if not tensor.is_network_output:
                    G_LOGGER.ultra_verbose("Marking {:} as an output".format(tensor.name))
                    network.mark_output(tensor)

    marked_outputs = set(_get_network_outputs(network))
    not_found = outputs - marked_outputs
    check_outputs_not_found(not_found, all_outputs)
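A minimal usage sketch follows. The builder/network setup is illustrative boilerplate, not part of the example, and the tensor names passed to mark_outputs are hypothetical:

# Hypothetical usage: mark two intermediate tensors as network outputs.
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
# ... populate the network here, e.g. via an ONNX parser ...

mark_outputs(network, outputs=["conv1_out", "fc_out"])  # hypothetical tensor names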
Example 2
    def __call__(self):
        """
        Deserializes an engine from a buffer.

        Returns:
            trt.ICudaEngine: The deserialized engine.
        """
        buffer, _ = misc.try_call(self._serialized_engine)

        trt.init_libnvinfer_plugins(trt_util.TRT_LOGGER, "")
        with trt.Runtime(trt_util.TRT_LOGGER) as runtime:
            engine = runtime.deserialize_cuda_engine(buffer)
            if not engine:
                # G_LOGGER.critical() raises an exception, so log the diagnostic note first.
                G_LOGGER.ultra_verbose(lambda: "Note: serialized_engine was: {:}".format(buffer))
                G_LOGGER.critical("Could not load engine")
        return engine
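For context, this __call__ might be driven as follows. The class name EngineFromBytes and the file path are assumptions for the sake of the sketch:

# Hypothetical usage: deserialize an engine previously saved to disk.
with open("model.engine", "rb") as f:  # illustrative path
    serialized_engine = f.read()

load_engine = EngineFromBytes(serialized_engine)  # assumed owning class
engine = load_engine()  # runs the __call__ shown above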
Example 3
    def __call__(self):
        """
        Freezes a TensorFlow graph, and folds constants.

        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        (graph, output_names), _ = misc.try_call(self._graph)
        with tf.Session(graph=graph) as sess:
            sess.run(tf.initializers.global_variables())
            sess.run(tf.initializers.local_variables())

            graphdef = sess.graph.as_graph_def()
            # NOTE: remove_training_nodes returns a new, simplified GraphDef rather
            # than modifying its argument, so the result must be assigned back.
            graphdef = tf.graph_util.remove_training_nodes(graphdef)
            G_LOGGER.ultra_verbose("Graph after removing training nodes: {:}".format(graphdef))

            # Rewrite reference/assignment ops into stateless equivalents so that
            # variables can be converted to constants when freezing.
            for node in graphdef.node:
                if node.op == 'RefSwitch':
                    node.op = 'Switch'
                    for index in range(len(node.input)):
                        if 'moving_' in node.input[index]:
                            node.input[index] = node.input[index] + '/read'
                elif node.op == 'AssignSub':
                    node.op = 'Sub'
                    if 'use_locking' in node.attr:
                        del node.attr['use_locking']
                elif node.op == 'AssignAdd':
                    node.op = 'Add'
                    if 'use_locking' in node.attr:
                        del node.attr['use_locking']
                elif node.op == 'Assign':
                    node.op = 'Identity'
                    if 'use_locking' in node.attr:
                        del node.attr['use_locking']
                    if 'validate_shape' in node.attr:
                        del node.attr['validate_shape']
                    if len(node.input) == 2:
                        # input0: ref: Should be from a Variable node. May be uninitialized.
                        # input1: value: The value to be assigned to the variable.
                        node.input[0] = node.input[1]
                        del node.input[1]

            # Strip port information from outputs
            output_names = [name.split(":")[0] for name in output_names]
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, graphdef, output_names)
            output_graph_def = self.constfold(output_graph_def, output_names)
            return func.invoke(GraphFromFrozen(output_graph_def))
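A sketch of how this loader might be invoked, assuming it wraps a callable that returns a (tf.Graph, output_names) tuple; the class name FreezeGraph is hypothetical:

import tensorflow as tf

def build_graph():
    # Hypothetical TF1-style graph with a single dense layer.
    graph = tf.Graph()
    with graph.as_default():
        inp = tf.placeholder(tf.float32, shape=[1, 4], name="input")
        out = tf.layers.dense(inp, units=2, name="dense")
    return graph, [out.name]

freeze = FreezeGraph(build_graph)  # hypothetical class owning the __call__ above
frozen_graph, output_names = freeze()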
Example 4
def get_active_profile_bindings(context):
    """
    Gets the start and end binding indices for the active optimization profile.

    Args:
        engine (trt.ICudaEngine): The engine in question.
        context (trt.IExecutionContext): The context where the profile is currently set.

    Returns:
        Tuple[int, int]: The start and end bindings indices, in that order
    """
    active_profile = context.active_optimization_profile
    bindings_per_profile = get_bindings_per_profile(context.engine)

    start_binding = bindings_per_profile * active_profile
    end_binding = start_binding + bindings_per_profile

    G_LOGGER.ultra_verbose("Total # of Profiles: {:}, Bindings Per Profile: {:}, Active Profile: {:}, "
                           "Start Binding: {:}, End Binding: {:}".format(
                                context.engine.num_optimization_profiles, bindings_per_profile,
                                active_profile, start_binding, end_binding))
    return start_binding, end_binding
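A usage sketch, assuming engine and context already exist; the profile index is illustrative:

# Hypothetical usage: list the binding names covered by the active profile.
context.active_optimization_profile = 0  # select a profile (illustrative)
start_binding, end_binding = get_active_profile_bindings(context)
for binding in range(start_binding, end_binding):
    print(context.engine.get_binding_name(binding))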
Example 5
        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.info(
                    "Runner: {:40} | Input Metadata: {:}".format(active_runner.name, input_metadata),
                    mode=LogMode.ONCE)
                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.start(
                        "Runner: {:40} | Running {:} warm-up runs".format(
                            active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning(
                            "{:} warm-up runs were requested, but data loader did not supply any data. "
                            "Skipping warm-up runs".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose(
                            "Warm-up Input Buffers:\n{:}".format(
                                misc.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for i in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)

                # Then, actual iterations.
                index = 0
                iteration_results = []
                output_metadata = TensorMetadata()

                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.extra_verbose(
                        lambda: "Runner: {:40} | Feeding inputs:\n{:}".format(
                            active_runner.name, misc.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(
                        IterationResult(outputs=copy.deepcopy(outputs),
                                        runtime=runtime,
                                        runner_name=active_runner.name))

                    if index == 0:
                        for name, out in outputs.items():
                            output_metadata.add(name, out.dtype, out.shape)

                    G_LOGGER.info(
                        "Runner: {:40} | Output Metadata: {:}".format(
                            active_runner.name, output_metadata),
                        mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(
                        lambda:
                        "Runner: {:40} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                        .format(active_runner.name, runtime * 1000.0,
                                misc.indent_block(outputs)))

                G_LOGGER.finish(
                    "Runner: {:40} | Completed {:} iterations.".format(
                        active_runner.name, index + 1))
                return iteration_results
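execute_runner closes over warm_up, so it is clearly an inner helper; the surrounding driver it implies might look like the sketch below. All names here are assumptions, not the library's API:

# Hypothetical driver: run every runner against a shared data loader cache
# and collect per-runner iteration results, keyed by runner name.
def run_comparison(runners, loader_cache, warm_up=0):
    def execute_runner(runner, loader_cache):
        ...  # body as shown above; closes over warm_up

    results = {}
    for runner in runners:
        results[runner.name] = execute_runner(runner, loader_cache)
    return results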