Example #1
def receive_on_queue(queue, timeout=None):
    G_LOGGER.extra_verbose("Waiting for data to become available on queue")
    obj = queue.get(block=True, timeout=timeout)
    if is_compressed(obj):
        obj = decompress(obj)
    G_LOGGER.ultra_verbose("Received {:} on queue".format(obj))
    return obj
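
A note on the semantics here: with block=True and timeout=None, queue.get waits indefinitely, while a numeric timeout makes it raise queue.Empty if nothing arrives in time. A minimal standalone sketch of that behavior, using the standard library directly rather than the helpers above:

import queue
import threading

q = queue.Queue()
threading.Timer(0.1, lambda: q.put("payload")).start()

print(q.get(block=True, timeout=1.0))  # -> "payload" once the timer fires

try:
    q.get(block=True, timeout=0.05)    # nothing else arrives in time
except queue.Empty:
    print("timed out")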
Example #2
def mark_outputs(network, outputs):
    """
    Mark the specified outputs as network outputs.

    Args:
        network (trt.INetworkDefinition): The network in which to mark outputs.
        outputs (Sequence[str]): The names of tensors to mark as outputs.
    """
    outputs = set(outputs)
    all_outputs = []
    for layer in network:
        for index in range(layer.num_outputs):
            tensor = layer.get_output(index)
            all_outputs.append(tensor.name)
            # Clear all old outputs
            if tensor.is_network_output:
                network.unmark_output(tensor)

            if tensor.name in outputs:
                if not tensor.is_network_output:
                    G_LOGGER.ultra_verbose("Marking {:} as an output".format(
                        tensor.name))
                    network.mark_output(tensor)

    marked_outputs = set(_get_network_outputs(network))
    not_found = outputs - marked_outputs
    check_outputs_not_found(not_found, all_outputs)
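
A minimal usage sketch, assuming the TensorRT Python API is available and that mark_outputs (plus the helpers it calls) is importable from the surrounding module; the network construction itself is illustrative only:

import tensorrt as trt

builder = trt.Builder(trt.Logger(trt.Logger.WARNING))
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

inp = network.add_input("input", trt.float32, (1, 3))
shuffle = network.add_shuffle(inp)
shuffle.get_output(0).name = "intermediate"

# Mark the shuffle layer's output as the network output by name.
mark_outputs(network, ["intermediate"])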
Example #3
def send_on_queue(queue, obj):
    if sys.getsizeof(obj) > PIPE_MAX_SEND_BYTES:
        G_LOGGER.warning(
            "Object size ({:} bytes) exceeds maximum size that can be sent over queues ({:} bytes). "
            "Attempting to compress - this may take some time. If this does not work or you want to avoid "
            "the compression overhead, you should disable subprocesses by omitting the --use-subprocess flag, "
            "or by setting use_subprocess=False in Comparator.run().".format(
                sys.getsizeof(obj), PIPE_MAX_SEND_BYTES))
        obj = compress(obj)

    assert sys.getsizeof(obj) <= PIPE_MAX_SEND_BYTES

    G_LOGGER.ultra_verbose("Sending: {:} on queue".format(obj))
    queue.put(obj)
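
The two queue helpers above are meant to be used as a pair across a process boundary. A sketch of the intended pairing, assuming send_on_queue, receive_on_queue, and their module-level dependencies (compress, decompress, PIPE_MAX_SEND_BYTES, G_LOGGER) are importable:

from multiprocessing import Process, Queue

def worker(q):
    # The child process produces a (possibly large) result.
    send_on_queue(q, {"outputs": [0.0] * 1024})

if __name__ == "__main__":
    q = Queue()
    p = Process(target=worker, args=(q,))
    p.start()
    result = receive_on_queue(q, timeout=60)  # decompressed automatically if needed
    p.join()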
Example #4
        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.info("{:35}\n---- Model Input(s) ----\n{:}".format(active_runner.name, input_metadata),
                              mode=LogMode.ONCE)

                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.start("{:35} | Running {:} warm-up run(s)".format(active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning("{:} warm-up run(s) were requested, but data loader did not supply any data. "
                                         "Skipping warm-up run(s)".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose("Warm-up Input Buffers:\n{:}".format(util.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for _ in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)
                    G_LOGGER.finish("{:35} | Finished {:} warm-up run(s)".format(active_runner.name, warm_up))

                # Then, actual iterations.
                index = 0
                iteration_results = []

                total_runtime = 0
                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.extra_verbose(lambda: "{:35} | Feeding inputs:\n{:}".format(active_runner.name, util.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    total_runtime += runtime
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(IterationResult(outputs=copy.deepcopy(outputs), runtime=runtime, runner_name=active_runner.name))

                    G_LOGGER.info(lambda: "{:35}\n---- Model Output(s) ----\n{:}".format(
                                            active_runner.name, TensorMetadata().from_feed_dict(outputs)),
                                  mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(lambda: "{:35} | Inference Time: {:.3f} ms | Received outputs:\n{:}".format(
                                                        active_runner.name, runtime * 1000.0, util.indent_block(outputs)))

                total_runtime_ms = total_runtime * 1000.0
                G_LOGGER.finish("{:35} | Completed {:} iteration(s) in {:.4g} ms | Average inference time: {:.4g} ms.".format(active_runner.name, index + 1, total_runtime_ms, total_runtime_ms / float(index + 1)))
                return iteration_results
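
The copy.deepcopy in the loop above is load-bearing: runners typically reuse their output buffers between inferences, so without a copy every IterationResult would end up aliasing the final run's data. A standalone illustration of the aliasing problem:

import copy
import numpy as np

buffer = {"out": np.zeros(3)}  # stands in for a runner's reused output dict
aliased, copied = [], []
for i in range(3):
    buffer["out"][:] = i              # the "runner" writes into the same array
    aliased.append(buffer)            # every entry points at the same buffer
    copied.append(copy.deepcopy(buffer))

print(aliased[0]["out"])  # [2. 2. 2.] -- clobbered by the last iteration
print(copied[0]["out"])   # [0. 0. 0.] -- preserved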
Example #5
    def call_impl(self):
        """
        Returns:
            Tuple[tf.Graph, Sequence[str]]: The TensorFlow graph, and the names of its outputs.
        """
        (graph, output_names), _ = util.invoke_if_callable(self._graph)
        with tf.Session(graph=graph) as sess:
            sess.run(tf.initializers.global_variables())
            sess.run(tf.initializers.local_variables())

            graphdef = sess.graph.as_graph_def()
            # remove_training_nodes returns a new GraphDef rather than modifying
            # its argument in place, so keep the returned graph.
            graphdef = tf.graph_util.remove_training_nodes(graphdef)
            G_LOGGER.ultra_verbose("Graph after removing training nodes: {:}".format(graphdef))

            for node in graphdef.node:
                if node.op == "RefSwitch":
                    node.op = "Switch"
                    for index in range(len(node.input)):
                        if "moving_" in node.input[index]:
                            node.input[index] = node.input[index] + "/read"
                elif node.op == "AssignSub":
                    node.op = "Sub"
                    if "use_locking" in node.attr:
                        del node.attr["use_locking"]
                elif node.op == "AssignAdd":
                    node.op = "Add"
                    if "use_locking" in node.attr:
                        del node.attr["use_locking"]
                elif node.op == "Assign":
                    node.op = "Identity"
                    if "use_locking" in node.attr:
                        del node.attr["use_locking"]
                    if "validate_shape" in node.attr:
                        del node.attr["validate_shape"]
                    if len(node.input) == 2:
                        # input0: ref: Should be from a Variable node. May be uninitialized.
                        # input1: value: The value to be assigned to the variable.
                        node.input[0] = node.input[1]
                        del node.input[1]

            # Strip port information from outputs
            output_names = [name.split(":")[0] for name in output_names]
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, graphdef, output_names)
            output_graph_def = self.constfold(output_graph_def, output_names)
            return graph_from_frozen(output_graph_def)
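
The op rewrites above (RefSwitch to Switch, Assign to Identity, and so on) exist to make graphs containing variable-assignment ops freezable. A minimal sketch of the freezing step itself, using the TF1-style API (tf.compat.v1 under TensorFlow 2); the graph is illustrative:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

graph = tf.Graph()
with graph.as_default():
    v = tf.get_variable("v", shape=[3], initializer=tf.zeros_initializer())
    tf.identity(v + 1.0, name="out")

with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    # Replaces each variable with a Const node holding its current value.
    frozen = tf.graph_util.convert_variables_to_constants(
        sess, graph.as_graph_def(), ["out"])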
Example #6
def find_in_dict(name, mapping, index=None):
    """
    Attempts to find a key in a dictionary, checking for an exact match first, then
    case-insensitive substring matches, and finally falling back to index-based lookup.

    Args:
        name (str): The key to search for.
        mapping (dict): The dictionary to search in.
        index (int): An index to fall back to if the key could not be found by name.

    Returns:
        str: The key found in the dict, or None if it could not be found.
    """
    G_LOGGER.ultra_verbose("Searching for key: {:}. Fallback index is set to {:}".format(name, index))
    if name in mapping:
        return name
    for key in mapping.keys():
        if name.lower() in key.lower() or key.lower() in name.lower():
            return key
    if index is not None and 0 <= index < len(mapping):
        return list(mapping.keys())[index]
    return None
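
A usage example (the key names are illustrative; note that the index fallback relies on dict insertion order, which Python 3.7+ guarantees):

weights = {"conv1/kernel": 1, "conv1/bias": 2}

find_in_dict("conv1/kernel", weights)  # exact match      -> "conv1/kernel"
find_in_dict("bias", weights)          # substring match  -> "conv1/bias"
find_in_dict("missing", weights, 0)    # index fallback   -> "conv1/kernel"
find_in_dict("missing", weights)       # no match         -> None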
Example #7
def get_active_profile_bindings(context):
    """
    Gets the start and end binding indices for the active optimization profile.

    Args:
        context (trt.IExecutionContext): The context where the profile is currently set.

    Returns:
        Tuple[int, int]: The start and end binding indices, in that order.
    """
    active_profile = context.active_optimization_profile
    bindings_per_profile = get_bindings_per_profile(context.engine)

    start_binding = bindings_per_profile * active_profile
    end_binding = start_binding + bindings_per_profile

    G_LOGGER.ultra_verbose(
        "Total # of Profiles: {:}, Bindings Per Profile: {:}, Active Profile: {:}, "
        "Start Binding: {:}, End Binding: {:}".format(
            context.engine.num_optimization_profiles, bindings_per_profile,
            active_profile, start_binding, end_binding))
    return start_binding, end_binding
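
The get_bindings_per_profile helper is not shown above. A sketch consistent with how TensorRT lays out bindings (one contiguous block of binding slots per optimization profile) would be:

def get_bindings_per_profile(engine):
    # Each profile gets one binding slot per I/O tensor, laid out contiguously,
    # so the per-profile count is the total divided by the number of profiles.
    return engine.num_bindings // engine.num_optimization_profiles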