Example #1
def receive_on_queue(queue, timeout=None):
    G_LOGGER.extra_verbose("Waiting for data to become available on queue")
    obj = queue.get(block=True, timeout=timeout)
    if is_compressed(obj):
        obj = decompress(obj)
    G_LOGGER.ultra_verbose("Received {:} on queue".format(obj))
    return obj
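
A note on usage: receive_on_queue is a thin wrapper around queue.get, so it inherits the blocking/timeout semantics of multiprocessing.Queue. The sketch below uses only the standard library (no Polygraphy helpers) to illustrate that behavior; an empty queue raises queue.Empty once the timeout expires.

import queue
from multiprocessing import Queue

q = Queue()
q.put({"x": [1, 2, 3]})

print(q.get(block=True, timeout=5))   # -> {'x': [1, 2, 3]}

try:
    q.get(block=True, timeout=0.1)    # empty queue: raises queue.Empty after the timeout
except queue.Empty:
    print("timed out")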
Example #2
    def try_permute(arr, shape):
        original_shape = arr.shape

        if sorted(arr.shape) != sorted(shape):
            G_LOGGER.extra_verbose("Array of shape: {:} cannot be permuted to: {:}".format(arr.shape, shape))
            return arr

        # We need to remove axes from the original shape as we use them to avoid
        # duplication in the permutation.
        arr_shape_indices = {index: dimlen for index, dimlen in enumerate(arr.shape)}

        # Find which axis in arr.shape corresponds to the specified size. Never returns duplicates.
        def find_axis(dimlen):
            nonlocal arr_shape_indices
            for index, d in arr_shape_indices.items():
                if d == dimlen:
                    del arr_shape_indices[index]
                    return index

        try:
            perm = [find_axis(dimlen) for dimlen in shape]
            arr = np.transpose(arr, perm)
        except Exception as err:
            G_LOGGER.extra_verbose("Skipping permutation due to {:}".format(err))
        else:
            if arr.shape != original_shape:
                G_LOGGER.info(
                    "Permuted array of shape: {:} to: {:} using permutation {:}".format(original_shape, arr.shape, perm)
                )
        return arr
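
For reference, a concrete case of the permutation computed by the helper above (a minimal sketch using plain NumPy): for an array of shape (1, 3, 28, 28) and target shape (1, 28, 28, 3), find_axis maps each target dimension to an unused source axis, yielding the permutation [0, 2, 3, 1].

import numpy as np

arr = np.zeros((1, 3, 28, 28))
permuted = np.transpose(arr, [0, 2, 3, 1])   # the permutation try_permute would compute
assert permuted.shape == (1, 28, 28, 3)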
Example #3
    def is_output_node(node):
        # Make sure that we're not using hanging nodes as outputs - must have at least one input.
        if len(node_output_map[node.name]) != 0 or len(node.input) == 0:
            return False

        # Tensors with no shape cannot be outputs and TensorFlow doesn't like certain ops as outputs.
        EXCLUDE_OPS = [
            "Switch",
            "FusedBatchNorm",
            "Assert",
            "NextIteration",
            "Enter",
            "LoopCond",
            "Exit",
            "Print",
            "Assign",
            "NoOp",
            "ReadVariableOp",
            "VarIsInitializedOp",
            "Const",
        ]

        # Additionally, we sometimes need to exclude entire namespaces e.g. while loops.
        EXCLUDE_NAMESPACES = ["while", "Assert"]

        if any([ex_op in node.op for ex_op in EXCLUDE_OPS]) or any(
            [ns in node.name for ns in EXCLUDE_NAMESPACES]):
            G_LOGGER.extra_verbose(
                "Excluding {:}, op {:} is not a valid output op or is part of an excluded namespace "
                "(Note: excluded namespaces: {:})".format(
                    node.name, node.op, EXCLUDE_NAMESPACES))
            return False

        return True
Example #4
    def infer_impl(self, feed_dict):
        G_LOGGER.extra_verbose("Received feed_dict: {:}".format(feed_dict))
        start = time.time()
        inference_outputs = self.sess.run(self.output_names,
                                          feed_dict=feed_dict,
                                          options=self.run_options,
                                          run_metadata=self.run_metadata)
        end = time.time()

        out_dict = OrderedDict()
        for name, out in zip(self.output_names, inference_outputs):
            out_dict[name] = out
        self.inference_time = end - start

        if self.timeline_dir is not None:
            from tensorflow.python.client import timeline

            t1 = timeline.Timeline(self.run_metadata.step_stats)

            util.save_file(
                contents=t1.generate_chrome_trace_format(),
                dest=os.path.join(self.timeline_dir,
                                  "run-{:}".format(self.num_inferences)),
                mode="w",
            )
        self.num_inferences += 1

        return out_dict
Example #5
        def allocate_buffers(engine):
            input_buffers = OrderedDict()
            output_buffers = OrderedDict()
            bindings = []
            stream = cuda.Stream()
            G_LOGGER.verbose("Using batch size: " +
                             str(engine.max_batch_size) +
                             " during buffer allocation")
            for binding in engine:
                shape = (engine.max_batch_size, ) + tuple(
                    engine.get_binding_shape(binding))
                dtype = engine.get_binding_dtype(binding)

                device_mem = cuda.DeviceArray(shape=shape,
                                              dtype=trt.nptype(dtype))
                G_LOGGER.extra_verbose("Tensor: "
                                       "{:35} | Allocated: {:}".format(
                                           binding, device_mem))

                if engine.binding_is_input(binding):
                    input_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        None, device_mem)
                else:
                    host_mem = np.empty(shape=shape, dtype=trt.nptype(dtype))
                    output_buffers[binding] = TrtLegacyRunner.HostDeviceMem(
                        host_mem, device_mem)
            return input_buffers, output_buffers, stream
Example #6
    def mark_layers(self, network, indices):
        EXCLUDE_LAYER_NAMES = ["CONSTANT"]
        EXCLUDE_LAYERS = [getattr(trt.LayerType, attr) for attr in EXCLUDE_LAYER_NAMES if hasattr(trt.LayerType, attr)]

        # First, reset, since changes from the previous call will persist.
        for layer in network:
            layer.reset_precision()

        marked_indices = set()
        for index in indices:
            layer = network.get_layer(index)

            def should_exclude():
                has_non_execution_output = any(
                    not layer.get_output(i).is_execution_tensor for i in range(layer.num_outputs)
                )
                return layer.type in EXCLUDE_LAYERS or has_non_execution_output

            if not should_exclude():
                G_LOGGER.extra_verbose(
                    "Running layer in higher precision: {:}".format(trt_util.str_from_layer(layer, index))
                )
                layer.precision = self.precision
                marked_indices.add(index)

        G_LOGGER.verbose("Marking layer(s): {:} to run in {:} precision".format(marked_indices, self.precision))
Example #7
 def is_not_nan(output):
     nans = np.isnan(output)
     if np.any(nans):
         G_LOGGER.error("NaN Detected | One or more NaNs were encountered in this output")
         G_LOGGER.info("Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display locations of NaNs", mode=LogMode.ONCE)
         G_LOGGER.extra_verbose("Note: NaNs at:\n{:}".format(nans))
         return False
     return True
Example #8
 def is_finite(output):
     non_finite = np.logical_not(np.isfinite(output))
     if np.any(non_finite):
         G_LOGGER.error("Inf Detected | One or more non-finite values were encountered in this output")
         G_LOGGER.info("Note: Use -vv or set logging verbosity to EXTRA_VERBOSE to display non-finite values", mode=LogMode.ONCE)
         G_LOGGER.extra_verbose("Note: non-finite values at:\n{:}".format(non_finite))
         G_LOGGER.extra_verbose("Note: non-finite values:\n{:}".format(output[non_finite]))
         return False
     return True
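
The two checks above depend only on NumPy; a standalone sketch that mirrors them with print in place of G_LOGGER, so it runs without Polygraphy:

import numpy as np

def is_not_nan(output):
    # Mirrors the check above: returns False if any NaNs are present.
    nans = np.isnan(output)
    if np.any(nans):
        print("NaNs at:\n{}".format(np.argwhere(nans)))
        return False
    return True

def is_finite(output):
    # Mirrors the check above: returns False on any Inf, -Inf, or NaN value.
    non_finite = np.logical_not(np.isfinite(output))
    if np.any(non_finite):
        print("Non-finite values:\n{}".format(output[non_finite]))
        return False
    return True

print(is_not_nan(np.array([1.0, np.nan])))   # False
print(is_finite(np.array([1.0, np.inf])))    # False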
Example #9
 def try_permute(arr, shape):
     try:
         perm = FormatManager.permutation(FormatManager.determine_format(arr.shape), FormatManager.determine_format(shape))
         G_LOGGER.verbose("Permuting shape: {:} using permutation {:}".format(arr.shape, perm))
         arr = np.transpose(arr, perm)
     except Exception as err:
         # FormatManager may not recognize the format or be able to generate the permutation for the format combination
         G_LOGGER.extra_verbose("Skipping permutation due to {:}".format(err))
     return arr
Example #10
 def __init__(self, arr):
     """
     Args:
         arr (np.ndarray): The NumPy array.
     """
     self.arr = None
     self.tmpfile = None
     if config.ARRAY_SWAP_THRESHOLD_MB >= 0 and arr.nbytes > (config.ARRAY_SWAP_THRESHOLD_MB << 20):
         self.tmpfile = tempfile.NamedTemporaryFile(mode="w+", suffix=".json")
         G_LOGGER.extra_verbose("Evicting large array ({:.3f} MiB) from memory and saving to {:}".format(
                                     arr.nbytes / (1024.0 ** 2), self.tmpfile.name))
         save_json(arr, self.tmpfile)
     else:
         self.arr = arr
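
The threshold comparison above converts a configured MiB value to bytes with a left shift; a quick sanity check of that arithmetic (the threshold value here is hypothetical):

threshold_mb = 8                      # hypothetical value for config.ARRAY_SWAP_THRESHOLD_MB
threshold_bytes = threshold_mb << 20  # 1 MiB = 2**20 bytes
assert threshold_bytes == 8 * 1024 * 1024 == 8388608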
Example #11
    def call_impl(self):
        """
        Returns:
            tf.Session: The TensorFlow session.
        """
        config, _ = util.invoke_if_callable(self.config)
        (graph, output_names), _ = util.invoke_if_callable(self.graph)

        with graph.as_default() as graph, tf.compat.v1.Session(
                graph=graph, config=config).as_default() as sess:
            G_LOGGER.verbose(
                "Using TensorFlow outputs: {:}".format(output_names))
            G_LOGGER.extra_verbose(
                "Initializing variables in TensorFlow Graph")
            sess.run(tf.compat.v1.initializers.global_variables())
            return sess, output_names
Example #12
        def make_buffers(engine):
            """
            Creates empty host and device buffers for the specified engine.
            Always uses binding names from Profile 0.
            """
            device_buffers = OrderedDict()
            host_output_buffers = OrderedDict()

            for idx in range(trt_util.get_bindings_per_profile(engine)):
                binding = engine[idx]
                dtype = trt_util.np_dtype_from_trt(engine.get_binding_dtype(binding))
                device_buffers[binding] = cuda.DeviceArray(dtype=dtype)
                if not engine.binding_is_input(binding):
                    host_output_buffers[binding] = np.empty(shape=tuple(), dtype=dtype)
            G_LOGGER.extra_verbose("Created device buffers: {:}".format(device_buffers))
            return device_buffers, host_output_buffers
Example #13
        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.info("{:35}\n---- Model Input(s) ----\n{:}".format(active_runner.name, input_metadata),
                              mode=LogMode.ONCE)

                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.start("{:35} | Running {:} warm-up run(s)".format(active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning("{:} warm-up run(s) were requested, but data loader did not supply any data. "
                                         "Skipping warm-up run(s)".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose("Warm-up Input Buffers:\n{:}".format(util.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for _ in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)
                    G_LOGGER.finish("{:35} | Finished {:} warm-up run(s)".format(active_runner.name, warm_up))

                # Then, actual iterations.
                index = 0
                iteration_results = []

                total_runtime = 0
                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.extra_verbose(lambda: "{:35} | Feeding inputs:\n{:}".format(active_runner.name, util.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    total_runtime += runtime
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(IterationResult(outputs=copy.deepcopy(outputs), runtime=runtime, runner_name=active_runner.name))

                    G_LOGGER.info(lambda: "{:35}\n---- Model Output(s) ----\n{:}".format(
                                            active_runner.name, TensorMetadata().from_feed_dict(outputs)),
                                  mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(lambda: "{:35} | Inference Time: {:.3f} ms | Received outputs:\n{:}".format(
                                                        active_runner.name, runtime * 1000.0, util.indent_block(outputs)))

                total_runtime_ms = total_runtime * 1000.0
                G_LOGGER.finish("{:35} | Completed {:} iteration(s) in {:.4g} ms | Average inference time: {:.4g} ms.".format(active_runner.name, index + 1, total_runtime_ms, total_runtime_ms / float(index + 1)))
                return iteration_results
Example #14
    def call_impl(self, builder, network):
        """
        Args:
            builder (trt.Builder):
                    The TensorRT builder to use to create the configuration.
            network (trt.INetworkDefinition):
                    The TensorRT network for which to create the config. The network is used to
                    automatically create a default optimization profile if none are provided.

        Returns:
            trt.IBuilderConfig: The TensorRT builder configuration.
        """
        with util.FreeOnException([builder.create_builder_config()]) as (config, ):
            def try_run(func, name):
                try:
                    return func()
                except AttributeError:
                    trt_util.fail_unavailable("{:} in CreateConfig".format(name))


            def try_set_flag(flag_name):
                return try_run(lambda: config.set_flag(getattr(trt.BuilderFlag, flag_name)), flag_name.lower())


            with G_LOGGER.indent():
                G_LOGGER.verbose("Setting TensorRT Optimization Profiles")
                profiles = copy.deepcopy(self.profiles)
                for profile in profiles:
                    # Last trt_profile is used for set_calibration_profile.
                    trt_profile = profile.fill_defaults(network).to_trt(builder, network)
                    config.add_optimization_profile(trt_profile)
                G_LOGGER.info("Configuring with profiles: {:}".format(profiles))

            config.max_workspace_size = int(self.max_workspace_size)

            if self.strict_types:
                try_set_flag("STRICT_TYPES")

            if self.tf32:
                try_set_flag("TF32")
            else: # TF32 is on by default
                with contextlib.suppress(AttributeError):
                    config.clear_flag(trt.BuilderFlag.TF32)

            if self.fp16:
                try_set_flag("FP16")

            if self.int8:
                try_set_flag("INT8")
                if not network.has_explicit_precision:
                    if self.calibrator is not None:
                        input_metadata = trt_util.get_input_metadata_from_profile(trt_profile, network)
                        with contextlib.suppress(AttributeError): # Polygraphy calibrator has a reset method
                            self.calibrator.reset(input_metadata)
                        config.int8_calibrator = self.calibrator
                        try:
                            config.set_calibration_profile(trt_profile)
                        except:
                            G_LOGGER.extra_verbose("Cannot set calibration profile on TensorRT 7.0 and older.")
                    else:
                        G_LOGGER.warning("Network does not have explicit precision and no calibrator was provided. Please ensure "
                                         "that tensors in the network have dynamic ranges set, or provide a calibrator in order to use int8 mode.")

            if self.sparse_weights:
                try_set_flag("SPARSE_WEIGHTS")

            if self.tactic_sources is not None:
                tactic_sources_flag = 0
                for source in self.tactic_sources:
                    tactic_sources_flag |= (1 << int(source))
                try_run(lambda: config.set_tactic_sources(tactic_sources_flag), name="tactic_sources")

            try:
                if self.timing_cache_path:
                    timing_cache_data = util.load_file(self.timing_cache_path, description="tactic timing cache")
                    cache = config.create_timing_cache(timing_cache_data)
                else:
                    # Create an empty timing cache by default so it will be populated during engine build.
                    # This way, consumers of CreateConfig have the option to use the cache later.
                    cache = config.create_timing_cache(b"")
            except AttributeError:
                if self.timing_cache_path:
                    trt_util.fail_unavailable("load_timing_cache in CreateConfig")
            else:
                config.set_timing_cache(cache, ignore_mismatch=False)

            if self.algorithm_selector is not None:
                def set_algo_selector():
                    config.algorithm_selector = self.algorithm_selector
                try_run(set_algo_selector, "algorithm_selector")

            return config
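
The tactic_sources handling above folds enum values into a single bitmask; a minimal illustration with plain integers standing in for trt.TacticSource members:

sources = [0, 2]          # hypothetical enum values
flag = 0
for s in sources:
    flag |= 1 << s
assert flag == 0b101      # bits 0 and 2 set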
Example #15
    def run(
        runners,
        data_loader=None,
        warm_up=None,
        use_subprocess=None,
        subprocess_timeout=None,
        subprocess_polling_interval=None,
        save_inputs_path=None,
    ):
        """
        Runs the supplied runners sequentially.

        Args:
            runners (List[BaseRunner]):
                    A list of runners to run.
            data_loader (Generator -> OrderedDict[str, numpy.ndarray]):
                    A generator or iterable that yields a dictionary that maps input names to input numpy buffers.
                    In the simplest case, this can be a `List[Dict[str, numpy.ndarray]]` .

                    In case you don't know details about the inputs ahead of time, you can access the
                    `input_metadata` property in your data loader, which will be set to a `TensorMetadata`
                    instance by this function.
                    Note that this does not work for generators or lists.

                    The number of iterations run by this function is controlled by the number of items supplied
                    by the data loader.

                    Defaults to an instance of `DataLoader`.
            warm_up (int):
                    The number of warm up runs to perform for each runner before timing.
                    Defaults to 0.
            use_subprocess (bool):
                    Whether each runner should be run in a subprocess. This allows each runner to have exclusive
                    access to the GPU. When using a subprocess, runners and loaders will never be modified.
            subprocess_timeout (int):
                    The timeout before a subprocess is killed automatically. This is useful for handling processes
                    that never terminate. A value of None disables the timeout. Defaults to None.
            subprocess_polling_interval (int):
                    The polling interval, in seconds, for checking whether a subprocess has completed or crashed.
                    In rare cases, omitting this parameter when subprocesses are enabled may cause this function
                    to hang indefinitely if the subprocess crashes.
                    A value of 0 disables polling. Defaults to 30 seconds.
            save_inputs_path (str):
                    [EXPERIMENTAL] Path at which to save inputs used during inference. This will include all inputs generated by
                    the provided data_loader, and will be saved as a JSON List[Dict[str, numpy.ndarray]].

        Returns:
            RunResults:
                    A mapping of runner names to the results of their inference.
                    The ordering of `runners` is preserved in this mapping.
        """
        warm_up = util.default(warm_up, 0)
        data_loader = util.default(data_loader, DataLoader())
        use_subprocess = util.default(use_subprocess, False)
        subprocess_polling_interval = util.default(subprocess_polling_interval,
                                                   30)
        loader_cache = DataLoaderCache(data_loader,
                                       save_inputs_path=save_inputs_path)

        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(
                    active_runner.get_input_metadata())

                if warm_up:
                    G_LOGGER.start("{:35} | Running {:} warm-up run(s)".format(
                        active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning(
                            "{:} warm-up run(s) were requested, but data loader did not supply any data. "
                            "Skipping warm-up run(s)".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose(
                            "Warm-up Input Buffers:\n{:}".format(
                                util.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for _ in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)
                    G_LOGGER.finish(
                        "{:35} | Finished {:} warm-up run(s)".format(
                            active_runner.name, warm_up))

                # Then, actual iterations.
                index = 0
                iteration_results = []

                total_runtime = 0
                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.info(
                        "{:35}\n---- Inference Input(s) ----\n{:}".format(
                            active_runner.name,
                            TensorMetadata().from_feed_dict(feed_dict)),
                        mode=LogMode.ONCE,
                    )

                    G_LOGGER.extra_verbose(
                        lambda: "{:35} | Feeding inputs:\n{:}".format(
                            active_runner.name, util.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    total_runtime += runtime
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(
                        IterationResult(outputs=copy.deepcopy(outputs),
                                        runtime=runtime,
                                        runner_name=active_runner.name))

                    G_LOGGER.info(
                        "{:35}\n---- Inference Output(s) ----\n{:}".format(
                            active_runner.name,
                            TensorMetadata().from_feed_dict(outputs)),
                        mode=LogMode.ONCE,
                    )
                    G_LOGGER.extra_verbose(
                        lambda:
                        "{:35} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                        .format(active_runner.name, runtime * 1000.0,
                                util.indent_block(outputs)))

                total_runtime_ms = total_runtime * 1000.0
                G_LOGGER.finish(
                    "{:35} | Completed {:} iteration(s) in {:.4g} ms | Average inference time: {:.4g} ms."
                    .format(active_runner.name, index + 1, total_runtime_ms,
                            total_runtime_ms / float(index + 1)))
                return iteration_results

        # Wraps execute_runner to use a queue.
        def execute_runner_with_queue(runner_queue, runner, loader_cache):
            iteration_results = None
            try:
                iteration_results = execute_runner(runner, loader_cache)
            except:
                # Cannot necessarily send the exception back over the queue.
                G_LOGGER.backtrace()
            util.try_send_on_queue(runner_queue, iteration_results)
            # After finishing, send the updated loader_cache back.
            util.try_send_on_queue(runner_queue, loader_cache)

        # Do all inferences in one loop, then comparisons at a later stage.
        # We run each runner in a separate process so that we can provide exclusive GPU access for each runner.
        run_results = RunResults()

        if not runners:
            G_LOGGER.warning(
                "No runners were provided to Comparator.run(). Inference will not be run, and run results will be empty."
            )

        for runner in runners:
            G_LOGGER.start("{:35} | Activating and starting inference".format(
                runner.name))
            if use_subprocess:
                runner_queue = Queue()
                process = Process(target=execute_runner_with_queue,
                                  args=(runner_queue, runner, loader_cache))
                process.start()

                # If a subprocess hangs in a certain way, then process.join could block forever. Hence,
                # we need to keep polling the process to make sure it really is alive.
                iteration_results = None
                while process.is_alive() and iteration_results is None:
                    try:
                        iteration_results = util.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                        # Receive updated loader cache, or fall back if it could not be sent.
                        loader_cache = util.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                    except queue.Empty:
                        G_LOGGER.extra_verbose(
                            "Polled subprocess - still running")

                try:
                    assert iteration_results is not None
                    run_results.append((runner.name, iteration_results))
                    process.join(subprocess_timeout)
                except:
                    G_LOGGER.critical(
                        "{:35} | Terminated prematurely. Check the exception logged above. "
                        "If there is no exception logged above, make sure not to use the --use-subprocess "
                        "flag or set use_subprocess=False in Comparator.run()."
                        .format(runner.name))
                finally:
                    process.terminate()

                if loader_cache is None:
                    G_LOGGER.critical(
                        "Could not send data loader cache to runner subprocess. Please try disabling subprocesses "
                        "by removing the --use-subprocess flag, or setting use_subprocess=False in Comparator.run()"
                    )
            else:
                run_results.append(
                    (runner.name, execute_runner(runner, loader_cache)))

        G_LOGGER.verbose("Successfully ran: {:}".format(
            [r.name for r in runners]))
        return run_results
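
Per the docstring above, data_loader may simply be a list of feed dicts. A hedged usage sketch (it assumes Polygraphy with its ONNX Runtime backend is installed, that OnnxrtRunner and SessionFromOnnx are available in the installed version, and that a model exists at the placeholder path "model.onnx" with an input named "x"):

import numpy as np
from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx
from polygraphy.comparator import Comparator

# In the simplest case, data_loader can be a List[Dict[str, np.ndarray]] (see the docstring above).
data = [{"x": np.ones((1, 3, 224, 224), dtype=np.float32)} for _ in range(5)]
runners = [OnnxrtRunner(SessionFromOnnx("model.onnx"))]   # "model.onnx" is a placeholder path

run_results = Comparator.run(runners, data_loader=data, warm_up=2)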
Example #16
    def compare_accuracy(run_results,
                         fail_fast=False,
                         comparisons=None,
                         compare_func=None):
        """
        Args:
            run_results (RunResults): The result of Comparator.run()
            fail_fast (bool): Whether to exit after the first failure
            comparisons (List[Tuple[int, int]]):
                    Comparisons to perform, specified by runner indexes. For example, [(0, 1), (1, 2)]
                    would compare the first runner with the second, and the second with the third.
                    By default, this compares each result to the subsequent one.
            compare_func (Callable(IterationResult, IterationResult) -> OrderedDict[str, bool]):
                    A function that takes in two IterationResults, and returns a dictionary that maps output
                    names to a boolean (or anything convertible to a boolean) indicating whether outputs matched.
                    The order of arguments to this function is guaranteed to be the same as the ordering of the
                    tuples contained in `comparisons`.

        Returns:
            AccuracyResult:
                    A summary of the results of the comparisons. The order of the keys (i.e. runner pairs) is
                    guaranteed to be the same as the order of `comparisons`. For more details, see the AccuracyResult
                    docstring (e.g. help(AccuracyResult)).
        """
        def find_mismatched(match_dict):
            return [
                name for name, matched in match_dict.items()
                if not bool(matched)
            ]

        compare_func = util.default(compare_func, CompareFunc.simple())
        comparisons = util.default(comparisons,
                                   Comparator.default_comparisons(run_results))

        accuracy_result = AccuracyResult()
        for runner0_index, runner1_index in comparisons:
            (runner0_name, results0), (
                runner1_name, results1
            ) = run_results[runner0_index], run_results[runner1_index]

            G_LOGGER.start("Accuracy Comparison | {:} vs. {:}".format(
                runner0_name, runner1_name))
            with G_LOGGER.indent():
                runner_pair = (runner0_name, runner1_name)
                accuracy_result[runner_pair] = []

                num_iters = min(len(results0), len(results1))
                for iteration, (result0,
                                result1) in enumerate(zip(results0, results1)):
                    if num_iters > 1:
                        G_LOGGER.info("Iteration: {:}".format(iteration))
                    with contextlib.ExitStack() as stack:
                        if num_iters > 1:
                            stack.enter_context(G_LOGGER.indent())
                        iteration_match_dict = compare_func(result0, result1)
                        accuracy_result[runner_pair].append(
                            iteration_match_dict)

                        mismatched_outputs = find_mismatched(
                            iteration_match_dict)
                        if fail_fast and mismatched_outputs:
                            return accuracy_result

                G_LOGGER.extra_verbose(
                    "Finished comparing {:} with {:}".format(
                        runner0_name,
                        runner1_name,
                    ))

                passed, _, total = accuracy_result.stats(runner_pair)
                pass_rate = accuracy_result.percentage(runner_pair) * 100.0
                if num_iters > 1 or len(comparisons) > 1:
                    msg = "Accuracy Summary | {:} vs. {:} | Passed: {:}/{:} iterations | Pass Rate: {:}%".format(
                        runner0_name, runner1_name, passed, total, pass_rate)
                    if passed == total:
                        G_LOGGER.finish(msg)
                    else:
                        G_LOGGER.error(msg)
        return accuracy_result
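
Continuing the sketch after Example #15, the results of Comparator.run can be fed straight into compare_accuracy; CompareFunc.simple (referenced in the code above) supplies the default comparison function. The tolerance values below are hypothetical:

from polygraphy.comparator import Comparator, CompareFunc

accuracy_result = Comparator.compare_accuracy(
    run_results,
    compare_func=CompareFunc.simple(atol=1e-4, rtol=1e-4),
)
# AccuracyResult maps runner-name pairs to per-iteration match dictionaries.
for runner_pair, per_iteration in accuracy_result.items():
    print(runner_pair, "passed all iterations:", all(all(d.values()) for d in per_iteration))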
Example #17
def get_output_metadata(graph, layerwise=False):
    graphdef = graph.as_graph_def()

    node_output_map = map_node_outputs(graphdef)

    def is_output_node(node):
        # Make sure that we're not using hanging nodes as outputs - must have at least one input.
        if len(node_output_map[node.name]) != 0 or len(node.input) == 0:
            return False

        # Tensors with no shape cannot be outputs and TensorFlow doesn't like certain ops as outputs.
        EXCLUDE_OPS = [
            "Switch",
            "FusedBatchNorm",
            "Assert",
            "NextIteration",
            "Enter",
            "LoopCond",
            "Exit",
            "Print",
            "Assign",
            "NoOp",
            "ReadVariableOp",
            "VarIsInitializedOp",
            "Const",
        ]

        # Additionally, we sometimes need to exclude entire namespaces e.g. while loops.
        EXCLUDE_NAMESPACES = ["while", "Assert"]

        if any([ex_op in node.op for ex_op in EXCLUDE_OPS]) or any(
            [ns in node.name for ns in EXCLUDE_NAMESPACES]):
            G_LOGGER.extra_verbose(
                "Excluding {:}, op {:} is not a valid output op or is part of an excluded namespace "
                "(Note: excluded namespaces: {:})".format(
                    node.name, node.op, EXCLUDE_NAMESPACES))
            return False

        return True

    # For layerwise mode, every layer becomes an output.
    if layerwise:
        output_nodes = list(graphdef.node)
        G_LOGGER.verbose(
            "Running in layerwise mode. Marking {:} layers as potential outputs"
            .format(len(output_nodes)))
    else:
        output_nodes = [node for node in graphdef.node if is_output_node(node)]
    G_LOGGER.extra_verbose(
        "Found likely output nodes: {:}".format(output_nodes))

    output_tensors = []
    for node in output_nodes:

        tensor_name = node.name + ":0"
        try:
            tensor = graph.get_tensor_by_name(tensor_name)
            output_tensors.append(tensor)
        except KeyError:
            G_LOGGER.warning(
                "Could not import: {:}. Skipping.".format(tensor_name))
    if len(output_tensors) != len(output_nodes):
        G_LOGGER.warning(
            "Excluded {:} ops that don't seem like outputs. Use -vv/--super-verbose, or set "
            "logging verbosity to EXTRA_VERBOSE to view them.".format(
                len(output_nodes) - len(output_tensors)))

    G_LOGGER.extra_verbose("Found output op types in graph: {:}".format(
        {tensor.op.type
         for tensor in output_tensors}))
    G_LOGGER.verbose(
        "Retrieved TensorFlow output_tensors: {:}".format(output_tensors))
    return get_tensor_metadata(output_tensors)
Example #18
        def compare_output(iter_result0, iter_result1):
            """
            Compare the outputs of two runners from a single iteration.

            This function will always iterate over the output names of the first IterationResult,
                and attempt to find corresponding output names in the second.
            If no corresponding output name is found, the output is skipped.
            If all output names are skipped, then this function raises an error.

            Args:
                iter_result0 (IterationResult): The result of the first runner.
                iter_result1 (IterationResult): The result of the second runner.

            Returns:
                OrderedDict[str, OutputCompareResult]:
                        The name of the outputs compared, derived from the first IterationResult,
                        and whether they matched. If an output name is not found, it is omitted from this dictionary.

            Raises:
                PolygraphyException: If all output names are skipped, and thus no outputs are compared.
            """
            def check_dict(dct, dict_name):
                if isinstance(dct, dict):
                    util.check_dict_contains(dct, set(iter_result0.keys()) | set(iter_result1.keys()) | set([""]),
                                             check_missing=False, dict_name=dict_name)


            check_dict(rtol, "the rtol dictionary")
            check_dict(atol, "the atol dictionary")
            check_dict(check_error_stat, "the check_error_stat dictionary")


            # Returns whether the outputs match
            def check_outputs_match(out0, out0_name, out1, out1_name, per_out_rtol, per_out_atol, per_out_err_stat):
                VALID_CHECK_ERROR_STATS = ["max", "mean", "median", "elemwise"]
                if per_out_err_stat not in VALID_CHECK_ERROR_STATS:
                    G_LOGGER.critical("Invalid choice for check_error_stat: {:}.\n"
                                      "Note: Valid choices are: {:}".format(per_out_err_stat, VALID_CHECK_ERROR_STATS))

                G_LOGGER.super_verbose("{:35} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result0.runner_name, out0_name, out0.dtype, out0.shape, util.indent_block(out0)))
                G_LOGGER.super_verbose("{:35} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result1.runner_name, out1_name, out1.dtype, out1.shape, util.indent_block(out1)))

                # Check difference vs. tolerances
                if np.issubdtype(out0.dtype, np.bool_) and np.issubdtype(out1.dtype, np.bool_):
                    absdiff = np.logical_xor(out0, out1)
                else:
                    absdiff = np.abs(out0 - out1)

                absout1 = np.abs(out1)
                with np.testing.suppress_warnings() as sup:
                    sup.filter(RuntimeWarning)
                    reldiff = absdiff / absout1

                max_absdiff = comp_util.compute_max(absdiff)
                mean_absdiff = comp_util.compute_mean(absdiff)
                median_absdiff = comp_util.compute_median(absdiff)
                max_reldiff = comp_util.compute_max(reldiff)
                mean_reldiff = comp_util.compute_mean(reldiff)
                median_reldiff = comp_util.compute_median(reldiff)

                max_elemwiseabs = "Unknown"
                max_elemwiserel = "Unknown"

                if per_out_err_stat == "mean":
                    failed = mean_absdiff > per_out_atol and (np.isnan(mean_reldiff) or mean_reldiff > per_out_rtol)
                elif per_out_err_stat == "median":
                    failed = median_absdiff > per_out_atol and (np.isnan(median_reldiff) or median_reldiff > per_out_rtol)
                elif per_out_err_stat == "max":
                    failed = max_absdiff > per_out_atol and (np.isnan(max_reldiff) or max_reldiff > per_out_rtol)
                else:
                    assert per_out_err_stat == "elemwise", "This branch should be unreachable unless per_out_err_stat is 'elemwise'"
                    mismatches = (absdiff > per_out_atol) & (reldiff > per_out_rtol)

                    failed = np.any(mismatches)
                    try:
                        # Special because we need to account for tolerances too.
                        max_elemwiseabs = comp_util.compute_max(absdiff[mismatches])
                        max_elemwiserel = comp_util.compute_max(reldiff[mismatches])

                        with G_LOGGER.indent():
                            G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches)))
                            G_LOGGER.extra_verbose("{:35} | Mismatched values:\n{:}".format(iter_result0.runner_name, out0[mismatches]))
                            G_LOGGER.extra_verbose("{:35} | Mismatched values:\n{:}".format(iter_result1.runner_name, out1[mismatches]))
                    except Exception as err:
                        G_LOGGER.warning("Failing to log mismatches.\nNote: Error was: {:}".format(err))

                # Log information about the outputs
                hist_bin_range = (min(comp_util.compute_min(out0), comp_util.compute_min(out1)),
                                  max(comp_util.compute_max(out0), comp_util.compute_max(out1)))
                comp_util.log_output_stats(out0, failed, iter_result0.runner_name + ": " + out0_name, hist_range=hist_bin_range)
                comp_util.log_output_stats(out1, failed, iter_result1.runner_name + ": " + out1_name, hist_range=hist_bin_range)

                G_LOGGER.info("Error Metrics: {:}".format(out0_name))
                with G_LOGGER.indent():
                    def req_tol(mean_diff, median_diff, max_diff, elemwise_diff):
                        return {
                            "mean": mean_diff,
                            "median": median_diff,
                            "max": max_diff,
                            "elemwise": elemwise_diff,
                        }[per_out_err_stat]

                    G_LOGGER.info("Minimum Required Tolerance: {:} error | [abs={:.5g}] OR [rel={:.5g}]".format(
                                    per_out_err_stat,
                                    req_tol(mean_absdiff, median_absdiff, max_absdiff, max_elemwiseabs),
                                    req_tol(mean_reldiff, median_reldiff, max_reldiff, max_elemwiserel)))
                    comp_util.log_output_stats(absdiff, failed, "Absolute Difference")
                    comp_util.log_output_stats(reldiff, failed, "Relative Difference")

                # Finally show summary.
                if failed:
                    G_LOGGER.error("FAILED | Difference exceeds tolerance (rel={:}, abs={:})".format(per_out_rtol, per_out_atol))
                else:
                    G_LOGGER.finish("PASSED | Difference is within tolerance (rel={:}, abs={:})".format(per_out_rtol, per_out_atol))

                G_LOGGER.extra_verbose("Finished comparing: '{:}' (dtype={:}, shape={:}) [{:}] and '{:}' (dtype={:}, shape={:}) [{:}]"
                                .format(out0_name, out0.dtype, out0.shape, iter_result0.runner_name, out1_name, out1.dtype, out1.shape, iter_result1.runner_name))
                return OutputCompareResult(not failed, max_absdiff, max_reldiff, mean_absdiff, mean_reldiff, median_absdiff, median_reldiff)
                #
                # End: def check_outputs_match
                #

            output_status = OrderedDict() # OrderedDict[str, bool] Maps output names to whether they matched.

            if not check_shapes:
                G_LOGGER.info("Strict shape checking disabled. Will attempt to match output shapes before comparisons")


            def default_find_output_func(output_name, index, iter_result):
                found_name = util.find_in_dict(output_name, iter_result, index)
                if found_name is None:
                    return None
                elif found_name != output_name:
                    exact_match = util.find_in_dict(found_name, iter_result0)
                    if exact_match == found_name:
                        G_LOGGER.verbose("Will not compare {:} with {:}, since the former already has an exact match: {:}".format(
                                            found_name, output_name, exact_match))
                        return None # If the found output is being compared against another output already, skip this non-exact match
                    G_LOGGER.warning("Output names did not match exactly. Assuming {:} output: {:} "
                                    "corresponds to output: {:}".format(
                                        iter_result.runner_name, found_name, output_name))
                return [found_name]


            nonlocal find_output_func
            find_output_func = util.default(find_output_func, default_find_output_func)

            for index, (out0_name, output0) in enumerate(iter_result0.items()):
                out1_names = util.default(find_output_func(out0_name, index, iter_result1), [])

                if len(out1_names) > 1:
                    G_LOGGER.info("Will attempt to compare output: '{:}' [{:}] with multiple outputs: '{:}' [{:}]".format(
                                    out0_name, iter_result0.runner_name, list(out1_names), iter_result1.runner_name))

                for out1_name in out1_names:
                    if out1_name is None or out1_name not in iter_result1:
                        G_LOGGER.warning("For output: '{:}' [{:}], skipping corresponding output: '{:}' [{:}], "
                                         "since the output was not found".format(out0_name, iter_result0.runner_name,
                                                                                 out1_name, iter_result1.runner_name))
                        continue


                    def get_tol(tol_dict, default):
                        if isinstance(tol_dict, numbers.Number):
                            return tol_dict

                        if out0_name in tol_dict:
                            return tol_dict[out0_name]
                        elif "" in tol_dict:
                            return tol_dict[""]
                        return default


                    def get_error_stat():
                        if isinstance(check_error_stat, str):
                            return check_error_stat

                        if out0_name in check_error_stat:
                            return check_error_stat[out0_name]
                        elif "" in check_error_stat:
                            return check_error_stat[""]
                        return default_error_stat


                    per_out_atol = get_tol(atol, default_atol)
                    per_out_rtol = get_tol(rtol, default_rtol)
                    per_out_err_stat = get_error_stat()

                    output1 = iter_result1[out1_name]
                    G_LOGGER.start("Comparing Output: '{:}' (dtype={:}, shape={:}) with '{:}' (dtype={:}, shape={:}) | "
                                   "Tolerance: [abs={:.5g}, rel={:.5g}] | Checking {:} error".format(
                                        out0_name, output0.dtype, output0.shape,
                                        out1_name, output1.dtype, output1.shape,
                                        per_out_atol, per_out_rtol, per_out_err_stat))
                    G_LOGGER.extra_verbose("Note: Comparing {:} vs. {:}".format(iter_result0.runner_name, iter_result1.runner_name))


                    with G_LOGGER.indent():
                        if check_shapes and output0.shape != output1.shape:
                            G_LOGGER.error("Will not compare outputs of different shapes. Note: Output shapes are "
                                           "{:} and {:}.".format(output0.shape, output1.shape))
                            G_LOGGER.error("Note: Use --no-strict-shape-checking or set check_shapes=False to "
                                           "attempt to compare values anyway.", mode=LogMode.ONCE)
                            outputs_match = False
                        else:
                            output1 = util.try_match_shape(output1, output0.shape)
                            output0 = output0.reshape(output1.shape)
                            outputs_match = check_outputs_match(output0, out0_name, output1, out1_name,
                                                                per_out_rtol=per_out_rtol, per_out_atol=per_out_atol,
                                                                per_out_err_stat=per_out_err_stat)

                        output_status[out0_name] = outputs_match
                        if fail_fast and not outputs_match:
                            return output_status


            mismatched_output_names = [name for name, matched in output_status.items() if not matched]
            if mismatched_output_names:
                G_LOGGER.error("FAILED | Mismatched outputs: {:}".format(mismatched_output_names))
            else:
                G_LOGGER.finish("PASSED | All outputs matched | Outputs: {:}".format(list(output_status.keys())))

            # This is useful for catching cases where Polygraphy does something wrong with the runner output buffers
            if not output_status and (bool(iter_result0.keys()) or bool(iter_result1.keys())):
                r0_name = iter_result0.runner_name
                r0_outs = list(iter_result0.keys())
                r1_name = iter_result1.runner_name
                r1_outs = list(iter_result1.keys())
                G_LOGGER.critical("All outputs were skipped, no common outputs found! Note:\n{:} outputs: "
                                  "{:}\n{:} outputs: {:}".format(r0_name, r0_outs, r1_name, r1_outs))

            return output_status
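
A small numeric illustration of the element-wise branch in check_outputs_match above (plain NumPy, hypothetical values): an element counts as a mismatch only when it exceeds both the absolute and the relative tolerance.

import numpy as np

out0 = np.array([1.0, 2.0, 3.0])
out1 = np.array([1.0, 2.5, 3.0])
atol, rtol = 1e-3, 1e-3

absdiff = np.abs(out0 - out1)
reldiff = absdiff / np.abs(out1)
mismatches = (absdiff > atol) & (reldiff > rtol)

print(np.any(mismatches))        # True: only the middle element exceeds both tolerances
print(np.argwhere(mismatches))   # [[1]]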
Example #19
            def check_outputs_match(out0, out0_name, out1, out1_name, per_out_rtol, per_out_atol, per_out_err_stat):
                VALID_CHECK_ERROR_STATS = ["max", "mean", "median", "elemwise"]
                if per_out_err_stat not in VALID_CHECK_ERROR_STATS:
                    G_LOGGER.critical("Invalid choice for check_error_stat: {:}.\n"
                                      "Note: Valid choices are: {:}".format(per_out_err_stat, VALID_CHECK_ERROR_STATS))

                G_LOGGER.super_verbose("{:35} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result0.runner_name, out0_name, out0.dtype, out0.shape, util.indent_block(out0)))
                G_LOGGER.super_verbose("{:35} | Output: {:} (dtype={:}, shape={:}):\n{:}".format(
                                            iter_result1.runner_name, out1_name, out1.dtype, out1.shape, util.indent_block(out1)))

                # Check difference vs. tolerances
                if np.issubdtype(out0.dtype, np.bool_) and np.issubdtype(out1.dtype, np.bool_):
                    absdiff = np.logical_xor(out0, out1)
                else:
                    absdiff = np.abs(out0 - out1)

                absout1 = np.abs(out1)
                with np.testing.suppress_warnings() as sup:
                    sup.filter(RuntimeWarning)
                    reldiff = absdiff / absout1

                max_absdiff = comp_util.compute_max(absdiff)
                mean_absdiff = comp_util.compute_mean(absdiff)
                median_absdiff = comp_util.compute_median(absdiff)
                max_reldiff = comp_util.compute_max(reldiff)
                mean_reldiff = comp_util.compute_mean(reldiff)
                median_reldiff = comp_util.compute_median(reldiff)

                max_elemwiseabs = "Unknown"
                max_elemwiserel = "Unknown"

                if per_out_err_stat == "mean":
                    failed = mean_absdiff > per_out_atol and (np.isnan(mean_reldiff) or mean_reldiff > per_out_rtol)
                elif per_out_err_stat == "median":
                    failed = median_absdiff > per_out_atol and (np.isnan(median_reldiff) or median_reldiff > per_out_rtol)
                elif per_out_err_stat == "max":
                    failed = max_absdiff > per_out_atol and (np.isnan(max_reldiff) or max_reldiff > per_out_rtol)
                else:
                    assert per_out_err_stat == "elemwise", "This branch should be unreachable unless per_out_err_stat is 'elemwise'"
                    mismatches = (absdiff > per_out_atol) & (reldiff > per_out_rtol)

                    failed = np.any(mismatches)
                    try:
                        # Special because we need to account for tolerances too.
                        max_elemwiseabs = comp_util.compute_max(absdiff[mismatches])
                        max_elemwiserel = comp_util.compute_max(reldiff[mismatches])

                        with G_LOGGER.indent():
                            G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches)))
                            G_LOGGER.extra_verbose("{:35} | Mismatched values:\n{:}".format(iter_result0.runner_name, out0[mismatches]))
                            G_LOGGER.extra_verbose("{:35} | Mismatched values:\n{:}".format(iter_result1.runner_name, out1[mismatches]))
                    except Exception as err:
                        G_LOGGER.warning("Failing to log mismatches.\nNote: Error was: {:}".format(err))

                # Log information about the outputs
                hist_bin_range = (min(comp_util.compute_min(out0), comp_util.compute_min(out1)),
                                  max(comp_util.compute_max(out0), comp_util.compute_max(out1)))
                comp_util.log_output_stats(out0, failed, iter_result0.runner_name + ": " + out0_name, hist_range=hist_bin_range)
                comp_util.log_output_stats(out1, failed, iter_result1.runner_name + ": " + out1_name, hist_range=hist_bin_range)

                G_LOGGER.info("Error Metrics: {:}".format(out0_name))
                with G_LOGGER.indent():
                    def req_tol(mean_diff, median_diff, max_diff, elemwise_diff):
                        return {
                            "mean": mean_diff,
                            "median": median_diff,
                            "max": max_diff,
                            "elemwise": elemwise_diff,
                        }[per_out_err_stat]

                    G_LOGGER.info("Minimum Required Tolerance: {:} error | [abs={:.5g}] OR [rel={:.5g}]".format(
                                    per_out_err_stat,
                                    req_tol(mean_absdiff, median_absdiff, max_absdiff, max_elemwiseabs),
                                    req_tol(mean_reldiff, median_reldiff, max_reldiff, max_elemwiserel)))
                    comp_util.log_output_stats(absdiff, failed, "Absolute Difference")
                    comp_util.log_output_stats(reldiff, failed, "Relative Difference")

                # Finally show summary.
                if failed:
                    G_LOGGER.error("FAILED | Difference exceeds tolerance (rel={:}, abs={:})".format(per_out_rtol, per_out_atol))
                else:
                    G_LOGGER.finish("PASSED | Difference is within tolerance (rel={:}, abs={:})".format(per_out_rtol, per_out_atol))

                G_LOGGER.extra_verbose("Finished comparing: '{:}' (dtype={:}, shape={:}) [{:}] and '{:}' (dtype={:}, shape={:}) [{:}]"
                                .format(out0_name, out0.dtype, out0.shape, iter_result0.runner_name, out1_name, out1.dtype, out1.shape, iter_result1.runner_name))
                return OutputCompareResult(not failed, max_absdiff, max_reldiff, mean_absdiff, mean_reldiff, median_absdiff, median_reldiff)
Example #20
        def compare_output(iter_result0, iter_result1):
            """
            Compare the outputs of two runners from a single iteration.

            This function will always iterate over the output names of the first IterationResult,
                and attempt to find corresponding output names in the second.
            If no corresponding output name is found, the output is skipped.
            If all output names are skipped, then this function raises an error.

            Args:
                iter_result0 (IterationResult): The result of the first runner.
                iter_result1 (IterationResult): The result of the second runner.

            Returns:
                OrderedDict[str, OutputCompareResult]:
                        The name of the outputs compared, derived from the first IterationResult,
                        and whether they matched. If an output name is not found, it is omitted from this dictionary.

            Raises:
                PolygraphyException: If all output names are skipped, and thus no outputs are compared.
            """
            def check_dict(dct, dict_name):
                if isinstance(dct, dict):
                    util.check_dict_contains(
                        dct,
                        set(iter_result0.keys()) | set(iter_result1.keys())
                        | {""},
                        check_missing=False,
                        dict_name=dict_name,
                    )

            check_dict(rtol, "the rtol dictionary")
            check_dict(atol, "the atol dictionary")
            check_dict(check_error_stat, "the check_error_stat dictionary")

            output_status = OrderedDict(
            )  # OrderedDict[str, bool] Maps output names to whether they matched.

            if not check_shapes:
                G_LOGGER.info(
                    "Strict shape checking disabled. Will attempt to match output shapes before comparisons"
                )

            def default_find_output_func(output_name, index, iter_result):
                found_name = util.find_in_dict(output_name, iter_result, index)
                if found_name is None:
                    return None
                elif found_name != output_name:
                    exact_match = util.find_in_dict(found_name, iter_result0)
                    if exact_match == found_name:
                        G_LOGGER.verbose(
                            "Will not compare {:} with {:}, since the former already has an exact match: {:}"
                            .format(found_name, output_name, exact_match))
                        return None  # If the found output is being compared against another output already, skip this non-exact match
                    G_LOGGER.warning(
                        "Output names did not match exactly. Assuming {:} output: {:} "
                        "corresponds to output: {:}".format(
                            iter_result.runner_name, found_name, output_name))
                return [found_name]

            nonlocal find_output_func
            find_output_func = util.default(find_output_func,
                                            default_find_output_func)

            for index, (out0_name, output0) in enumerate(iter_result0.items()):
                out1_names = util.default(
                    find_output_func(out0_name, index, iter_result1), [])

                if len(out1_names) > 1:
                    G_LOGGER.info(
                        "Will attempt to compare output: '{:}' [{:}] with multiple outputs: '{:}' [{:}]"
                        .format(out0_name, iter_result0.runner_name,
                                list(out1_names), iter_result1.runner_name))

                for out1_name in out1_names:
                    if out1_name is None or out1_name not in iter_result1:
                        G_LOGGER.warning(
                            "For output: '{:}' [{:}], skipping corresponding output: '{:}' [{:}], "
                            "since the output was not found".format(
                                out0_name, iter_result0.runner_name, out1_name,
                                iter_result1.runner_name))
                        continue

                    per_out_atol = util.value_or_from_dict(
                        atol, out0_name, default_atol)
                    per_out_rtol = util.value_or_from_dict(
                        rtol, out0_name, default_rtol)
                    per_out_err_stat = util.value_or_from_dict(
                        check_error_stat, out0_name, default_error_stat)

                    output1 = iter_result1[out1_name]
                    G_LOGGER.start(
                        "Comparing Output: '{:}' (dtype={:}, shape={:}) with '{:}' (dtype={:}, shape={:}) | "
                        "Tolerance: [abs={:.5g}, rel={:.5g}] | Checking {:} error"
                        .format(
                            out0_name,
                            output0.dtype,
                            output0.shape,
                            out1_name,
                            output1.dtype,
                            output1.shape,
                            per_out_atol,
                            per_out_rtol,
                            per_out_err_stat,
                        ))
                    G_LOGGER.extra_verbose(
                        "Note: Comparing {:} vs. {:}".format(
                            iter_result0.runner_name,
                            iter_result1.runner_name))

                    with G_LOGGER.indent():
                        if check_shapes and output0.shape != output1.shape:
                            G_LOGGER.error(
                                "Will not compare outputs of different shapes. Note: Output shapes are "
                                "{:} and {:}.".format(output0.shape,
                                                      output1.shape))
                            G_LOGGER.error(
                                "Note: Use --no-shape-check or set check_shapes=False to "
                                "attempt to compare values anyway.",
                                mode=LogMode.ONCE,
                            )
                            outputs_match = False
                        else:
                            output1 = util.try_match_shape(
                                output1, output0.shape)
                            output0 = output0.reshape(output1.shape)
                            outputs_match = check_outputs_match(
                                output0,
                                out0_name,
                                output1,
                                out1_name,
                                per_out_rtol=per_out_rtol,
                                per_out_atol=per_out_atol,
                                per_out_err_stat=per_out_err_stat,
                                runner0_name=iter_result0.runner_name,
                                runner1_name=iter_result1.runner_name,
                            )

                        output_status[out0_name] = outputs_match
                        if fail_fast and not outputs_match:
                            return output_status

            mismatched_output_names = [
                name for name, matched in output_status.items() if not matched
            ]
            if mismatched_output_names:
                G_LOGGER.error("FAILED | Mismatched outputs: {:}".format(
                    mismatched_output_names))
            else:
                G_LOGGER.finish(
                    "PASSED | All outputs matched | Outputs: {:}".format(
                        list(output_status.keys())))

            # This is useful for catching cases where Polygraphy does something wrong with the runner output buffers
            if not output_status and (bool(iter_result0.keys())
                                      or bool(iter_result1.keys())):
                r0_name = iter_result0.runner_name
                r0_outs = list(iter_result0.keys())
                r1_name = iter_result1.runner_name
                r1_outs = list(iter_result1.keys())
                G_LOGGER.critical(
                    "All outputs were skipped, no common outputs found! Note:\n{:} outputs: "
                    "{:}\n{:} outputs: {:}".format(r0_name, r0_outs, r1_name,
                                                   r1_outs))

            return output_status