def execute_runner(runner, loader_cache):
    """
    Activate ``runner``, optionally perform warm-up inferences, then run one timed
    inference per feed_dict supplied by ``loader_cache`` and collect the results.

    Args:
        runner: A runner context manager; entering it yields the active runner
                providing ``get_input_metadata()``, ``infer()``, ``last_inference_time()``,
                and ``name``.
        loader_cache: A DataLoaderCache; indexed/iterated to obtain feed_dicts.

    Returns:
        list: One IterationResult per iteration of ``loader_cache``, each holding a
              deep copy of the runner outputs, the per-run runtime, and the runner name.

    NOTE(review): ``warm_up`` is not a parameter — presumably a closure variable from
    the enclosing function; confirm it is an int number of warm-up runs.
    """
    with runner as active_runner:
        input_metadata = active_runner.get_input_metadata()
        # LogMode.ONCE: only log input metadata for the first runner/iteration.
        G_LOGGER.info("{:35}\n---- Model Input(s) ----\n{:}".format(active_runner.name, input_metadata), mode=LogMode.ONCE)

        # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
        # based on the provided input_metadata.
        loader_cache.set_input_metadata(input_metadata)

        if warm_up:
            G_LOGGER.start("{:35} | Running {:} warm-up run(s)".format(active_runner.name, warm_up))
            try:
                # Reuse the first feed_dict for all warm-up runs.
                feed_dict = loader_cache[0]
            except IndexError:
                # Best-effort: an empty data loader downgrades warm-up to a warning.
                G_LOGGER.warning("{:} warm-up run(s) were requested, but data loader did not supply any data. "
                                 "Skipping warm-up run(s)".format(warm_up))
            else:
                G_LOGGER.ultra_verbose("Warm-up Input Buffers:\n{:}".format(util.indent_block(feed_dict)))
                # First do a few warm-up runs, and don't time them.
                for _ in range(warm_up):
                    active_runner.infer(feed_dict=feed_dict)
                G_LOGGER.finish("{:35} | Finished {:} warm-up run(s)".format(active_runner.name, warm_up))

        # Then, actual iterations.
        # NOTE(review): if loader_cache yields nothing, index stays 0 and the final
        # message reports "Completed 1 iteration(s)" with 0 ms runtime — confirm
        # whether an empty loader should be an error instead.
        index = 0
        iteration_results = []
        total_runtime = 0
        for index, feed_dict in enumerate(loader_cache):
            # Lambdas defer the (potentially expensive) formatting until the logger
            # actually emits at this verbosity.
            G_LOGGER.extra_verbose(lambda: "{:35} | Feeding inputs:\n{:}".format(active_runner.name, util.indent_block(feed_dict)))
            outputs = active_runner.infer(feed_dict=feed_dict)

            runtime = active_runner.last_inference_time()
            total_runtime += runtime
            # Without a deep copy here, outputs will always reference the output of the last run
            iteration_results.append(IterationResult(outputs=copy.deepcopy(outputs), runtime=runtime, runner_name=active_runner.name))

            # LogMode.ONCE: output metadata is only logged for the first iteration.
            G_LOGGER.info(lambda: "{:35}\n---- Model Output(s) ----\n{:}".format(
                                active_runner.name, TensorMetadata().from_feed_dict(outputs)), mode=LogMode.ONCE)
            G_LOGGER.extra_verbose(lambda: "{:35} | Inference Time: {:.3f} ms | Received outputs:\n{:}".format(
                                        active_runner.name, runtime * 1000.0, util.indent_block(outputs)))

        total_runtime_ms = total_runtime * 1000.0
        G_LOGGER.finish("{:35} | Completed {:} iteration(s) in {:.4g} ms | Average inference time: {:.4g} ms.".format(active_runner.name, index + 1, total_runtime_ms, total_runtime_ms / float(index + 1)))
        return iteration_results
def validate_output(runner_name, output_name, output):
    """
    Validate a single output tensor against the enabled NaN/Inf checks and log the verdict.

    Args:
        runner_name (str): Name of the runner that produced the output (for logging).
        output_name (str): Name of the output tensor being validated.
        output: The output tensor to check.

    Returns:
        bool: True if the output passed every enabled check, False otherwise.
    """
    G_LOGGER.start("{:35} | Validating output: {:} (check_inf={:}, check_nan={:})".format(
        runner_name, output_name, check_inf, check_nan))
    with G_LOGGER.indent():
        comp_util.log_output_stats(output)

        # Run every enabled check unconditionally (each logs its own diagnostics),
        # then combine the verdicts — deliberately no short-circuiting.
        verdicts = []
        if check_nan:
            verdicts.append(is_not_nan(output))
        if check_inf:
            verdicts.append(is_finite(output))
        passed = all(verdicts)

        if passed:
            G_LOGGER.finish("PASSED | Output: {:} is valid".format(output_name))
        else:
            G_LOGGER.error("FAILED | Errors detected in output: {:}".format(output_name))
        return passed
def setup(self, args, network):
    """
    Select the higher marking precision and the layer-marking strategy.

    Validates that the requested precision is consistent with the builder
    configuration, rejects engine inputs (they cannot be modified), and
    instantiates the layer marker for the chosen mode.

    Args:
        args: Parsed command-line arguments (uses ``precision``, ``mode``, ``direction``).
        network: The network whose layers will be marked; only ``len(network)`` is used here.
    """
    self.precision = {"fp32": trt.float32, "fp16": trt.float16}[args.precision]

    trt_config = self.arg_groups[TrtConfigArgs]

    if self.precision == trt.float16:
        # fp16 marking is meaningless unless fp16 is enabled in the builder config.
        if not trt_config.fp16:
            G_LOGGER.exit(
                "Cannot mark layers to run in fp16 if it is not enabled in the builder configuration.\n"
                "Please also specify `--fp16` as a command-line option")
        # With only fp16 enabled, the "higher" precision is also the lowest one.
        if not trt_config.int8:
            G_LOGGER.warning(
                "Using fp16 as the higher precision, but fp16 is also the lowest precision available. "
                "Did you mean to set --int8 as well?")

    if not (trt_config.tf32 or trt_config.fp16 or trt_config.int8):
        G_LOGGER.exit(
            "Please enable at least one precision besides fp32 (e.g. --int8, --fp16, --tf32)")

    if self.arg_groups[ModelArgs].model_type == "engine":
        G_LOGGER.exit(
            "The precision tool cannot work with engines, as they cannot be modified. "
            "Please provide a different format, such as an ONNX or TensorFlow model.")

    G_LOGGER.start("Using {:} as higher precision".format(self.precision))

    # Dispatch on marking mode; unknown modes leave self.layer_marker unset,
    # matching the original if/elif behavior.
    marker_classes = {"linear": LinearMarker, "bisect": BisectMarker}
    marker_cls = marker_classes.get(args.mode)
    if marker_cls is not None:
        self.layer_marker = marker_cls(len(network), args.direction)