def update_meta_from_meta(meta, golden_meta):
    for name, (dtype, shape) in meta.items():
        if name in golden_meta:
            (golden_dtype, golden_shape) = golden_meta[name]
            meta[name] = (dtype or golden_dtype, shape or golden_shape)
            G_LOGGER.verbose("Updated tensor: {:} metadata to: {:}".format(name, meta[name]))
    return meta
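# Illustrative sketch (not part of the source): with plain tuples standing in for
# real dtype/shape objects, fields set to None are filled in from the golden
# metadata, while fields already present in `meta` take precedence:
#
#   meta = {"x": (None, (1, 3, 224, 224))}
#   golden = {"x": ("float32", None)}
#   update_meta_from_meta(meta, golden)  # -> {"x": ("float32", (1, 3, 224, 224))}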
def log_output_stats(output, info_hist=False, runner_name=None, hist_range=None):
    ret = str_output_stats(output, runner_name)
    G_LOGGER.info(ret)
    with G_LOGGER.indent():
        # Show histogram on failures.
        G_LOGGER.log(
            lambda: str_histogram(output, hist_range),
            severity=G_LOGGER.INFO if info_hist else G_LOGGER.VERBOSE,
        )
def add_tf_loader(script, args, disable_outputs=None, suffix=None):
    if disable_outputs:
        outputs = None
    else:
        outputs = _get_outputs_arg(script, args, "tf_outputs")

    model_file = args_util.get(args, "model_file")
    model_type = args_util.get(args, "model_type")

    save_pb = args_util.get(args, "save_pb")
    save_tensorboard = args_util.get(args, "save_tensorboard")

    if model_type == "ckpt":
        G_LOGGER.verbose(
            "Loading a TensorFlow checkpoint. Please ensure you are not using the --use-subprocess flag",
            mode=LogMode.ONCE,
        )
        script.add_import(imports=["GraphFromCkpt"], frm="polygraphy.backend.tf")
        loader_id = "load_ckpt"
        loader_str = Script.invoke("GraphFromCkpt", model_file, args_util.get(args, "ckpt"))
    elif model_type == "keras":
        script.add_import(imports=["GraphFromKeras"], frm="polygraphy.backend.tf")
        loader_id = "load_keras"
        loader_str = Script.invoke("GraphFromKeras", model_file)
    else:
        script.add_import(imports=["GraphFromFrozen"], frm="polygraphy.backend.tf")
        G_LOGGER.verbose(
            "Attempting to load as a frozen graph. If this is not correct, please specify --model-type",
            mode=LogMode.ONCE,
        )
        loader_id = "load_frozen"
        loader_str = Script.invoke("GraphFromFrozen", model_file)

    loader_name = script.add_loader(loader_str, loader_id, suffix=suffix)

    if args_util.get(args, "freeze_graph"):
        script.add_import(imports=["OptimizeGraph"], frm="polygraphy.backend.tf")
        loader_name = script.add_loader(Script.invoke("OptimizeGraph", loader_name), "optimize_graph", suffix=suffix)

    if args_util.get(args, "tftrt"):
        script.add_import(imports=["UseTfTrt"], frm="polygraphy.backend.tf")
        loader_str = Script.invoke(
            "UseTfTrt",
            loader_name,
            max_workspace_size=args_util.get(args, "workspace"),
            fp16=args_util.get(args, "fp16"),
            int8=args_util.get(args, "int8"),
            max_batch_size=args_util.get(args, "batch_size"),
            is_dynamic_op=args_util.get(args, "dynamic_op"),
            minimum_segment_size=args_util.get(args, "minimum_segment_size"),
        )
        loader_name = script.add_loader(loader_str, "use_tftrt", suffix=suffix)

    MODIFY_TF = "ModifyGraph"
    modify_tf_str = Script.invoke(MODIFY_TF, loader_name, outputs=outputs)
    if modify_tf_str != Script.invoke(MODIFY_TF, loader_name):
        script.add_import(imports=[MODIFY_TF], frm="polygraphy.backend.tf")
        loader_name = script.add_loader(modify_tf_str, "modify_tf")

    engine_dir = None
    if args_util.get(args, "tftrt"):
        engine_dir = args_util.get(args, "save_engine")

    WRITE_TF = "SaveGraph"
    write_tf_str = Script.invoke(
        WRITE_TF, loader_name, path=save_pb, tensorboard_dir=save_tensorboard, engine_dir=engine_dir
    )
    if write_tf_str != Script.invoke(WRITE_TF, loader_name):
        script.add_import(imports=[WRITE_TF], frm="polygraphy.backend.tf")
        loader_name = script.add_loader(write_tf_str, "save_tf")

    return loader_name
def select_layers(self):
    if self.direction == "forward":
        G_LOGGER.info("Selecting first {:} layer(s) to run in higher precision".format(self.num_layers))
        return range(0, self.num_layers)
    else:
        G_LOGGER.info("Selecting last {:} layer(s) to run in higher precision".format(self.num_layers))
        return range(self.max_layers - self.num_layers, self.max_layers)
def ensure_safe(inp):
    """
    Ensures that the input is marked as a safe string (i.e. Script.String(safe=True)).
    """
    if config.INTERNAL_CORRECTNESS_CHECKS:
        if not isinstance(inp, Script.String):
            G_LOGGER.internal_error("Input to ensure_safe must be of type Script.String, but was: {:}".format(inp))
        elif not inp.safe:
            G_LOGGER.internal_error(
                "Input string: {:} was not checked for safety. "
                "This is a potential security risk!".format(inp)
            )
    return inp
def stop(self, index, success):
    if success:
        self.success_message()
        return True

    if index >= (self.max_layers - 1):
        G_LOGGER.error("Could not find a configuration that satisfied accuracy requirements.")
        return True
    return False
def __getitem__(self, key):
    if isinstance(key, int):
        return self.lst[key]

    for name, iteration_results in self.lst:
        if name == key:
            return iteration_results

    G_LOGGER.critical(
        "{:35} does not exist in this RunResults instance. Note: Available runners: {:}".format(
            key, list(self.keys())
        )
    )
def check_meta(name, dtype, shape, meta_type, needs_shape=True):
    if not self.check_meta:
        return
    if needs_shape and shape is None:
        G_LOGGER.warning(
            "{:} metadata should include shape, but no shape was "
            "provided for tensor: {:}".format(meta_type, name)
        )
    if dtype is None:
        G_LOGGER.warning(
            "{:} metadata should include data type, but no data type was "
            "provided for tensor: {:}".format(meta_type, name)
        )
def run(self, args):
    _, graph = super().import_graph(args)
    TENSOR_MAP = graph.tensors()

    def get_tensor(name):
        if name not in TENSOR_MAP:
            G_LOGGER.critical("Tensor: {:} does not exist in the model.".format(name))
        return TENSOR_MAP[name]

    # We populate outputs first because we may need to update output nodes from the
    # input tensors if output == input.
    output_tensors = []
    for name in args.outputs:
        if name in args.inputs:
            tensor = gs.Variable(name="{:}_polygraphy_surgeon_insert_output".format(name))
            # Bind outputs to outputs of original inputs.
            # This construct is required to preserve ordering of the input tensors in the output nodes.
            for out in get_tensor(name).outputs:
                for index, inp in enumerate(out.inputs):
                    if inp.name == name:
                        out.inputs[index] = tensor
            G_LOGGER.verbose("Generating new tensor for output: {:}".format(tensor))
        else:
            tensor = get_tensor(name)
            tensor.inputs.clear()

        output_tensors.append(tensor)

        # If the tensor has no consumers, it must have been a graph output, so
        # replace the corresponding entry in graph.outputs with the new tensor.
        if not tensor.outputs:
            for index, out in enumerate(graph.outputs):
                if out.name == name:
                    graph.outputs[index] = tensor

    input_tensors = []
    for name in args.inputs:
        tensor = get_tensor(name)
        tensor.outputs.clear()
        input_tensors.append(tensor)

    new_node = gs.Node(op=args.op, name=args.name, inputs=input_tensors, outputs=output_tensors)
    G_LOGGER.verbose("Generated new node: {:}".format(new_node))
    graph.nodes.append(new_node)

    # Since new graph outputs may be added, and we don't know the types, we skip type checks in ONNX-GraphSurgeon.
    super().export_graph(graph, args, do_type_check=False)
def assert_identifier(inp):
    """
    Checks if the argument can be a valid Python identifier.
    Raises a PolygraphyException if it can't.
    """
    if not inp.isidentifier():
        G_LOGGER.critical(
            "This argument must be a valid identifier. "
            "Provided argument cannot be a Python identifier: {:}".format(inp)
        )
    return inp
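# Example behavior (illustrative):
#
#   assert_identifier("load_ckpt")   # OK: valid Python identifier, returned unchanged.
#   assert_identifier("2bad-name")   # Logs a critical error (raises PolygraphyException).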
def write_calibration_cache(self, cache):
    self.cache_contents = cache.tobytes()
    self.has_cached_scales = True

    if self._cache is None:
        return

    try:
        util.save_file(contents=self.cache_contents, dest=self._cache, description="calibration cache")
    except:
        G_LOGGER.warning("Could not write to calibration cache: {:}".format(self._cache))
def pop_meta(name):
    nonlocal tensor_meta_arg
    tensor_meta_arg, _, val = tensor_meta_arg.rpartition(SEP)
    if not tensor_meta_arg:
        G_LOGGER.critical(
            "Could not parse {:} from argument: {:}. Is it separated by a comma "
            "(,) from the tensor name?".format(name, orig_tensor_meta_arg)
        )
    if val.lower() == "auto":
        val = None
    return val
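# Standalone sketch (illustrative, not from the source) of the rpartition-based
# parsing used above: fields are peeled off from the right, so tensor names that
# themselves contain the separator still parse correctly. Assumes SEP is ",".
def _demo_parse_tensor_meta(arg, sep=","):
    remainder, _, shape = arg.rpartition(sep)
    name, _, dtype = remainder.rpartition(sep)
    return name, dtype, shape

assert _demo_parse_tensor_meta("my,odd,name,float32,1x3x224x224") == ("my,odd,name", "float32", "1x3x224x224")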
def fill_defaults(self, network, default_shape_value=None):
    """
    Fill this profile with sane default values for any bindings whose
    shapes have not been set explicitly.

    Args:
        network (trt.INetworkDefinition):
                The TensorRT network this profile is meant for.
                This will be used to determine model inputs and their shapes.
        default_shape_value (int):
                The value to use to override dynamic dimensions.

    Returns:
        Profile: Self
    """
    default_shape_value = util.default(default_shape_value, constants.DEFAULT_SHAPE_VALUE)

    for idx in range(network.num_inputs):
        inp = network.get_input(idx)

        if inp.name in self:
            continue

        with G_LOGGER.verbosity(G_LOGGER.CRITICAL):  # WAR for spam from TRT
            is_shape_tensor = inp.is_shape_tensor

        if is_shape_tensor:
            rank = inp.shape[0]
            shape = (default_shape_value,) * rank
            G_LOGGER.warning(
                "{:} | No values provided; Will use input values: {:} for min/opt/max in profile.\n".format(
                    trt_util.str_from_tensor(inp, is_shape_tensor), shape
                ),
                mode=LogMode.ONCE,
            )
            G_LOGGER.warning(
                "This will cause the shape-tensor to have static values. If this is incorrect, please "
                "set the range of values for this input shape-tensor.",
                mode=LogMode.ONCE,
            )
        else:
            shape = util.override_dynamic_shape(inp.shape, default_shape_value)
            if shape != inp.shape:
                G_LOGGER.warning(
                    "{:} | No shapes provided; Will use shape: {:} for min/opt/max in profile.\n".format(
                        trt_util.str_from_tensor(inp, is_shape_tensor), shape
                    ),
                    mode=LogMode.ONCE,
                )
                G_LOGGER.warning(
                    "This will cause the tensor to have a static shape. If this is incorrect, please "
                    "set the range of shapes for this input tensor.",
                    mode=LogMode.ONCE,
                )

        self.add(inp.name, shape, shape, shape)
    return self
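# Usage sketch (hedged): given a TensorRT network with dynamic input shapes,
# fill_defaults() pins every unspecified dimension to DEFAULT_SHAPE_VALUE for
# min/opt/max. The loader call below assumes polygraphy's functional TRT backend
# and a local "model.onnx":
#
#   from polygraphy.backend.trt import network_from_onnx_path
#   builder, network, parser = network_from_onnx_path("model.onnx")
#   profile = Profile().fill_defaults(network)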
def try_add(layer_type, layer_cls):
    try:
        layer_type = getattr(trt.LayerType, layer_type)
        layer_cls = getattr(trt, layer_cls)
    except AttributeError:
        if config.INTERNAL_CORRECTNESS_CHECKS:
            G_LOGGER.warning(
                "Could not find one or more of layer type: {:} or layer class: {:}".format(layer_type, layer_cls)
            )
    else:
        layer_class_mapping[layer_type] = layer_cls
def __iadd__(self, other):
    if not isinstance(other, Script.String):
        G_LOGGER.internal_error("Cannot concatenate str and Script.String. Note: str was: {:}".format(other))
    elif self.safe != other.safe:
        G_LOGGER.internal_error(
            "Cannot concatenate unsafe string ({:}) to safe string ({:})!".format(other, self.s)
        )
    self.s += other.s
    return self
def get_static_shape(name, shape):
    static_shape = shape
    if util.is_shape_dynamic(shape):
        static_shape = util.override_dynamic_shape(shape)
        if static_shape != shape and name not in self.user_input_metadata:
            if not util.is_valid_shape_override(static_shape, shape):
                G_LOGGER.critical(
                    "Input tensor: {:} | Cannot override original shape: {:} to {:}".format(
                        name, shape, static_shape
                    )
                )
            G_LOGGER.warning(
                "Input tensor: {:} | Will generate data of shape: {:}.\n"
                "If this is incorrect, please set input_metadata "
                "or provide a custom data loader.".format(name, static_shape),
                mode=LogMode.ONCE,
            )
    return static_shape
def display_results(results):
    results_str = ""
    results_str += "==== Run Results ({:} runners) ====\n\n".format(len(results))

    for runner_name, iters in results.items():
        results_str += "---- {:35} ({:} iterations) ----\n".format(runner_name, len(iters))
        results_str += str_from_iters(iters) + "\n"

    results_str = util.indent_block(results_str, level=0).strip()
    G_LOGGER.info(results_str)
def __init__(
    self,
    network_loader=None,
    max_workspace_size=None,
    max_batch_size=None,
    fp16=None,
    tf32=None,
    load_engine=None,
    save_engine=None,
    layerwise=False,
    plugins=None,
    name=None,
):
    """
    Creates a runner that manages a single TensorRT engine.

    Args:
        network_loader (BaseModelLoader):
                A loader that returns a TRT builder, network, parser, and input shapes.
        max_workspace_size (int): The maximum workspace size.
        max_batch_size (int): The maximum batch size.
        fp16 (bool): Whether to run in fp16 mode.
        layerwise (bool): Whether to retrieve the outputs of every layer in the network.
        name (str):
                The human-readable name prefix to use for this runner.
                A runner count and timestamp will be appended to this prefix.
    """
    G_LOGGER.warning("TrtLegacyRunner is deprecated, and will be removed in a future release")

    # Load any user-supplied plugin libraries. This must happen before everything else,
    # including engine deserialization.
    if plugins:
        import ctypes

        for plugin in plugins:
            path = os.path.abspath(plugin)
            G_LOGGER.info("Loading plugin library: {:}".format(path))
            ctypes.CDLL(path)

    # Choose a unique name for this runner.
    super().__init__(name=name, prefix="trt-legacy-runner")

    # Save parameters for activate and deactivate.
    self.network_loader = network_loader
    self.max_workspace_size = util.default(max_workspace_size, 1 << 24)
    self.fp16 = util.default(fp16, False)
    self.tf32 = util.default(tf32, False)
    self.load_engine = load_engine
    self.engine_path = save_engine
    self.layerwise = layerwise
    self.max_batch_size = max_batch_size
def try_reshape(arr, shape):
    original_shape = arr.shape
    try:
        arr = arr.reshape(shape)
    except ValueError:
        G_LOGGER.warning("Could not reshape array from shape: {:} to {:}. Skipping reshape.".format(arr.shape, shape))
    else:
        if arr.shape != original_shape:
            G_LOGGER.info("Reshaped array from shape: {:} to: {:}".format(original_shape, arr.shape))
    return arr
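# Illustrative behavior sketch: an incompatible reshape is skipped with a warning
# instead of raising, so callers can treat the result as best-effort:
#
#   import numpy as np
#   arr = try_reshape(np.zeros((2, 3)), (3, 2))  # Succeeds; new shape is (3, 2).
#   arr = try_reshape(arr, (7, 7))               # Fails; warns and returns arr with shape (3, 2).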
def get_input_metadata(graph):
    input_tensors = []
    input_nodes = find_nodes_by_ops(graph.as_graph_def(), ["Placeholder", "FIFOQueue"])
    G_LOGGER.verbose(
        "Found input tensors: {:}".format(["{:}: {:}".format(n.name, n.op) for n in input_nodes])
    )
    for node in input_nodes:
        input_tensors.append(graph.get_tensor_by_name(node.name + ":0"))

    G_LOGGER.verbose("Retrieved TensorFlow input_tensors: {:}".format(input_tensors))
    return get_tensor_metadata(input_tensors)
def try_receive_on_queue(queue, timeout=None):
    try:
        obj = receive_on_queue(queue, timeout)
        if obj is None:
            G_LOGGER.warning(
                "Received {:} on the queue. This likely means that there was an error in sending "
                "the object over the queue. You may want to run with use_subprocess=False in Comparator.run() "
                "or omit the --use-subprocess flag to prevent further issues.".format(obj)
            )
        return obj
    except Exception as err:
        G_LOGGER.warning(
            "Could not receive on queue: {:}\nYou may want to run with use_subprocess=False in Comparator.run() "
            "or omit the --use-subprocess flag to prevent further issues.".format(err)
        )
        return None
def load_from_cache():
    if self._cache is None or not util.get_file_size(self._cache):
        return None

    try:
        return util.load_file(self._cache, description="calibration cache")
    except Exception as err:
        G_LOGGER.error(
            "Could not read from calibration cache: {:}\nNote: Error was: {:}".format(self._cache, err)
        )
        return None
def try_register_tool(module, tool_class):
    global TOOL_REGISTRY

    try:
        toolmod = importlib.import_module(module)
        ToolClass = getattr(toolmod, tool_class)
        TOOL_REGISTRY.append(ToolClass())
    except Exception as err:
        G_LOGGER.internal_error(
            "Could not load command-line tool: {:}.\nNote: Error was: {:}".format(tool_class.lower(), err)
        )
        TOOL_REGISTRY.append(MissingTool(tool_class.lower(), err=err))
def to_trt(self, builder, network):
    """
    Creates a TensorRT IOptimizationProfile based on the values set in this Profile.

    Args:
        builder (trt.Builder):
                A TensorRT builder. This will be used to construct the IOptimizationProfile.
        network (trt.INetworkDefinition):
                The TensorRT network the profile applies to.

    Returns:
        trt.IOptimizationProfile: A TensorRT optimization profile.
    """
    trt_profile = builder.create_optimization_profile()
    unused_keys = set(self.keys())
    available_inputs = set()
    for idx in range(network.num_inputs):
        inp = network.get_input(idx)
        if inp.name in unused_keys:
            unused_keys.remove(inp.name)
        available_inputs.add(inp.name)

        with G_LOGGER.verbosity():  # WAR for spam from TRT
            is_shape_tensor = inp.is_shape_tensor

        if is_shape_tensor:
            if inp.name in self:
                shapes = self[inp.name]
                trt_profile.set_shape_input(inp.name, shapes.min, shapes.opt, shapes.max)
                G_LOGGER.verbose(
                    "{:} | Setting input shape-tensor value range to: {:}".format(
                        trt_util.str_from_tensor(inp, is_shape_tensor), shapes
                    )
                )
            else:
                G_LOGGER.warning(
                    "{:} | No values provided. "
                    "Assuming this is not a dynamic shape-tensor.".format(
                        trt_util.str_from_tensor(inp, is_shape_tensor)
                    ),
                    mode=LogMode.ONCE,
                )
        else:
            shapes = self[inp.name]
            trt_profile.set_shape(inp.name, shapes.min, shapes.opt, shapes.max)
            G_LOGGER.verbose(
                "{:} | Setting input tensor shapes to: {:}".format(
                    trt_util.str_from_tensor(inp, is_shape_tensor), shapes
                )
            )

    if unused_keys:
        G_LOGGER.error(
            "Invalid inputs were provided to the optimization profile: {:}\n"
            "Note: Inputs available in the TensorRT network are: {:}".format(unused_keys, available_inputs)
        )

    return trt_util.check_profile(trt_profile)
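# Usage sketch (hedged; assumes a builder/network pair from a TRT loader and an
# input named "input0"): shapes set via add() are converted into a TensorRT
# optimization profile that can then be attached to a builder config:
#
#   profile = Profile().add("input0", min=(1, 3, 224, 224), opt=(4, 3, 224, 224), max=(8, 3, 224, 224))
#   trt_profile = profile.fill_defaults(network).to_trt(builder, network)
#   config.add_optimization_profile(trt_profile)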
def find(self):
    def run(indices):
        self.mark_layers(indices)
        return self.check_network("-".join(map(str, indices)))

    # Finds the num worst indices in acc_results.
    def find_worst(num, acc_results):
        acc_mapping = list(acc_results.values())[0][0]  # First iteration of first runner-pair.

        # Compute for each layer: atol / prev_atol, to determine which layers contribute the greatest error.
        # It is not enough to simply find the max(atol), because that doesn't account for error introduced
        # by previous layers.
        items = list(acc_mapping.items())
        ratios = []
        for (_, prev_tols), (outname, cur_tols) in zip(items[:-1], items[1:]):
            ratio = cur_tols.max_absdiff / prev_tols.max_absdiff
            ratios.append((ratio, outname))

        # Mark more layers on each iteration.
        ratios = sorted(ratios, reverse=True)[:num]
        G_LOGGER.verbose(
            "Found worst {:} layers (Format: (error ratio, tensor name)): {:}".format(num, ratios)
        )
        return [output_mapping[outname] for (ratio, outname) in ratios]

    if not self.makers[TrtLoaderArgs].outputs:
        G_LOGGER.critical(
            "worst-first requires all outputs to be marked as network outputs to determine where errors "
            "are being introduced. Please enable --trt-outputs mark all, and ensure that your golden "
            "outputs also include layer-wise results."
        )

    output_mapping = {}  # Maps output tensor names to producer layer indices.
    for layer_index, layer in enumerate(self.network):
        for out_index in range(layer.num_outputs):
            output_mapping[layer.get_output(out_index).name] = layer_index

    indices = []
    acc_results = run(indices)
    max_outputs = len(list(acc_results.values())[0][0]) - 1

    iter_num = 0
    # indices will be at most one less than the number of layers, since we're comparing layers against subsequent ones.
    while not bool(acc_results) and len(indices) < max_outputs:
        iter_num += 1
        indices = find_worst(self.args.top * iter_num, acc_results)
        acc_results = run(indices)

    if bool(acc_results):
        return indices
def run(self, args):
    if self.arg_groups[ModelArgs].model_file is None and args.runners:
        G_LOGGER.critical(
            "One or more runners was specified, but no model file was provided. Make sure you've specified "
            "the model path, and also that it's not being consumed as an argument for another parameter"
        )

    script = self.build_script(args)

    if args.gen_script:
        script.save(args.gen_script)
    else:
        exec(str(script))
def call_impl(self):
    """
    Returns:
        onnx.ModelProto: The ONNX model
    """
    G_LOGGER.info("Loading model: {:}".format(self.path))
    # If external_data_dir is not None, we'll load external data ourselves.
    model = onnx.load(self.path, load_external_data=self.external_data_dir is None)
    if self.external_data_dir is not None:
        external_data_helper.load_external_data_for_model(model, self.external_data_dir)
    return model
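# Usage sketch (hedged; assumes this is polygraphy's OnnxFromPath loader and that
# "model.onnx" exists, with any external weights under "weights/" - the
# external_data_dir keyword is an assumption about the constructor signature):
#
#   loader = OnnxFromPath("model.onnx", external_data_dir="weights/")
#   model = loader()  # Loaders are callable; call_impl() returns the onnx.ModelProto.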
def call_impl(self, *args, **kwargs):
    """
    Returns:
        object:
                The provided ``obj`` argument, or its return value if it is callable.
                Returns ``None`` if ``obj`` was not set.
    """
    for plugin in self.plugins:
        G_LOGGER.info("Loading plugin library: {:}".format(plugin))
        ctypes.CDLL(plugin)

    ret, _ = util.invoke_if_callable(self.obj, *args, **kwargs)
    return ret
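# Usage sketch (hedged; this resembles polygraphy's LoadPlugins pattern): plugin
# shared libraries are loaded first, then `obj` is invoked if it is callable, so
# an engine or network loader can be wrapped transparently. The plugin path and
# build_engine loader below are hypothetical:
#
#   load = LoadPlugins(plugins=["./libcustom_plugin.so"], obj=build_engine)
#   engine = load()  # Loads the .so files, then calls build_engine().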
def last_inference_time(self):
    """
    Returns the total inference time required during the last call to ``infer()``.

    Returns:
        float: The time in seconds, or None if runtime was not measured by the runner.
    """
    if self.inference_time is None:
        G_LOGGER.warning(
            "{:35} | inference_time was not set. Inference time will be incorrect! "
            "To correctly compare runtimes, please set the inference_time property in the "
            "infer() function".format(self.name),
            mode=LogMode.ONCE,
        )
        return None
    return self.inference_time
def validate_meta(meta):
    for (fmt, dtype) in meta:
        if not isinstance(fmt, trt.TensorFormat):
            G_LOGGER.critical(
                "'format' must be an instance of trt.TensorFormat, but is: {:}.\n"
                "Note: Provided input/output metadata was: {:}".format(fmt, meta)
            )
        if not isinstance(dtype, trt.DataType):
            G_LOGGER.critical(
                "'dtype' must be an instance of trt.DataType, but is: {:}.\n"
                "Note: Provided input/output metadata was: {:}".format(dtype, meta)
            )
    return meta
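# Illustrative sketch (assumes the tensorrt Python module is importable): the
# metadata is a sequence of (trt.TensorFormat, trt.DataType) pairs, so:
#
#   import tensorrt as trt
#   validate_meta([(trt.TensorFormat.LINEAR, trt.DataType.FLOAT)])  # Returns the metadata unchanged.
#   validate_meta([("LINEAR", "FLOAT")])                            # Fails: strings are not trt enums.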