def partition_and_infer(subgraph):
    def get_out_node_ids():
        # Gets the final output nodes - producer nodes of graph output tensors without other outputs.
        with subgraph.node_ids():
            out_node_ids = set()
            for out in subgraph.outputs:
                if not out.outputs and not isinstance(out, Constant):
                    for n_inp in out.inputs:
                        out_node_ids.add(n_inp.id)
        return out_node_ids

    # Compute each output node in a separate subgraph.
    out_node_ids = get_out_node_ids()
    constant_values = {}

    for index in out_node_ids:  # Have to use index since 'node' is not in part
        part = subgraph.copy()
        out_node = part.nodes[index]
        part.outputs = out_node.outputs
        part.name = "Folding: {:}".format([out.name for out in part.outputs])
        part.cleanup(remove_unused_graph_inputs=True)
        names = [out.name for out in part.outputs]

        try:
            # Determining types is not trivial, and ONNX-RT does its own type inference.
            sess = rt.InferenceSession(export_onnx(part, do_type_check=False).SerializeToString())
            values = sess.run(names, {})
        except Exception as err:
            G_LOGGER.warning("Inference failed for subgraph: {:}. Note: Error was:\n{:}".format(part.name, err))
            if partitioning == "recursive":
                G_LOGGER.verbose("Attempting to recursively partition subgraph")
                # Partition failed, peel off last node.
                # We only need to remove one node, so avoid doing an expensive call to cleanup()
                part.outputs = out_node.inputs
                del part.nodes[part.nodes.index(out_node)]
                out_node.outputs.clear()
                out_node.inputs.clear()
            else:
                G_LOGGER.info("You may see better results if you set partitioning='recursive'")
                if not error_ok:
                    raise err

            constant_values.update(partition_and_infer(part))
        else:
            constant_values.update({name: val for name, val in zip(names, values)})

    return constant_values
def register_func(func):
    if hasattr(Graph, func.__name__):
        G_LOGGER.warning(
            "Registered function: {:} is hidden by a Graph attribute or function with the same name. "
            "This function will never be called!".format(func.__name__)
        )

    # Default behavior is to register functions for all opsets.
    if opsets is None:
        Graph.GLOBAL_FUNC_MAP[func.__name__] = func
    else:
        for opset in opsets:
            Graph.OPSET_FUNC_MAP[opset][func.__name__] = func
    return func
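# --- Usage sketch (illustrative, not part of the source above) ---
# register_func is the closure applied by the public `Graph.register()` decorator. Once a
# function is registered, it becomes callable as a Graph method. The helper name "add" and
# the tensor names below are hypothetical; `gs.Graph.register()`, `Graph.layer()`, and
# `gs.Variable` are standard onnx-graphsurgeon APIs.
import numpy as np
import onnx_graphsurgeon as gs


@gs.Graph.register()
def add(self, a, b):
    # Exposed as graph.add(a, b) after registration.
    return self.layer(op="Add", inputs=[a, b], outputs=["add_out"])


graph = gs.Graph()
x = gs.Variable("x", dtype=np.float32, shape=(1, 3))
y = gs.Variable("y", dtype=np.float32, shape=(1, 3))
graph.inputs = [x, y]
graph.outputs = graph.add(x, y)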
def add_to_tensor_map(tensor):
    if not tensor.is_empty():
        if tensor.name in tensor_map and not (tensor_map[tensor.name] is tensor):
            msg = "Found distinct tensors that share the same name:\n[id: {:}] {:}\n[id: {:}] {:}\n".format(
                id(tensor_map[tensor.name]),
                tensor_map[tensor.name],
                id(tensor),
                tensor,
            )
            msg += "Note: Producer node(s) of first tensor:\n{:}\nProducer node(s) of second tensor:\n{:}".format(
                tensor_map[tensor.name].inputs,
                tensor.inputs,
            )

            if check_duplicates:
                G_LOGGER.critical(msg)
            G_LOGGER.warning(msg)

        tensor_map[tensor.name] = tensor
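# --- Usage sketch (illustrative, not part of the source above) ---
# add_to_tensor_map is the helper that `Graph.tensors()` applies to every node input/output
# while building its name -> tensor mapping. With check_duplicates=True, two distinct tensors
# sharing a name are reported via G_LOGGER.critical; otherwise only a warning is emitted.
tensor_map = graph.tensors(check_duplicates=True)  # {tensor_name: Tensor}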
def fold_constants(self, fold_shapes=True, recurse_subgraphs=True, partitioning=None, error_ok=True):
    """
    Folds constants in-place in the graph. The graph must be topologically sorted prior to
    calling this function (see `toposort()`).

    This function will not remove constants after folding them. In order to get rid of
    these hanging nodes, you can run the `cleanup()` function.

    *Note: Due to how this function is implemented, the graph must be exportable to ONNX,
    and evaluable in ONNX-Runtime. Additionally, ONNX-Runtime must be installed.*

    Args:
        fold_shapes (bool):
                Whether to fold `Shape` nodes in the graph.
                This requires shapes to be inferred in the graph, and can only fold static shapes.
                Defaults to True.
        recurse_subgraphs (bool):
                Whether to recursively fold constants in subgraphs.
                Defaults to True.
        partitioning (Union[str, None]):
                Whether/How to partition the graph so that errors in folding one part of a model
                do not affect other parts. Available modes are:

                - None: Do not partition the graph. If inference fails, no constants are folded.
                - "basic": Partition the graph. If inference fails in one partition, other
                  partitions will remain unaffected.
                - "recursive": Partition the graph recursively. If inference fails in a partition,
                  the partition will be further partitioned.

                Defaults to None.
        error_ok (bool):
                Whether inference errors should be suppressed. When this is False, any errors
                encountered during inference will be re-raised. Defaults to True.

    Returns:
        self
    """
    import onnxruntime as rt
    from onnx_graphsurgeon.exporters.onnx_exporter import export_onnx

    PARTITIONING_MODES = [None, "basic", "recursive"]
    if partitioning not in PARTITIONING_MODES:
        G_LOGGER.critical("Argument for parameter 'partitioning' must be one of: {:}".format(PARTITIONING_MODES))

    G_LOGGER.debug("Folding constants in {:}".format(self.name))

    graph_clone = self.copy()
    clone_tensors = graph_clone.tensors()

    # We find graph constants in two passes:
    # Pass 1 finds all Constant tensors in the graph, then walks over their outputs.
    # Pass 2 searches for Shape nodes that have variable inputs (i.e. not marked const in pass 1)
    # and turns them into Constants iff the input has a statically known shape.

    def update_foldable_outputs(graph_constants):
        def is_foldable(node):
            def all_tensors_const(tensors):
                return all([t.name in graph_constants for t in tensors])

            if not all_tensors_const(node.inputs):
                return False

            all_subgraph_foreign_tensors_const = True
            for attr in node.attrs.values():
                if isinstance(attr, Graph):
                    foreign_tensors = attr._foreign_tensors().values()
                    all_subgraph_foreign_tensors_const &= all_tensors_const(foreign_tensors)

            return all_subgraph_foreign_tensors_const

        # Walks along the outputs of graph_constants to see if they can also be computed statically.
        # Since the graph is topologically sorted, this should find all constant nodes in the graph.
        for node in graph_clone.nodes:
            if is_foldable(node):
                graph_constants.update({out.name: out for out in node.outputs})
        return graph_constants

    # Pass 1: Non-shape Constant Folding

    graph_constants = {name: tensor for name, tensor in clone_tensors.items() if isinstance(tensor, Constant)}

    # Replaces outputs of Constant nodes with constant tensors
    for tensor in clone_tensors.values():
        if len(tensor.inputs) == 1:
            node = tensor.inputs[0]
            if node.op == "Constant":
                graph_constants[tensor.name] = tensor.to_constant(
                    node.attrs["value"]._values
                )  # Using ._values avoids copying
                graph_constants[tensor.name].inputs.clear()

    graph_constants = update_foldable_outputs(graph_constants)

    # Pass 2: Shape Folding

    def get_producer(tensor, op):
        """
        Get the producer of the specified tensor iff it matches op
        """
        if len(tensor.inputs) != 1:
            return None

        node = tensor.inputs[0]
        if node.op != op:
            return None

        return node

    def get_input(node, index=0):
        """
        Get the input tensor of a node iff the input tensor is not already marked a graph constant.
        """
        if node is None:
            return None

        inp = node.inputs[index]

        # If the input was already found to be a constant, it will be folded anyway.
        if inp.name in graph_constants:
            return None

        return inp

    def handle_shape(tensor):
        inp = get_input(get_producer(tensor, "Shape"))
        if inp is None:
            return None

        if inp.shape is None or misc.is_dynamic_shape(inp.shape):
            return None
        return np.array(inp.shape, dtype=np.int64)

    def handle_shape_gather(tensor):
        gather = get_producer(tensor, "Gather")
        if gather is None:
            return None

        data = gather.inputs[0]
        indices_tensor = gather.inputs[1]

        inp = get_input(get_producer(data, "Shape"))
        if inp is None or inp.shape is None:
            return None

        if not isinstance(indices_tensor, Constant):
            return None

        indices = indices_tensor.values
        if not indices.shape:  # Scalar-case
            shape = inp.shape[int(indices)]
            if misc.is_dynamic_dimension(shape):
                return None
        else:
            shape = [inp.shape[index] for index in indices]
            if misc.is_dynamic_shape(shape):
                return None

        return np.array(shape, dtype=np.int64)

    # Finds the static shape of a shape node output if possible, otherwise returns None.
    def lower_shape(tensor):
        SHAPE_FOLD_FUNCS = [handle_shape, handle_shape_gather]
        for fold_func in SHAPE_FOLD_FUNCS:
            shape = fold_func(tensor)
            if shape is not None:
                return shape

    if fold_shapes:
        for tensor in clone_tensors.values():
            shape_of = lower_shape(tensor)

            if shape_of is not None:
                G_LOGGER.ultra_verbose("Folding shape tensor: {:} to: {:}".format(tensor.name, shape_of))
                graph_constants[tensor.name] = tensor.to_constant(shape_of)
                graph_constants[tensor.name].inputs.clear()

        graph_constants = update_foldable_outputs(graph_constants)

    def partition_and_infer(subgraph):
        def get_out_node_ids():
            # Gets the final output nodes - producer nodes of graph output tensors without other outputs.
            with subgraph.node_ids():
                out_node_ids = set()
                for out in subgraph.outputs:
                    if not out.outputs and not isinstance(out, Constant):
                        for n_inp in out.inputs:
                            out_node_ids.add(n_inp.id)
            return out_node_ids

        # Compute each output node in a separate subgraph.
        out_node_ids = get_out_node_ids()
        constant_values = {}

        for index in out_node_ids:  # Have to use index since 'node' is not in part
            part = subgraph.copy()
            out_node = part.nodes[index]
            part.outputs = out_node.outputs
            part.name = "Folding: {:}".format([out.name for out in part.outputs])
            part.cleanup(remove_unused_graph_inputs=True)
            names = [out.name for out in part.outputs]

            try:
                # Determining types is not trivial, and ONNX-RT does its own type inference.
                sess = rt.InferenceSession(export_onnx(part, do_type_check=False).SerializeToString())
                values = sess.run(names, {})
            except Exception as err:
                G_LOGGER.warning("Inference failed for subgraph: {:}. Note: Error was:\n{:}".format(part.name, err))
                if partitioning == "recursive":
                    G_LOGGER.verbose("Attempting to recursively partition subgraph")
                    # Partition failed, peel off last node.
                    # We only need to remove one node, so avoid doing an expensive call to cleanup()
                    part.outputs = out_node.inputs
                    del part.nodes[part.nodes.index(out_node)]
                    out_node.outputs.clear()
                    out_node.inputs.clear()
                else:
                    G_LOGGER.info("You may see better results if you set partitioning='recursive'")
                    if not error_ok:
                        raise err

                constant_values.update(partition_and_infer(part))
            else:
                constant_values.update({name: val for name, val in zip(names, values)})

        return constant_values

    # Next, evaluate the foldable variables with ONNX-Runtime
    graph_clone.outputs = [t for t in graph_constants.values() if not isinstance(t, Constant)]
    graph_clone.cleanup(remove_unused_graph_inputs=True)

    # Using ._values avoids a deep copy of the values.
    constant_values = {
        name: tensor._values for name, tensor in graph_constants.items() if isinstance(tensor, Constant)
    }
    if graph_clone.outputs:
        if partitioning:
            constant_values.update(partition_and_infer(graph_clone))
        else:
            names = [t.name for t in graph_clone.outputs]
            try:
                sess = rt.InferenceSession(export_onnx(graph_clone, do_type_check=False).SerializeToString())
                values = sess.run(names, {})
                constant_values.update({name: val for name, val in zip(names, values)})
            except Exception as err:
                G_LOGGER.warning(
                    "Inference failed. You may want to try enabling partitioning to see better results. "
                    "Note: Error was:\n{:}".format(err)
                )
                G_LOGGER.verbose("Note: Graph was:\n{:}".format(graph_clone))
                if not error_ok:
                    raise
    elif not constant_values:
        G_LOGGER.info(
            "Could not find any nodes in this graph ({:}) that can be folded. "
            "This could mean that constant folding has already been run on this graph. "
            "Skipping.".format(self.name)
        )

    # Finally, replace the Variables in the original graph with constants.
    if constant_values:
        graph_tensors = self.tensors()
        for name, values in constant_values.items():
            tensor = graph_tensors[name]
            if not isinstance(tensor, Constant):
                tensor.to_constant(values)
                tensor.inputs.clear()  # Constants do not need inputs

    # Folding subgraphs after the outer graph can lead to better folding.
    def fold_subgraphs():
        for node in self.nodes:
            for attr in node.attrs.values():
                if isinstance(attr, Graph):
                    attr.fold_constants(fold_shapes=fold_shapes, partitioning=partitioning)

    if recurse_subgraphs:
        fold_subgraphs()

    return self
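# --- Usage sketch (illustrative, not part of the source above) ---
# Typical driver for fold_constants(): import a model, sort it topologically, fold, then
# clean up the now-dangling producer nodes of the folded tensors. "model.onnx" is a
# hypothetical path.
import onnx
import onnx_graphsurgeon as gs

graph = gs.import_onnx(onnx.load("model.onnx"))
graph.toposort()                      # fold_constants() requires a topologically sorted graph
graph.fold_constants(error_ok=False)  # re-raise ONNX-Runtime errors instead of suppressing them
graph.cleanup()                       # fold_constants() does not remove the folded-away nodes itself
onnx.save(gs.export_onnx(graph), "model_folded.onnx")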
def fold_constants(self, fold_shapes=True, recurse_subgraphs=True, partitioning=None, error_ok=True):
    """
    Folds constants in-place in the graph. The graph must be topologically sorted prior to
    calling this function (see `toposort()`).

    This function will not remove constants after folding them. In order to get rid of
    these hanging nodes, you can run the `cleanup()` function.

    *Note: Due to how this function is implemented, the graph must be exportable to ONNX,
    and evaluable in ONNX-Runtime. Additionally, ONNX-Runtime must be installed.*

    Args:
        fold_shapes (bool):
                Whether to fold `Shape` nodes in the graph.
                This requires shapes to be inferred in the graph, and can only fold static shapes.
                Defaults to True.
        recurse_subgraphs (bool):
                Whether to recursively fold constants in subgraphs.
                Defaults to True.
        partitioning (Union[str, None]):
                Whether/How to partition the graph so that errors in folding one part of a model
                do not affect other parts. Available modes are:

                - None: Do not partition the graph. If inference fails, no constants are folded.
                - "basic": Partition the graph. If inference fails in one partition, other
                  partitions will remain unaffected.
                - "recursive": Partition the graph recursively. If inference fails in a partition,
                  the partition will be further partitioned.

                Defaults to None.
        error_ok (bool):
                Whether inference errors should be suppressed. When this is False, any errors
                encountered during inference will be re-raised. Defaults to True.

    Returns:
        self
    """
    import onnxruntime as rt
    from onnx_graphsurgeon.exporters.onnx_exporter import export_onnx

    PARTITIONING_MODES = [None, "basic", "recursive"]
    if partitioning not in PARTITIONING_MODES:
        G_LOGGER.critical("Argument for parameter 'partitioning' must be one of: {:}".format(PARTITIONING_MODES))

    # First perform shape tensor cast elision on the graph prior to other constant folding.
    # Search for Cast(s) (from int -> float) -> intermediate operator (with float constants) -> Cast(s) (back to int).
    # This pattern is problematic for TensorRT since these operations may be performed on Shape Tensors, which
    # are not allowed to be floating point type. Attempt to fold the pattern here.
    VALID_CAST_ELISION_OPS = ["Add", "Sub", "Mul", "Div", "Max", "Min", "Equal", "Greater", "Less", "Concat"]

    def run_cast_elision(node):
        import onnx

        if node.op not in VALID_CAST_ELISION_OPS:
            return

        # Get the list of input producer nodes that are Casts to float32 (TensorProto type 1).
        inp_casts = [
            inp_node
            for inp_tensor in node.inputs
            for inp_node in inp_tensor.inputs
            if inp_node.op == "Cast" and inp_node.attrs["to"] == 1
        ]

        # No cast nodes found, return early
        if not inp_casts:
            return

        # Ensure that all input cast nodes are casting from the same type
        final_type = None
        for inp in inp_casts:
            curr_type = onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[inp.inputs[0].dtype]
            final_type = final_type or curr_type
            if final_type != curr_type:
                return

        # Check validity and get list of output nodes
        out_casts = []
        for out_tensor in node.outputs:
            for out_node in out_tensor.outputs:
                # 6 and 7 are the TensorProto enum values for INT32 and INT64 respectively.
                if out_node.op != "Cast" or out_node.attrs["to"] not in [6, 7]:
                    # Can exit early if any of the output nodes are not valid casts
                    return
                out_casts.append(out_node)
                # Check that all final cast types are the same.
                curr_type = out_node.attrs["to"]
                if final_type != curr_type:
                    return

        # If all checks passed - update constant values.
        for inp in node.inputs:
            if isinstance(inp, Constant):
                inp.values = inp.values.astype(onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[final_type])

        # "Remove" cast nodes by changing I/O node operators to Identity.
        # Update corresponding tensor dtypes as well.
        def replace_with_identity(cast_node, change_dtype):
            cast_node.op = "Identity"
            cast_node.attrs = {}
            getattr(cast_node, change_dtype)[0].dtype = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[final_type]
            G_LOGGER.debug("Cast node {:} elided".format(cast_node.name))

        for inp in inp_casts:
            replace_with_identity(inp, change_dtype="outputs")

        for out in out_casts:
            replace_with_identity(out, change_dtype="inputs")

    # Perform shape tensor cast elision:
    if fold_shapes:
        G_LOGGER.debug("Performing shape tensor cast elision in {:}".format(self.name))
        try:
            for node in self.nodes:
                run_cast_elision(node)
        except Exception as err:
            if not error_ok:
                raise err
            G_LOGGER.warning("'{:}' routine failed with: {:}".format("Shape tensor cast elision", err))

    G_LOGGER.debug("Folding constants in {:}".format(self.name))

    graph_clone = self.copy()
    clone_tensors = graph_clone.tensors()

    # We find graph constants in two passes:
    # Pass 1 finds all Constant tensors in the graph, then walks over their outputs.
    # Pass 2 searches for Shape nodes that have variable inputs (i.e. not marked const in pass 1)
    # and turns them into Constants iff the input has a statically known shape.

    def update_foldable_outputs(graph_constants):
        def is_foldable(node):
            def all_tensors_const(tensors):
                return all([t.name in graph_constants for t in tensors])

            if not all_tensors_const(node.inputs):
                return False

            all_subgraph_foreign_tensors_const = True
            for attr in node.attrs.values():
                if isinstance(attr, Graph):
                    foreign_tensors = attr._foreign_tensors().values()
                    all_subgraph_foreign_tensors_const &= all_tensors_const(foreign_tensors)

            return all_subgraph_foreign_tensors_const

        # Walks along the outputs of graph_constants to see if they can also be computed statically.
        # Since the graph is topologically sorted, this should find all constant nodes in the graph.
        for node in graph_clone.nodes:
            if is_foldable(node):
                graph_constants.update({out.name: out for out in node.outputs})
        return graph_constants

    # Pass 1: Non-shape Constant Folding

    graph_constants = {name: tensor for name, tensor in clone_tensors.items() if isinstance(tensor, Constant)}

    # Replaces outputs of Constant nodes with constant tensors
    for tensor in clone_tensors.values():
        if len(tensor.inputs) == 1:
            node = tensor.inputs[0]
            if node.op == "Constant":
                graph_constants[tensor.name] = tensor.to_constant(
                    node.attrs["value"]._values
                )  # Using ._values avoids copying
                graph_constants[tensor.name].inputs.clear()

    graph_constants = update_foldable_outputs(graph_constants)

    # Pass 2: Shape Folding

    def get_producer(tensor, op):
        """
        Get the producer of the specified tensor iff it matches op
        """
        if len(tensor.inputs) != 1:
            return None

        node = tensor.inputs[0]
        if node.op != op:
            return None

        return node

    def get_input(node, index=0):
        """
        Get the input tensor of a node iff the input tensor is not already marked a graph constant.
        """
        if node is None:
            return None

        inp = node.inputs[index]

        # If the input was already found to be a constant, it will be folded anyway.
        if inp.name in graph_constants:
            return None

        return inp

    def get_scalar_value(tensor):
        """
        Gets the scalar value of a tensor with a single item
        """
        if not tensor.shape:
            return tensor.values
        else:
            return list(tensor.values)[0]

    def fold_shape(tensor):
        inp = get_input(get_producer(tensor, "Shape"))
        if inp is None:
            return None

        if inp.shape is None or misc.is_dynamic_shape(inp.shape):
            return None
        return np.array(inp.shape, dtype=np.int64)

    def fold_shape_gather(tensor):
        gather = get_producer(tensor, "Gather")
        if gather is None:
            return None

        data = gather.inputs[0]
        indices_tensor = gather.inputs[1]

        inp = get_input(get_producer(data, "Shape"))
        if inp is None or inp.shape is None:
            return None

        if not isinstance(indices_tensor, Constant):
            return None

        indices = indices_tensor.values
        if not indices.shape:  # Scalar-case
            shape = inp.shape[int(indices)]
            if misc.is_dynamic_dimension(shape):
                return None
        else:
            shape = [inp.shape[index] for index in indices]
            if misc.is_dynamic_shape(shape):
                return None

        return np.array(shape, dtype=np.int64)

    def fold_shape_slice(tensor):
        slice = get_producer(tensor, "Slice")
        if slice is None:
            return None

        data = slice.inputs[0]

        if len(slice.inputs) >= 3:
            starts, ends = slice.inputs[1:3]
            if any(not isinstance(t, Constant) for t in [starts, ends]):
                return None
            starts, ends = get_scalar_value(starts), get_scalar_value(ends)
        elif "starts" in slice.attrs and "ends" in slice.attrs:
            starts, ends = slice.attrs["starts"][0], slice.attrs["ends"][0]
        else:
            return None

        inp = get_input(get_producer(data, "Shape"))
        if inp is None or inp.shape is None:
            return None

        # For shape tensors, we can only slice on the 0th dimension.
        if len(slice.inputs) > 3:
            axes = slice.inputs[3]
            if not isinstance(axes, Constant):
                return None

            if get_scalar_value(axes) != 0:
                return None
        elif "axes" in slice.attrs:
            if slice.attrs["axes"][0] != 0:
                return None

        steps = 1
        if len(slice.inputs) > 4:
            steps = slice.inputs[4]
            if not isinstance(steps, Constant):
                return None
            steps = get_scalar_value(steps)
        elif "steps" in slice.attrs:
            steps = slice.attrs["steps"][0]

        shape = inp.shape[starts:ends:steps]
        if misc.is_dynamic_shape(shape):
            return None

        return np.array(shape, dtype=np.int64)

    if fold_shapes:
        # NOTE: The order of shape folding passes is important to maximize how much we fold (phase-ordering problem).
        SHAPE_FOLD_FUNCS = [fold_shape_gather, fold_shape_slice, fold_shape]
        for shape_fold_func in SHAPE_FOLD_FUNCS:
            try:
                for tensor in clone_tensors.values():
                    shape_of = shape_fold_func(tensor)

                    if shape_of is not None:
                        G_LOGGER.ultra_verbose("Folding shape tensor: {:} to: {:}".format(tensor.name, shape_of))
                        graph_constants[tensor.name] = tensor.to_constant(shape_of)
                        graph_constants[tensor.name].inputs.clear()
            except Exception as err:
                if not error_ok:
                    raise err
                G_LOGGER.warning("'{:}' routine failed with:\n{:}".format(shape_fold_func.__name__, err))
            else:
                graph_constants = update_foldable_outputs(graph_constants)

    def partition_and_infer(subgraph):
        def get_out_node_ids():
            # Gets the final output nodes - producer nodes of graph output tensors without other outputs.
            with subgraph.node_ids():
                out_node_ids = set()
                for out in subgraph.outputs:
                    if not out.outputs and not isinstance(out, Constant):
                        for n_inp in out.inputs:
                            out_node_ids.add(n_inp.id)
            return out_node_ids

        # Compute each output node in a separate subgraph.
        out_node_ids = get_out_node_ids()
        constant_values = {}

        for index in out_node_ids:  # Have to use index since 'node' is not in part
            part = subgraph.copy()
            out_node = part.nodes[index]
            part.outputs = out_node.outputs
            part.name = "Folding: {:}".format([out.name for out in part.outputs])
            part.cleanup(remove_unused_graph_inputs=True)
            names = [out.name for out in part.outputs]

            try:
                # Determining types is not trivial, and ONNX-RT does its own type inference.
                sess = rt.InferenceSession(export_onnx(part, do_type_check=False).SerializeToString())
                values = sess.run(names, {})
            except Exception as err:
                G_LOGGER.warning("Inference failed for subgraph: {:}. Note: Error was:\n{:}".format(part.name, err))
                if partitioning == "recursive":
                    G_LOGGER.verbose("Attempting to recursively partition subgraph")
                    # Partition failed, peel off last node.
                    # We only need to remove one node, so avoid doing an expensive call to cleanup()
                    part.outputs = out_node.inputs
                    del part.nodes[part.nodes.index(out_node)]
                    out_node.outputs.clear()
                    out_node.inputs.clear()
                else:
                    G_LOGGER.info("You may see better results if you set partitioning='recursive'")
                    if not error_ok:
                        raise err

                constant_values.update(partition_and_infer(part))
            else:
                constant_values.update({name: val for name, val in zip(names, values)})

        return constant_values

    # Next, evaluate the foldable variables with ONNX-Runtime.

    # Only evaluate foldable values that have non-foldable outputs or are graph outputs.
    # Otherwise, if all the outputs are foldable, then we can just evaluate the outputs directly.
    def should_eval_foldable(tensor):
        non_const = not isinstance(tensor, Constant)
        is_graph_output = not tensor.outputs
        has_non_foldable_outputs = any(out.name not in graph_constants for out in tensor.outputs)
        return non_const and (is_graph_output or has_non_foldable_outputs)

    graph_clone.outputs = [t for t in graph_constants.values() if should_eval_foldable(t)]
    G_LOGGER.debug("Folding tensors: {:}".format(graph_clone.outputs))
    graph_clone.cleanup(remove_unused_graph_inputs=True)

    # Using ._values avoids a deep copy of the values.
    constant_values = {
        name: tensor._values for name, tensor in graph_constants.items() if isinstance(tensor, Constant)
    }
    if graph_clone.outputs:
        if partitioning:
            constant_values.update(partition_and_infer(graph_clone))
        else:
            names = [t.name for t in graph_clone.outputs]
            try:
                sess = rt.InferenceSession(export_onnx(graph_clone, do_type_check=False).SerializeToString())
                values = sess.run(names, {})
                constant_values.update({name: val for name, val in zip(names, values)})
            except Exception as err:
                G_LOGGER.warning(
                    "Inference failed. You may want to try enabling partitioning to see better results. "
                    "Note: Error was:\n{:}".format(err)
                )
                G_LOGGER.verbose("Note: Graph was:\n{:}".format(graph_clone))
                if not error_ok:
                    raise
    elif not constant_values:
        G_LOGGER.info(
            "Could not find any nodes in this graph ({:}) that can be folded. "
            "This could mean that constant folding has already been run on this graph. "
            "Skipping.".format(self.name)
        )

    # Finally, replace the Variables in the original graph with constants.
    if constant_values:
        graph_tensors = self.tensors()
        for name, values in constant_values.items():
            tensor = graph_tensors[name]
            if not isinstance(tensor, Constant):
                tensor.to_constant(values)
                tensor.inputs.clear()  # Constants do not need inputs

    # Folding subgraphs after the outer graph can lead to better folding.
    def fold_subgraphs():
        for node in self.nodes:
            for attr in node.attrs.values():
                if isinstance(attr, Graph):
                    attr.fold_constants(fold_shapes=fold_shapes, partitioning=partitioning)

    if recurse_subgraphs:
        fold_subgraphs()

    return self
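# --- Usage sketch (illustrative, not part of the source above) ---
# The partitioning modes limit the blast radius of an ONNX-Runtime failure: with
# partitioning="recursive", a failing partition is split further so that the rest of the
# graph still folds. "model.onnx" is a hypothetical path.
import onnx
import onnx_graphsurgeon as gs

graph = gs.import_onnx(onnx.load("model.onnx"))
graph.toposort()
graph.fold_constants(fold_shapes=True, partitioning="recursive")
graph.cleanup(remove_unused_graph_inputs=True)
onnx.save(gs.export_onnx(graph), "model_folded.onnx")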