Example #1
        def partition_and_infer(subgraph):
            def get_out_node_ids():
                # Gets the final output nodes, i.e. producer nodes of graph output tensors that have no other consumers.
                with subgraph.node_ids():
                    out_node_ids = set()
                    for out in subgraph.outputs:
                        if not out.outputs and not isinstance(out, Constant):
                            for n_inp in out.inputs:
                                out_node_ids.add(n_inp.id)
                return out_node_ids

            # Compute each output node in a separate subgraph.
            out_node_ids = get_out_node_ids()
            constant_values = {}

            for index in out_node_ids:  # Use the node's index, since the node objects in 'subgraph' are not the ones in the copy 'part'
                part = subgraph.copy()
                out_node = part.nodes[index]
                part.outputs = out_node.outputs
                part.name = "Folding: {:}".format(
                    [out.name for out in part.outputs])
                part.cleanup(remove_unused_graph_inputs=True)
                names = [out.name for out in part.outputs]

                try:
                    # Determining types is not trivial, and ONNX-RT does its own type inference.
                    sess = rt.InferenceSession(
                        export_onnx(part,
                                    do_type_check=False).SerializeToString())
                    values = sess.run(names, {})
                except Exception as err:
                    G_LOGGER.warning(
                        "Inference failed for subgraph: {:}. Note: Error was:\n{:}"
                        .format(part.name, err))
                    if partitioning == "recursive":
                        G_LOGGER.verbose(
                            "Attempting to recursively partition subgraph")
                        # Partition failed, peel off last node.
                        # We only need to remove one node, so avoid doing an expensive call to cleanup()
                        part.outputs = out_node.inputs
                        del part.nodes[part.nodes.index(out_node)]
                        out_node.outputs.clear()
                        out_node.inputs.clear()
                    else:
                        G_LOGGER.info(
                            "You may see better results if you set partitioning='recursive'"
                        )
                        if not error_ok:
                            raise err

                    constant_values.update(partition_and_infer(part))
                else:
                    constant_values.update(
                        {name: val
                         for name, val in zip(names, values)})

            return constant_values
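
This helper only runs when `fold_constants()` is called with a partitioning mode. A minimal call-site sketch, assuming the standard onnx-graphsurgeon entry points and a hypothetical model file:

import onnx
import onnx_graphsurgeon as gs

graph = gs.import_onnx(onnx.load("model.onnx"))  # hypothetical model path

# With partitioning="recursive", a subgraph that fails ONNX-Runtime inference is split
# further (one node peeled off at a time) instead of blocking folding for the whole graph.
graph.fold_constants(partitioning="recursive").cleanup()

onnx.save(gs.export_onnx(graph), "model_folded.onnx")
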
Example #2
        def register_func(func):
            if hasattr(Graph, func.__name__):
                G_LOGGER.warning("Registered function: {:} is hidden by a Graph attribute or function with the same name. "
                                 "This function will never be called!".format(func.__name__))

            # Default behavior is to register functions for all opsets.
            if opsets is None:
                Graph.GLOBAL_FUNC_MAP[func.__name__] = func
            else:
                for opset in opsets:
                    Graph.OPSET_FUNC_MAP[opset][func.__name__] = func
            return func
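
In onnx-graphsurgeon, this closure is what the `Graph.register()` decorator returns, so registered functions become callable as Graph methods. A minimal sketch, where the `add` helper is illustrative rather than part of the library:

import numpy as np
import onnx_graphsurgeon as gs

@gs.Graph.register()
def add(self, a, b):
    # self.layer() creates the node and returns its output tensors.
    return self.layer(op="Add", inputs=[a, b], outputs=["add_out"])

graph = gs.Graph(opset=13)
x = gs.Variable("x", dtype=np.float32, shape=(1, 3))
y = gs.Variable("y", dtype=np.float32, shape=(1, 3))
graph.inputs = [x, y]

# The registered function is looked up via GLOBAL_FUNC_MAP and bound to the graph instance.
graph.outputs = graph.add(x, y)
graph.outputs[0].dtype = np.float32  # output dtypes must be set before exporting to ONNX
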
Example #3
        def add_to_tensor_map(tensor):
            if not tensor.is_empty():
                if tensor.name in tensor_map and not (tensor_map[tensor.name]
                                                      is tensor):
                    msg = "Found distinct tensors that share the same name:\n[id: {:}] {:}\n[id: {:}] {:}\n".format(
                        id(tensor_map[tensor.name]),
                        tensor_map[tensor.name],
                        id(tensor),
                        tensor,
                    )
                    msg += (
                        "Note: Producer node(s) of first tensor:\n{:}\nProducer node(s) of second tensor:\n{:}"
                        .format(
                            tensor_map[tensor.name].inputs,
                            tensor.inputs,
                        ))

                    if check_duplicates:
                        G_LOGGER.critical(msg)
                    G_LOGGER.warning(msg)

                tensor_map[tensor.name] = tensor
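
This closure underlies the tensor-map construction in `Graph.tensors()`; the `check_duplicates` flag decides whether a name collision between distinct tensor objects is fatal or only a warning. A small self-contained sketch, assuming the standard API:

import numpy as np
import onnx_graphsurgeon as gs

x = gs.Variable("x", dtype=np.float32, shape=(1, 3))
y = gs.Variable("y", dtype=np.float32, shape=(1, 3))
graph = gs.Graph(nodes=[gs.Node(op="Relu", inputs=[x], outputs=[y])],
                 inputs=[x], outputs=[y])

# Builds a name -> tensor mapping by visiting every node and graph I/O tensor.
tensor_map = graph.tensors()

# With check_duplicates=True, two distinct tensor objects sharing a name trigger
# G_LOGGER.critical() (which raises) instead of only logging a warning.
tensor_map = graph.tensors(check_duplicates=True)
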
Example #4
    def fold_constants(self,
                       fold_shapes=True,
                       recurse_subgraphs=True,
                       partitioning=None,
                       error_ok=True):
        """
        Folds constants in-place in the graph. The graph must be topologically sorted prior to
        calling this function (see `toposort()`).

        This function will not remove constants after folding them. In order to get rid of
        these hanging nodes, you can run the `cleanup()` function.

        *Note: Due to how this function is implemented, the graph must be exportable to ONNX,
        and evaluable in ONNX-Runtime. Additionally, ONNX-Runtime must be installed.*

        Args:
            fold_shapes (bool):
                    Whether to fold `Shape` nodes in the graph.
                    This requires shapes to be inferred in the graph, and can only fold
                    static shapes.
                    Defaults to True.
            recurse_subgraphs (bool):
                    Whether to recursively fold constants in subgraphs.
                    Defaults to True.
            partitioning (Union[str, None]):
                    Whether/How to partition the graph so that errors in folding one
                    part of a model do not affect other parts. Available modes are:

                    - None: Do not partition the graph. If inference fails, no constants are folded.
                    - "basic": Partition the graph. If inference fails in one partition, other partitions will
                            remain unaffected.
                    - "recursive": Parition the graph recursively. If inference fails in a partition, the partition
                            will be further paritioned.

                    Defaults to None.
            error_ok (bool):
                    Whether inference errors should be suppressed.
                    When this is disabled, any errors encountered during inference will be re-raised.
                    Defaults to True.

        Returns:
            self
        """
        import onnxruntime as rt
        from onnx_graphsurgeon.exporters.onnx_exporter import export_onnx

        PARTITIONING_MODES = [None, "basic", "recursive"]
        if partitioning not in PARTITIONING_MODES:
            G_LOGGER.critical(
                "Argument for parameter 'partitioning' must be one of: {:}".
                format(PARTITIONING_MODES))

        G_LOGGER.debug("Folding constants in {:}".format(self.name))

        graph_clone = self.copy()
        clone_tensors = graph_clone.tensors()

        # We find graph constants in two passes:
        # Pass 1 finds all Constant tensors in the graph, then walks over their outputs.
        # Pass 2 searches for Shape nodes that have variable inputs (i.e. not marked const in pass 1)
        #    and turns them into Constants iff the input has a statically known shape.

        def update_foldable_outputs(graph_constants):
            def is_foldable(node):
                def all_tensors_const(tensors):
                    return all([t.name in graph_constants for t in tensors])

                if not all_tensors_const(node.inputs):
                    return False

                all_subgraph_foreign_tensors_const = True
                for attr in node.attrs.values():
                    if isinstance(attr, Graph):
                        foreign_tensors = attr._foreign_tensors().values()
                        all_subgraph_foreign_tensors_const &= all_tensors_const(
                            foreign_tensors)
                return all_subgraph_foreign_tensors_const

            # Walks along the outputs of graph_constants to see if they can also be computed statically.
            # Since the graph is topologically sorted, this should find all constant nodes in the graph.
            for node in graph_clone.nodes:
                if is_foldable(node):
                    graph_constants.update(
                        {out.name: out
                         for out in node.outputs})
            return graph_constants

        # Pass 1: Non-shape Constant Folding

        graph_constants = {
            name: tensor
            for name, tensor in clone_tensors.items()
            if isinstance(tensor, Constant)
        }

        # Replaces outputs of Constant nodes with constant tensors
        for tensor in clone_tensors.values():
            if len(tensor.inputs) == 1:
                node = tensor.inputs[0]
                if node.op == "Constant":
                    graph_constants[tensor.name] = tensor.to_constant(
                        node.attrs["value"]._values
                    )  # Using ._values avoids copying
                    graph_constants[tensor.name].inputs.clear()

        graph_constants = update_foldable_outputs(graph_constants)

        # Pass 2: Shape Folding

        def get_producer(tensor, op):
            """
            Get the producer of the specified tensor iff it matches op
            """
            if len(tensor.inputs) != 1:
                return None

            node = tensor.inputs[0]
            if node.op != op:
                return None
            return node

        def get_input(node, index=0):
            """
            Get the input tensor of a node iff the input tensor is not already marked a graph constant.
            """
            if node is None:
                return None

            inp = node.inputs[index]

            # If the input was already found to be a constant, it will be folded anyway.
            if inp.name in graph_constants:
                return None

            return inp

        def handle_shape(tensor):
            inp = get_input(get_producer(tensor, "Shape"))
            if inp is None:
                return None

            if inp.shape is None or misc.is_dynamic_shape(inp.shape):
                return None
            return np.array(inp.shape, dtype=np.int64)

        def handle_shape_gather(tensor):
            gather = get_producer(tensor, "Gather")
            if gather is None:
                return None

            data = gather.inputs[0]
            indices_tensor = gather.inputs[1]

            inp = get_input(get_producer(data, "Shape"))
            if inp is None or inp.shape is None:
                return None

            if not isinstance(indices_tensor, Constant):
                return None

            indices = indices_tensor.values
            if not indices.shape:  # Scalar-case
                shape = inp.shape[int(indices)]
                if misc.is_dynamic_dimension(shape):
                    return None
            else:
                shape = [inp.shape[index] for index in indices]
                if misc.is_dynamic_shape(shape):
                    return None

            return np.array(shape, dtype=np.int64)

        # Finds the static shape of a shape node output if possible, otherwise returns None.
        def lower_shape(tensor):
            SHAPE_FOLD_FUNCS = [handle_shape, handle_shape_gather]
            for fold_func in SHAPE_FOLD_FUNCS:
                shape = fold_func(tensor)
                if shape is not None:
                    return shape

        if fold_shapes:
            for tensor in clone_tensors.values():
                shape_of = lower_shape(tensor)

                if shape_of is not None:
                    G_LOGGER.ultra_verbose(
                        "Folding shape tensor: {:} to: {:}".format(
                            tensor.name, shape_of))
                    graph_constants[tensor.name] = tensor.to_constant(shape_of)
                    graph_constants[tensor.name].inputs.clear()

            graph_constants = update_foldable_outputs(graph_constants)

        def partition_and_infer(subgraph):
            def get_out_node_ids():
                # Gets the final output nodes, i.e. producer nodes of graph output tensors that have no other consumers.
                with subgraph.node_ids():
                    out_node_ids = set()
                    for out in subgraph.outputs:
                        if not out.outputs and not isinstance(out, Constant):
                            for n_inp in out.inputs:
                                out_node_ids.add(n_inp.id)
                return out_node_ids

            # Compute each output node in a separate subgraph.
            out_node_ids = get_out_node_ids()
            constant_values = {}

            for index in out_node_ids:  # Use the node's index, since the node objects in 'subgraph' are not the ones in the copy 'part'
                part = subgraph.copy()
                out_node = part.nodes[index]
                part.outputs = out_node.outputs
                part.name = "Folding: {:}".format(
                    [out.name for out in part.outputs])
                part.cleanup(remove_unused_graph_inputs=True)
                names = [out.name for out in part.outputs]

                try:
                    # Determining types is not trivial, and ONNX-RT does its own type inference.
                    sess = rt.InferenceSession(
                        export_onnx(part,
                                    do_type_check=False).SerializeToString())
                    values = sess.run(names, {})
                except Exception as err:
                    G_LOGGER.warning(
                        "Inference failed for subgraph: {:}. Note: Error was:\n{:}"
                        .format(part.name, err))
                    if partitioning == "recursive":
                        G_LOGGER.verbose(
                            "Attempting to recursively partition subgraph")
                        # Partition failed, peel off last node.
                        # We only need to remove one node, so avoid doing an expensive call to cleanup()
                        part.outputs = out_node.inputs
                        del part.nodes[part.nodes.index(out_node)]
                        out_node.outputs.clear()
                        out_node.inputs.clear()
                    else:
                        G_LOGGER.info(
                            "You may see better results if you set partitioning='recursive'"
                        )
                        if not error_ok:
                            raise err

                    constant_values.update(partition_and_infer(part))
                else:
                    constant_values.update(
                        {name: val
                         for name, val in zip(names, values)})

            return constant_values

        # Next, evaluate the foldable variables with ONNX-Runtime
        graph_clone.outputs = [
            t for t in graph_constants.values() if not isinstance(t, Constant)
        ]
        graph_clone.cleanup(remove_unused_graph_inputs=True)

        # Using ._values avoids a deep copy of the values.
        constant_values = {
            name: tensor._values
            for name, tensor in graph_constants.items()
            if isinstance(tensor, Constant)
        }
        if graph_clone.outputs:
            if partitioning:
                constant_values.update(partition_and_infer(graph_clone))
            else:
                names = [t.name for t in graph_clone.outputs]
                try:
                    sess = rt.InferenceSession(
                        export_onnx(graph_clone,
                                    do_type_check=False).SerializeToString())
                    values = sess.run(names, {})
                    constant_values.update(
                        {name: val
                         for name, val in zip(names, values)})
                except Exception as err:
                    G_LOGGER.warning(
                        "Inference failed. You may want to try enabling partitioning to see better results. "
                        "Note: Error was:\n{:}".format(err))
                    G_LOGGER.verbose(
                        "Note: Graph was:\n{:}".format(graph_clone))
                    if not error_ok:
                        raise
        elif not constant_values:
            G_LOGGER.info(
                "Could not find any nodes in this graph ({:}) that can be folded. "
                "This could mean that constant folding has already been run on this graph. "
                "Skipping.".format(self.name))

        # Finally, replace the Variables in the original graph with constants.
        if constant_values:
            graph_tensors = self.tensors()
            for name, values in constant_values.items():
                tensor = graph_tensors[name]
                if not isinstance(tensor, Constant):
                    tensor.to_constant(values)
                    tensor.inputs.clear()  # Constants do not need inputs

        # Folding subgraphs after the outer graph can lead to better folding.
        def fold_subgraphs():
            for node in self.nodes:
                for attr in node.attrs.values():
                    if isinstance(attr, Graph):
                        attr.fold_constants(fold_shapes=fold_shapes,
                                            partitioning=partitioning)

        if recurse_subgraphs:
            fold_subgraphs()

        return self
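
As a quick sanity check of Pass 1 (non-shape constant folding), here is a small self-contained sketch, assuming the standard onnx-graphsurgeon API (all names are illustrative): the Add of two Constants is evaluated once with ONNX-Runtime and baked in as a Constant, while the input-dependent Add is left alone.

import numpy as np
import onnx_graphsurgeon as gs

x = gs.Variable("x", dtype=np.float32, shape=(1, 3))
a = gs.Constant("a", values=np.ones((1, 3), dtype=np.float32))
b = gs.Constant("b", values=np.full((1, 3), 2.0, dtype=np.float32))

a_plus_b = gs.Variable("a_plus_b", dtype=np.float32, shape=(1, 3))
out = gs.Variable("out", dtype=np.float32, shape=(1, 3))

nodes = [
    gs.Node(op="Add", inputs=[a, b], outputs=[a_plus_b]),    # purely constant subexpression
    gs.Node(op="Add", inputs=[x, a_plus_b], outputs=[out]),  # depends on a graph input
]
graph = gs.Graph(nodes=nodes, inputs=[x], outputs=[out], opset=13)

# "a_plus_b" has only Constant inputs, so it is computed once and folded;
# cleanup() then removes the now-dangling first Add node.
graph.fold_constants().cleanup()
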
Example #5
    def fold_constants(self,
                       fold_shapes=True,
                       recurse_subgraphs=True,
                       partitioning=None,
                       error_ok=True):
        """
        Folds constants in-place in the graph. The graph must be topologically sorted prior to
        calling this function (see `toposort()`).

        This function will not remove constants after folding them. In order to get rid of
        these hanging nodes, you can run the `cleanup()` function.

        *Note: Due to how this function is implemented, the graph must be exportable to ONNX,
        and evaluable in ONNX-Runtime. Additionally, ONNX-Runtime must be installed.*

        Args:
            fold_shapes (bool):
                    Whether to fold `Shape` nodes in the graph.
                    This requires shapes to be inferred in the graph, and can only fold
                    static shapes.
                    Defaults to True.
            recurse_subgraphs (bool):
                    Whether to recursively fold constants in subgraphs.
                    Defaults to True.
            partitioning (Union[str, None]):
                    Whether/How to partition the graph so that errors in folding one
                    part of a model do not affect other parts. Available modes are:

                    - None: Do not partition the graph. If inference fails, no constants are folded.
                    - "basic": Partition the graph. If inference fails in one partition, other partitions will
                            remain unaffected.
                    - "recursive": Parition the graph recursively. If inference fails in a partition, the partition
                            will be further paritioned.

                    Defaults to None.
            error_ok (bool):
                    Whether inference errors should be suppressed.
                    When this is disabled, any errors encountered during inference will be re-raised.
                    Defaults to True.

        Returns:
            self
        """
        import onnxruntime as rt
        from onnx_graphsurgeon.exporters.onnx_exporter import export_onnx

        PARTITIONING_MODES = [None, "basic", "recursive"]
        if partitioning not in PARTITIONING_MODES:
            G_LOGGER.critical(
                "Argument for parameter 'partitioning' must be one of: {:}".
                format(PARTITIONING_MODES))

        # First, perform shape tensor cast elision on the graph prior to other constant folding.
        # Search for the pattern: Cast(s) (from int -> float) -> intermediate op (with float constants) -> Cast(s) (back to int).
        # This pattern is problematic for TensorRT since these operations may be performed on shape tensors, which
        # are not allowed to be of floating-point type. Attempt to elide the casts here.
        VALID_CAST_ELISION_OPS = [
            "Add", "Sub", "Mul", "Div", "Max", "Min", "Equal", "Greater",
            "Less", "Concat"
        ]

        def run_cast_elision(node):
            import onnx

            if node.op not in VALID_CAST_ELISION_OPS:
                return

            # Get the input Cast nodes that cast to float (TensorProto.FLOAT == 1)
            inp_casts = [
                inp_node for inp_tensor in node.inputs
                for inp_node in inp_tensor.inputs
                if inp_node.op == "Cast" and inp_node.attrs["to"] == 1
            ]

            # No cast nodes found, return early
            if not inp_casts:
                return

            # Ensure that all input cast nodes are casting from the same type
            final_type = None
            for inp in inp_casts:
                curr_type = onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[
                    inp.inputs[0].dtype]
                final_type = final_type or curr_type
                if final_type != curr_type:
                    return

            # Check validity and get list of output nodes
            out_casts = []

            for out_tensor in node.outputs:
                for out_node in out_tensor.outputs:
                    # Only Casts back to int32/int64 (TensorProto 6/7) are eligible for elision.
                    if out_node.op != "Cast" or out_node.attrs["to"] not in [6, 7]:
                        # Can exit early if any of the output nodes are not valid casts
                        return
                    out_casts.append(out_node)
                    # Check that all final cast types are the same.
                    curr_type = out_node.attrs["to"]
                    if final_type != curr_type:
                        return

            # If all checks passed - update constant values.
            for inp in node.inputs:
                if isinstance(inp, Constant):
                    inp.values = inp.values.astype(
                        onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[final_type])

            # "Remove" casts nodes by changing I/O node operators to Identity. Update corresponding tensor dtypes as well
            def replace_with_identity(cast_node, change_dtype):
                cast_node.op = "Identity"
                cast_node.attrs = {}
                getattr(
                    cast_node, change_dtype
                )[0].dtype = onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[final_type]
                G_LOGGER.debug("Cast node {:} elided".format(cast_node.name))

            for inp in inp_casts:
                replace_with_identity(inp, change_dtype="outputs")

            for out in out_casts:
                replace_with_identity(out, change_dtype="inputs")

        # Perform shape tensor cast elision:
        if fold_shapes:
            G_LOGGER.debug(
                "Performing shape tensor cast elision in {:}".format(
                    self.name))
            try:
                for node in self.nodes:
                    run_cast_elision(node)
            except Exception as err:
                if not error_ok:
                    raise err
                G_LOGGER.warning("'{:}' routine failed with: {:}".format(
                    "Shape tensor cast elision", err))

        G_LOGGER.debug("Folding constants in {:}".format(self.name))

        graph_clone = self.copy()
        clone_tensors = graph_clone.tensors()

        # We find graph constants in two passes:
        # Pass 1 finds all Constant tensors in the graph, then walks over their outputs.
        # Pass 2 searches for Shape nodes that have variable inputs (i.e. not marked const in pass 1)
        #    and turns them into Constants iff the input has a statically known shape.

        def update_foldable_outputs(graph_constants):
            def is_foldable(node):
                def all_tensors_const(tensors):
                    return all([t.name in graph_constants for t in tensors])

                if not all_tensors_const(node.inputs):
                    return False

                all_subgraph_foreign_tensors_const = True
                for attr in node.attrs.values():
                    if isinstance(attr, Graph):
                        foreign_tensors = attr._foreign_tensors().values()
                        all_subgraph_foreign_tensors_const &= all_tensors_const(
                            foreign_tensors)
                return all_subgraph_foreign_tensors_const

            # Walks along the outputs of graph_constants to see if they can also be computed statically.
            # Since the graph is topologically sorted, this should find all constant nodes in the graph.
            for node in graph_clone.nodes:
                if is_foldable(node):
                    graph_constants.update(
                        {out.name: out
                         for out in node.outputs})
            return graph_constants

        # Pass 1: Non-shape Constant Folding

        graph_constants = {
            name: tensor
            for name, tensor in clone_tensors.items()
            if isinstance(tensor, Constant)
        }

        # Replaces outputs of Constant nodes with constant tensors
        for tensor in clone_tensors.values():
            if len(tensor.inputs) == 1:
                node = tensor.inputs[0]
                if node.op == "Constant":
                    graph_constants[tensor.name] = tensor.to_constant(
                        node.attrs["value"]._values
                    )  # Using ._values avoids copying
                    graph_constants[tensor.name].inputs.clear()

        graph_constants = update_foldable_outputs(graph_constants)

        # Pass 2: Shape Folding

        def get_producer(tensor, op):
            """
            Get the producer of the specified tensor iff it matches op
            """
            if len(tensor.inputs) != 1:
                return None

            node = tensor.inputs[0]
            if node.op != op:
                return None
            return node

        def get_input(node, index=0):
            """
            Get the input tensor of a node iff the input tensor is not already marked a graph constant.
            """
            if node is None:
                return None

            inp = node.inputs[index]

            # If the input was already found to be a constant, it will be folded anyway.
            if inp.name in graph_constants:
                return None

            return inp

        def get_scalar_value(tensor):
            """
            Gets the scalar value of a tensor with a single item
            """
            if not tensor.shape:
                return tensor.values
            else:
                return list(tensor.values)[0]

        def fold_shape(tensor):
            inp = get_input(get_producer(tensor, "Shape"))
            if inp is None:
                return None

            if inp.shape is None or misc.is_dynamic_shape(inp.shape):
                return None
            return np.array(inp.shape, dtype=np.int64)

        def fold_shape_gather(tensor):
            gather = get_producer(tensor, "Gather")
            if gather is None:
                return None

            data = gather.inputs[0]
            indices_tensor = gather.inputs[1]

            inp = get_input(get_producer(data, "Shape"))
            if inp is None or inp.shape is None:
                return None

            if not isinstance(indices_tensor, Constant):
                return None

            indices = indices_tensor.values
            if not indices.shape:  # Scalar-case
                shape = inp.shape[int(indices)]
                if misc.is_dynamic_dimension(shape):
                    return None
            else:
                shape = [inp.shape[index] for index in indices]
                if misc.is_dynamic_shape(shape):
                    return None

            return np.array(shape, dtype=np.int64)

        def fold_shape_slice(tensor):
            slice = get_producer(tensor, "Slice")
            if slice is None:
                return None

            data = slice.inputs[0]

            if len(slice.inputs) >= 3:
                starts, ends = slice.inputs[1:3]
                if any(not isinstance(t, Constant) for t in [starts, ends]):
                    return None
                starts, ends = get_scalar_value(starts), get_scalar_value(ends)
            elif "starts" in slice.attrs and "ends" in slice.attrs:
                starts, ends = slice.attrs["starts"][0], slice.attrs["ends"][0]
            else:
                return None

            inp = get_input(get_producer(data, "Shape"))
            if inp is None or inp.shape is None:
                return None

            # For shape tensors, we can only slice on the 0th dimension.
            if len(slice.inputs) > 3:
                axes = slice.inputs[3]
                if not isinstance(axes, Constant):
                    return None

                if get_scalar_value(axes) != 0:
                    return None
            elif "axes" in slice.attrs:
                if slice.attrs["axes"][0] != 0:
                    return None

            steps = 1
            if len(slice.inputs) > 4:
                steps = slice.inputs[4]
                if not isinstance(steps, Constant):
                    return None
                steps = get_scalar_value(steps)
            elif "steps" in slice.attrs:
                steps = slice.attrs["steps"][0]

            shape = inp.shape[starts:ends:steps]
            if misc.is_dynamic_shape(shape):
                return None

            return np.array(shape, dtype=np.int64)

        if fold_shapes:
            # NOTE: The order of shape folding passes is important to maximize how much we fold (phase-ordering problem).
            SHAPE_FOLD_FUNCS = [
                fold_shape_gather, fold_shape_slice, fold_shape
            ]
            for shape_fold_func in SHAPE_FOLD_FUNCS:
                try:
                    for tensor in clone_tensors.values():
                        shape_of = shape_fold_func(tensor)

                        if shape_of is not None:
                            G_LOGGER.ultra_verbose(
                                "Folding shape tensor: {:} to: {:}".format(
                                    tensor.name, shape_of))
                            graph_constants[tensor.name] = tensor.to_constant(
                                shape_of)
                            graph_constants[tensor.name].inputs.clear()
                except Exception as err:
                    if not error_ok:
                        raise err
                    G_LOGGER.warning("'{:}' routine failed with:\n{:}".format(
                        shape_fold_func.__name__, err))
                else:
                    graph_constants = update_foldable_outputs(graph_constants)

        def partition_and_infer(subgraph):
            def get_out_node_ids():
                # Gets the final output nodes, i.e. producer nodes of graph output tensors that have no other consumers.
                with subgraph.node_ids():
                    out_node_ids = set()
                    for out in subgraph.outputs:
                        if not out.outputs and not isinstance(out, Constant):
                            for n_inp in out.inputs:
                                out_node_ids.add(n_inp.id)
                return out_node_ids

            # Compute each output node in a separate subgraph.
            out_node_ids = get_out_node_ids()
            constant_values = {}

            for index in out_node_ids:  # Use the node's index, since the node objects in 'subgraph' are not the ones in the copy 'part'
                part = subgraph.copy()
                out_node = part.nodes[index]
                part.outputs = out_node.outputs
                part.name = "Folding: {:}".format(
                    [out.name for out in part.outputs])
                part.cleanup(remove_unused_graph_inputs=True)
                names = [out.name for out in part.outputs]

                try:
                    # Determining types is not trivial, and ONNX-RT does its own type inference.
                    sess = rt.InferenceSession(
                        export_onnx(part,
                                    do_type_check=False).SerializeToString())
                    values = sess.run(names, {})
                except Exception as err:
                    G_LOGGER.warning(
                        "Inference failed for subgraph: {:}. Note: Error was:\n{:}"
                        .format(part.name, err))
                    if partitioning == "recursive":
                        G_LOGGER.verbose(
                            "Attempting to recursively partition subgraph")
                        # Partition failed, peel off last node.
                        # We only need to remove one node, so avoid doing an expensive call to cleanup()
                        part.outputs = out_node.inputs
                        del part.nodes[part.nodes.index(out_node)]
                        out_node.outputs.clear()
                        out_node.inputs.clear()
                    else:
                        G_LOGGER.info(
                            "You may see better results if you set partitioning='recursive'"
                        )
                        if not error_ok:
                            raise err

                    constant_values.update(partition_and_infer(part))
                else:
                    constant_values.update(
                        {name: val
                         for name, val in zip(names, values)})

            return constant_values

        # Next, evaluate the foldable variables with ONNX-Runtime

        # Only evaluate foldable values that have non-foldable outputs or are graph outputs.
        # Otherwise, if all the outputs are foldable, then we can just evaluate the outputs directly.
        def should_eval_foldable(tensor):
            non_const = not isinstance(tensor, Constant)
            is_graph_output = not tensor.outputs
            has_non_foldable_outputs = any(out.name not in graph_constants
                                           for out in tensor.outputs)
            return non_const and (is_graph_output or has_non_foldable_outputs)

        graph_clone.outputs = [
            t for t in graph_constants.values() if should_eval_foldable(t)
        ]
        G_LOGGER.debug("Folding tensors: {:}".format(graph_clone.outputs))
        graph_clone.cleanup(remove_unused_graph_inputs=True)

        # Using ._values avoids a deep copy of the values.
        constant_values = {
            name: tensor._values
            for name, tensor in graph_constants.items()
            if isinstance(tensor, Constant)
        }
        if graph_clone.outputs:
            if partitioning:
                constant_values.update(partition_and_infer(graph_clone))
            else:
                names = [t.name for t in graph_clone.outputs]
                try:
                    sess = rt.InferenceSession(
                        export_onnx(graph_clone,
                                    do_type_check=False).SerializeToString())
                    values = sess.run(names, {})
                    constant_values.update(
                        {name: val
                         for name, val in zip(names, values)})
                except Exception as err:
                    G_LOGGER.warning(
                        "Inference failed. You may want to try enabling partitioning to see better results. "
                        "Note: Error was:\n{:}".format(err))
                    G_LOGGER.verbose(
                        "Note: Graph was:\n{:}".format(graph_clone))
                    if not error_ok:
                        raise
        elif not constant_values:
            G_LOGGER.info(
                "Could not find any nodes in this graph ({:}) that can be folded. "
                "This could mean that constant folding has already been run on this graph. "
                "Skipping.".format(self.name))

        # Finally, replace the Variables in the original graph with constants.
        if constant_values:
            graph_tensors = self.tensors()
            for name, values in constant_values.items():
                tensor = graph_tensors[name]
                if not isinstance(tensor, Constant):
                    tensor.to_constant(values)
                    tensor.inputs.clear()  # Constants do not need inputs

        # Folding subgraphs after the outer graph can lead to better folding.
        def fold_subgraphs():
            for node in self.nodes:
                for attr in node.attrs.values():
                    if isinstance(attr, Graph):
                        attr.fold_constants(fold_shapes=fold_shapes,
                                            partitioning=partitioning)

        if recurse_subgraphs:
            fold_subgraphs()

        return self
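
Pass 2 (shape folding) can be exercised with a similar sketch, assuming the standard onnx-graphsurgeon API (names are illustrative): a Shape -> Gather chain over a statically shaped input is resolved to an int64 Constant during the shape-folding pass, before any ONNX-Runtime inference.

import numpy as np
import onnx_graphsurgeon as gs

inp = gs.Variable("inp", dtype=np.float32, shape=(1, 3, 224, 224))  # fully static shape

shape_out = gs.Variable("shape_out", dtype=np.int64)
dim = gs.Variable("dim", dtype=np.int64)
axis1 = gs.Constant("axis1", values=np.array(1, dtype=np.int64))

nodes = [
    gs.Node(op="Shape", inputs=[inp], outputs=[shape_out]),
    gs.Node(op="Gather", inputs=[shape_out, axis1], outputs=[dim]),
]
graph = gs.Graph(nodes=nodes, inputs=[inp], outputs=[dim], opset=13)

# fold_shape_gather() sees that inp's shape is statically known, so "dim" is folded
# directly to the constant 3 (the size of axis 1) without evaluating the graph.
graph.fold_constants(fold_shapes=True).cleanup()
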