Example #1
    def schedule(self, fgraph: FunctionGraph) -> typing.List[Apply]:
        """Runs the scheduler (if set) or the toposort on the FunctionGraph.

        Parameters
        ----------
        fgraph : FunctionGraph
            A graph to compute the schedule for.

        Returns
        -------
        nodes : list of Apply nodes
            The result of the scheduling or toposort operation.
        """
        if callable(self._scheduler):
            return self._scheduler(fgraph)
        return fgraph.toposort()
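
For context, a minimal usage sketch of `schedule` (the graph construction below is illustrative, and the `FunctionGraph` import path is an assumption; it differs across Theano releases):

import theano.tensor as tt
from theano.graph.fg import FunctionGraph  # assumed path; older releases use theano.gof.fg

x = tt.vector("x")
fgraph = FunctionGraph(inputs=[x], outputs=[x * 2])

# With no custom scheduler set on the linker, schedule() is equivalent to:
order = fgraph.toposort()  # Apply nodes in dependency order
print([node.op for node in order])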
Example #2
def raise_with_op(
    fgraph: FunctionGraph, node, thunk=None, exc_info=None, storage_map=None
):
    """
    Re-raise an exception while annotating the exception object with
    debug info.

    Parameters
    ----------
    fgraph : FunctionGraph
        The FunctionGraph that contains `node`.
    node : Apply node
        The Apply node object that resulted in the raised exception.
    thunk : optional
        The thunk that was executing when the exception was raised.
    exc_info : tuple, optional
        A tuple containing the exception type, exception object and
        associated traceback, as would be returned by a call to
        `sys.exc_info()` (which is done if `None` is passed).
    storage_map : dict, optional
        Storage map of the Theano function that resulted in the
        raised exception.

    Notes
    -----
    This re-raises the exception described by `exc_info` (or the last
    one raised, if `exc_info` is omitted) and annotates the exception
    object with several new members which may be helpful for debugging
    Theano graphs. They are:

     * __op_instance__: The Op that is responsible for the exception
       being raised.
     * __thunk_trace__: A traceback corresponding to the code that
       actually generated the exception, if it is available.
     * __applynode_index__: The index of the Apply node corresponding
       to this op in `fgraph.toposort()`.

    The exception is not annotated if it is of type `KeyboardInterrupt`.

    TODO: Make this work with linker defined schedule
    """
    verbosity = config.exception_verbosity

    if exc_info is None:
        exc_info = sys.exc_info()
    exc_type, exc_value, exc_trace = exc_info
    if exc_type is KeyboardInterrupt:
        # print a simple traceback from KeyboardInterrupt
        raise exc_value.with_traceback(exc_trace)
    try:
        trace = node.outputs[0].tag.trace
    except AttributeError:
        try:
            trace = node.op.tag.trace
        except AttributeError:
            trace = ()
    exc_value.__thunk_trace__ = trace
    exc_value.__op_instance__ = node
    topo = fgraph.toposort()
    if node in topo:
        node_index = topo.index(node)
    else:
        node_index = None
    exc_value.__applynode_index__ = node_index

    hints = []
    detailed_err_msg = "\nApply node that caused the error: " + str(node)
    if exc_value.__applynode_index__ is not None:
        detailed_err_msg += f"\nToposort index: {int(node_index)}"

    types = [getattr(ipt, "type", "No type") for ipt in node.inputs]
    detailed_err_msg += f"\nInputs types: {types}\n"

    if thunk is not None:
        if hasattr(thunk, "inputs"):
            shapes = [getattr(ipt[0], "shape", "No shapes") for ipt in thunk.inputs]
            strides = [getattr(ipt[0], "strides", "No strides") for ipt in thunk.inputs]
            scalar_values = []
            for ipt in thunk.inputs:
                if getattr(ipt[0], "size", -1) <= 5:
                    scalar_values.append(ipt[0])
                else:
                    scalar_values.append("not shown")
        else:
            shapes = "The thunk don't have an inputs attributes."
            strides = "So we can't access the strides of inputs values"
            scalar_values = "And can't print its inputs scalar value"
        clients = [[c[0] for c in fgraph.clients[var]] for var in node.outputs]
        detailed_err_msg += (
            f"Inputs shapes: {shapes}"
            + f"\nInputs strides: {strides}"
            + f"\nInputs values: {scalar_values}"
        )
        if verbosity == "high":
            detailed_err_msg += "\nInputs type_num: %s" % str(
                [getattr(getattr(i[0], "dtype", ""), "num", "") for i in thunk.inputs]
            )
        if hasattr(node.op, "__input_name__"):
            detailed_err_msg += f"\nInputs name: {node.op.__input_name__}\n"

        detailed_err_msg += f"\nOutputs clients: {clients}\n"
    else:
        hints.append(
            "HINT: Use another linker then the c linker to"
            " have the inputs shapes and strides printed."
        )

    # Print node backtraces
    tr = getattr(node.outputs[0].tag, "trace", [])
    if isinstance(tr, list) and len(tr) > 0:
        detailed_err_msg += "\nBacktrace when the node is created(use Theano flag traceback__limit=N to make it longer):\n"

        # Print a separate message for each element in the list of backtraces
        sio = io.StringIO()
        for subtr in tr:
            traceback.print_list(subtr, sio)
        detailed_err_msg += str(sio.getvalue())
    else:
        hints.append(
            "HINT: Re-running with most Theano optimization disabled could"
            " give you a back-trace of when this node was created. This can"
            " be done with by setting the Theano flag"
            " 'optimizer=fast_compile'. If that does not work,"
            " Theano optimizations can be disabled with 'optimizer=None'."
        )

    if verbosity == "high":

        import theano.printing

        f = io.StringIO()
        theano.printing.debugprint(node, file=f, stop_on_name=True, print_type=True)
        detailed_err_msg += "\nDebugprint of the apply node: \n"
        detailed_err_msg += f.getvalue()

    # Print the storage map footprint
    if verbosity == "high" and storage_map is not None:
        detailed_err_msg += "\nStorage map footprint:\n"
        shared_input_list = [
            item
            for item in fgraph.inputs
            if isinstance(item, theano.compile.SharedVariable)
        ]
        nonshared_input_list = [
            item
            for item in fgraph.inputs
            if not isinstance(item, theano.compile.SharedVariable)
        ]
        storage_map_list = []
        total_size = 0
        total_size_inputs = 0
        for k in storage_map:
            storage_map_item = []

            # storage_map_item[0]: the variable
            storage_map_item.append(str(k))

            # storage_map_item[1]: the shape
            shapeinfo = None
            if hasattr(storage_map[k][0], "shape"):
                shapeinfo = storage_map[k][0].shape
                if len(shapeinfo) != 0:
                    storage_map_item.append(shapeinfo)
                else:
                    storage_map_item.append(tuple())
            else:
                storage_map_item.append(None)

            # storage_map_item[2]: itemsize
            # storage_map_item[3]: bytes
            if hasattr(storage_map[k][0], "dtype"):
                dtype = storage_map[k][0].dtype
                storage_map_item.append(np.dtype(dtype).itemsize)
                if shapeinfo is None:
                    storage_map_item.append(-1)
                else:
                    sz = np.dtype(dtype).itemsize * np.prod(shapeinfo)
                    storage_map_item.append(sz)
                    total_size += sz
                    if not k.owner:
                        total_size_inputs += sz
                    else:
                        # If it is a view, don't count it twice.
                        if getattr(k.owner.op, "view_map", None):
                            vmap = k.owner.op.view_map
                            out_idx = k.owner.outputs.index(k)
                            data = storage_map[k][0]
                            if out_idx in vmap:
                                assert len(vmap[out_idx]) == 1
                                input_data = storage_map[
                                    k.owner.inputs[vmap[out_idx][0]]
                                ][0]
                                if k.type.may_share_memory(data, input_data):
                                    total_size -= sz
                        # If it is a destroyed input, the input
                        # shouldn't be in the storage_map anymore
                        # except if there is a special flag used. So
                        # we still must check it.
                        if getattr(k.owner.op, "destroy_map", None):
                            vmap = k.owner.op.destroy_map
                            out_idx = k.owner.outputs.index(k)
                            data = storage_map[k][0]
                            if out_idx in vmap:
                                assert len(vmap[out_idx]) == 1
                                input_data = storage_map[
                                    k.owner.inputs[vmap[out_idx][0]]
                                ][0]
                                if k.type.may_share_memory(data, input_data):
                                    total_size -= sz
            else:
                nbytes = sys.getsizeof(storage_map[k][0])
                storage_map_item.append(nbytes)
                storage_map_item.append(-1)

            # Flag of shared val
            # storage_map_item[4]
            if k in shared_input_list:
                storage_map_item.append(True)
            elif k in nonshared_input_list:
                storage_map_item.append(False)
            else:
                storage_map_item.append(None)
            storage_map_list.append(storage_map_item)

        storage_map_list.sort(key=itemgetter(3), reverse=True)
        for item in storage_map_list:
            if item[3] == -1:
                continue
            detailed_err_msg += " - " + item[0] + ", "
            if item[4] is True:
                detailed_err_msg += "Shared Input, "
            elif item[4] is False:
                detailed_err_msg += "Input, "
            if item[1] is not None:
                detailed_err_msg += f"Shape: {item[1]}, "
            detailed_err_msg += f"ElemSize: {item[2]} Byte(s)"
            if item[3] is not None:
                detailed_err_msg += f", TotalSize: {item[3]} Byte(s)\n"
            else:
                detailed_err_msg += "\n"
        detailed_err_msg += " TotalSize: {} Byte(s) {:.3f} GB\n".format(
            total_size,
            total_size / 1024 / 1024 / 1024,
        )
        detailed_err_msg += " TotalSize inputs: {} Byte(s) {:.3f} GB\n".format(
            total_size_inputs,
            total_size_inputs / 1024 / 1024 / 1024,
        )

    else:
        hints.append(
            "HINT: Use the Theano flag 'exception_verbosity=high'"
            " for a debugprint and storage map footprint of this apply node."
        )

    try:
        exc_value = exc_type(
            str(exc_value) + detailed_err_msg + "\n" + "\n".join(hints)
        )
    except TypeError:
        warnings.warn(f"{exc_type} error does not allow us to add an extra error message")
        # Some exceptions need extra parameters in their constructor, so we
        # skip the long error message in that case.
    raise exc_value.with_traceback(exc_trace)
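
For context, `raise_with_op` is typically invoked from a linker's thunk-execution loop; the sketch below is illustrative, and `order`, `thunks`, and `storage_map` are assumed to come from the surrounding linker code:

for node, thunk in zip(order, thunks):
    try:
        thunk()
    except Exception:
        # Re-raise with the failing node, thunk, and storage map attached so
        # the annotated error reports input shapes, strides, and values.
        raise_with_op(fgraph, node, thunk, storage_map=storage_map)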
Example #3
    def __call__(self, fct, graph=None):
        """Create pydot graph from function.

        Parameters
        ----------
        fct : theano.compile.function.types.Function
            A compiled Theano function, a FunctionGraph, a variable, an
            Apply node, or a list of variables.
        graph : pydot.Dot
            `pydot` graph to which nodes are added. A new one is created
            if not provided.

        Returns
        -------
        pydot.Dot
            Pydot graph of `fct`
        """
        if graph is None:
            graph = pd.Dot()

        self.__nodes = {}

        profile = None

        if isinstance(fct, Function):
            profile = getattr(fct, "profile", None)
            fgraph = fct.maker.fgraph
        elif isinstance(fct, FunctionGraph):
            fgraph = fct
        else:
            if isinstance(fct, Variable):
                fct = [fct]
            elif isinstance(fct, Apply):
                fct = fct.outputs
            assert isinstance(fct, (list, tuple))
            assert all(isinstance(v, Variable) for v in fct)
            fgraph = FunctionGraph(inputs=graph_inputs(fct), outputs=fct)

        outputs = list(fgraph.outputs)
        topo = fgraph.toposort()

        # Loop over apply nodes
        for node in topo:
            nparams = {}
            __node_id = self.__node_id(node)
            nparams["name"] = __node_id
            nparams["label"] = apply_label(node)
            nparams["profile"] = apply_profile(fgraph, node, profile)
            nparams["node_type"] = "apply"
            nparams["apply_op"] = nparams["label"]
            nparams["shape"] = self.shapes["apply"]

            use_color = None
            for opName, color in self.apply_colors.items():
                if opName in node.op.__class__.__name__:
                    use_color = color
            if use_color:
                nparams["style"] = "filled"
                nparams["fillcolor"] = use_color
                nparams["type"] = "colored"

            pd_node = dict_to_pdnode(nparams)
            graph.add_node(pd_node)

            # Loop over input nodes
            for idx, var in enumerate(node.inputs):
                var_id = self.__node_id(var.owner if var.owner else var)
                if var.owner is None:
                    vparams = {
                        "name": var_id,
                        "label": var_label(var),
                        "node_type": "input",
                    }
                    if isinstance(var, Constant):
                        vparams["node_type"] = "constant_input"
                    elif isinstance(var, theano.tensor.sharedvar.TensorSharedVariable):
                        vparams["node_type"] = "shared_input"
                    vparams["dtype"] = type_to_str(var.type)
                    vparams["tag"] = var_tag(var)
                    vparams["style"] = "filled"
                    vparams["fillcolor"] = self.node_colors[vparams["node_type"]]
                    vparams["shape"] = self.shapes["input"]
                    pd_var = dict_to_pdnode(vparams)
                    graph.add_node(pd_var)

                edge_params = {}
                if hasattr(node.op, "view_map") and id in reduce(
                    list.__add__, node.op.view_map.values(), []
                ):
                    edge_params["color"] = self.node_colors["output"]
                elif hasattr(node.op, "destroy_map") and id in reduce(
                    list.__add__, node.op.destroy_map.values(), []
                ):
                    edge_params["color"] = "red"

                # Compute the label from the current variable's type; the
                # previous code reused a stale `vparams` dict, which is
                # undefined when the first input has an owner.
                edge_label = type_to_str(var.type)
                if len(node.inputs) > 1:
                    edge_label = str(idx) + " " + edge_label
                pdedge = pd.Edge(var_id, __node_id, label=edge_label, **edge_params)
                graph.add_edge(pdedge)

            # Loop over output nodes
            for var in node.outputs:
                var_id = self.__node_id(var)

                if var in outputs or len(fgraph.clients[var]) == 0:
                    vparams = {
                        "name": var_id,
                        "label": var_label(var),
                        "node_type": "output",
                        "dtype": type_to_str(var.type),
                        "tag": var_tag(var),
                        "style": "filled",
                    }
                    if len(fgraph.clients[var]) == 0:
                        vparams["fillcolor"] = self.node_colors["unused"]
                    else:
                        vparams["fillcolor"] = self.node_colors["output"]
                    vparams["shape"] = self.shapes["output"]
                    pd_var = dict_to_pdnode(vparams)
                    graph.add_node(pd_var)

                    graph.add_edge(pd.Edge(__node_id, var_id, label=vparams["dtype"]))
                elif var.name or not self.compact:
                    graph.add_edge(pd.Edge(__node_id, var_id, label=type_to_str(var.type)))

            # Create sub-graph for OpFromGraph nodes
            if isinstance(node.op, builders.OpFromGraph):
                subgraph = pd.Cluster(__node_id)
                gf = PyDotFormatter()
                # Use different node prefix for sub-graphs
                gf.__node_prefix = __node_id
                node.op.prepare_node(node, None, None, "py")
                gf(node.op.fn, subgraph)
                graph.add_subgraph(subgraph)
                pd_node.get_attributes()["subg"] = subgraph.get_name()

                def format_map(m):
                    return str([list(x) for x in m])

                # Inputs mapping
                ext_inputs = [self.__node_id(x) for x in node.inputs]
                int_inputs = [gf.__node_id(x) for x in node.op.local_inputs]
                assert len(ext_inputs) == len(int_inputs)
                h = format_map(zip(ext_inputs, int_inputs))
                pd_node.get_attributes()["subg_map_inputs"] = h

                # Outputs mapping
                ext_outputs = [self.__node_id(x) for x in node.outputs]
                int_outputs = [gf.__node_id(x) for x in node.op.local_outputs]
                assert len(ext_outputs) == len(int_outputs)
                h = format_map(zip(int_outputs, ext_outputs))
                pd_node.get_attributes()["subg_map_outputs"] = h

        return graph
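
A minimal usage sketch of `PyDotFormatter` (assumes `pydot` and Graphviz are installed; the function and output filename are illustrative):

import theano
import theano.tensor as tt
from theano.d3viz.formatting import PyDotFormatter

x = tt.matrix("x")
f = theano.function([x], tt.exp(x))

graph = PyDotFormatter()(f)        # returns a pydot.Dot graph
graph.write_png("exp_graph.png")   # render via Graphviz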