class TensorflowGraphToDot:
    """Draw the ops that feed a TensorFlow op as a graphviz ``Digraph``.

    Every op reached by a backward walk from ``op`` becomes a node and every
    entry of its ``node_def.input`` becomes an incoming edge.  Ops whose name
    contains one of the ``suppressed_nodes`` substrings are hidden; edges that
    would pass through a hidden op are re-routed from the hidden op's own
    inputs straight to the visible consumer.
    """

    def __init__(self, op, suppressed_nodes=None):
        """Build the dot graph for ``op``.

        :param op: the op whose upstream ops are drawn.
        :param suppressed_nodes: substrings; any op whose name contains one
            of them is left out of the drawing.  ``None`` hides nothing.
        """
        from graphviz import Digraph
        import tensorflow.contrib.graph_editor as ge

        self.dot = Digraph()
        # Maps op name -> names of the ops it consumes, in walk order.
        self.nodes = collections.OrderedDict()
        for node in ge.get_backward_walk_ops(op):
            self.nodes[node.node_def.name] = node.node_def.input
        # A ``None`` default avoids sharing one list object between instances.
        self.suppressed_nodes = [] if suppressed_nodes is None else suppressed_nodes
        for node_name, input_names in self.nodes.items():
            self.draw_node(node_name)
            for input_node in input_names:
                self.draw_edge(input_node, node_name)

    def is_suppressed(self, node_name):
        """Return True if ``node_name`` contains any suppressed substring."""
        return any(s in node_name for s in self.suppressed_nodes)

    def draw_edge(self, from_node_name, to_node_name, _visited=None):
        """Draw ``from -> to``, skipping over suppressed source nodes.

        A suppressed source is replaced by its own inputs, recursively, so the
        visible consumer stays connected to the nearest visible producers.
        Nothing is drawn when the target itself is suppressed.

        :param from_node_name: name of the producing op.
        :param to_node_name: name of the consuming op.
        :param _visited: internal; suppressed names already expanded during
            this call, so cyclic inputs cannot recurse forever.
        """
        if self.is_suppressed(to_node_name):
            # No edge can ever reach a hidden target; skip the traversal.
            return
        if not self.is_suppressed(from_node_name):
            self.dot.edge(from_node_name, to_node_name)
            return

        if _visited is None:
            _visited = set()
        if from_node_name in _visited:
            return
        _visited.add(from_node_name)
        # Inputs that are not keys of ``self.nodes`` have nothing to expand.
        for next_node_name in self.nodes.get(from_node_name, ()):
            self.draw_edge(next_node_name, to_node_name, _visited)

    def draw_node(self, node_name):
        """Add ``node_name`` to the drawing unless it is suppressed."""
        if not self.is_suppressed(node_name):
            self.dot.node(node_name, label=node_name)

    def _repr_svg_(self):
        """Return the SVG rendering of the underlying graphviz object."""
        return self.dot._repr_svg_()
Example #2
0
    def _repr_svg_(self):
        """Render this graph as SVG for inline display in Jupyter.

        Vertices are numbered ``0 .. self.size - 1``; each entry of
        ``self.edges[i]`` contributes an edge from ``i`` to its ``target``,
        labelled with its ``weight``.
        """
        graph = Digraph(engine='fdp')
        vertex_ids = range(self.size)

        # Declare all vertices first so isolated ones still show up.
        for vertex in vertex_ids:
            graph.node(str(vertex))

        for source in vertex_ids:
            for outgoing in self.edges[source]:
                graph.edge(str(source),
                           str(outgoing.target),
                           label=str(outgoing.weight))

        return graph._repr_svg_()
Example #3
0
    def _repr_svg_(self):
        """Render this graph as SVG for inline display in Jupyter.

        ``self.edges`` is read as a ``size x size`` matrix: a non-zero entry
        at ``[i][j]`` becomes an edge ``i -> j`` labelled with that entry.
        """
        graph = Digraph(engine='fdp')
        vertex_ids = range(self.size)

        # Declare all vertices first so isolated ones still show up.
        for vertex in vertex_ids:
            graph.node(str(vertex))

        for source in vertex_ids:
            for target in vertex_ids:
                weight = self.edges[source][target]
                if weight != 0:
                    graph.edge(str(source), str(target), str(weight))

        return graph._repr_svg_()
class DerivaCatalogToGraph:
    """Build a graphviz ``Digraph`` from the tables of a catalog model.

    Each table becomes a node named ``<schema>_<table>`` (grouped in one
    cluster per schema) and each foreign key becomes an edge from the
    referring table to the referenced table.
    """

    def __init__(self, catalog, engine='dot'):
        """
        :param catalog: catalog object; must provide ``getCatalogModel()``,
            ``get_server_uri()`` and ``catalog_id``.
        :param engine: graphviz layout engine used for rendering.
        """
        self.graph = Digraph(
            engine=engine,
            format='pdf',
            edge_attr=None,
            strict=True,
        )

        self.catalog = catalog
        self._model = catalog.getCatalogModel()
        # Prefix of the URL attached to every node; see _chaise_uri().
        self._chaise_base = "https://{}/chaise/recordset/#{}/".format(
            urlparse(catalog.get_server_uri()).netloc, self.catalog.catalog_id)

        self.graph.attr('graph', rankdir='LR')
        self.graph.attr('graph', overlap='false', splines='true')
        #self.graph.attr('graph', concentrate=True)

    def clear(self):
        """Clear the underlying graphviz graph."""
        self.graph.clear()

    def view(self):
        """Render the graph and open it in the default viewer."""
        self.graph.view()

    def catalog_to_graph(self, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Convert a catalog to a DOT based graph.
        :param schemas:  List of schemas that should be included.  Use whole catalog if None
            (except the '_acl_admin', 'public' and 'WWW' schemas).
        :param skip_terms: Do not include term tables in the graph
        :param skip_association_tables: Collapse association tables so that only edges between endpoints are used
        :return:
        """

        if schemas is None:
            schemas = [s.name for s in self._model.schemas.values()
                       if s.name not in ['_acl_admin', 'public', 'WWW']]

        for schema in schemas:
            self.schema_to_graph(schema, skip_terms=skip_terms, schemas=schemas,
                                 skip_association_tables=skip_association_tables)

    def schema_to_graph(self, schema_name, schemas=(), skip_terms=False, skip_association_tables=False):
        """
        Create a graph for the specified schema.
        :param schema_name: Name of the schema in the model to be used.
        :param schemas: List of additional schemas to include in the graph.  Foreign keys that
            reach outside ``schema_name`` plus these schemas get no edge.
        :param skip_terms: Do not add nodes or edges for vocabulary (term) tables.
        :param skip_association_tables: Do not add nodes for association tables; binary
            associations are drawn as a single edge between their two endpoints instead.
        :return:
        """

        schema = self._model.schemas[schema_name]

        # ``schemas`` lists *additional* schemas, so the schema being drawn is always part
        # of the edge filter; without this a direct call would drop every foreign-key edge.
        included_schemas = list(schemas)
        if schema_name not in included_schemas:
            included_schemas.append(schema_name)

        # Put nodes for each schema in a separate subgraph.
        with self.graph.subgraph(name='cluster_' + schema_name, node_attr={'shape': 'box'}) as schema_graph:
            schema_graph.attr(style='invis')
            for table in schema.tables.values():
                node_name = '{}_{}'.format(schema_name, table.name)
                label = '{}:{}'.format(schema_name, table.name)
                if DerivaCatalogToGraph._is_vocabulary_table(table):
                    if not skip_terms:
                        schema_graph.node(node_name, label=label,
                                          shape='ellipse',
                                          URL=self._chaise_uri(table))
                elif table.is_association() and skip_association_tables:
                    # Skip over current table if it is an association table and option is set.
                    print('Skipping node', node_name)
                else:
                    schema_graph.node(node_name, label=label,
                                      shape='box',
                                      URL=self._chaise_uri(table))

        # We have all the nodes out now, so run over and add edges.
        for table in schema.tables.values():
            self.foreign_key_defs_to_graph(table,
                                           skip_terms=skip_terms,
                                           schemas=included_schemas,
                                           skip_association_tables=skip_association_tables)
        return

    def foreign_key_defs_to_graph(self, table, skip_terms=False, skip_association_tables=False, schemas=()):
        """
        Add edges for each foreign key relationship in the specified table.
        :param table: Table whose foreign keys are drawn.
        :param skip_terms: Do not add edges that point at vocabulary (term) tables.
        :param skip_association_tables: Draw a binary association table as one bidirectional
            gray edge between the two tables it links.
        :param schemas: Names of the schemas being drawn.  A foreign key gets an edge only if
            both its own and the referenced table's schema are listed, so the default
            (empty) draws no foreign-key edges.
        :return:
        """

        # If table is an association table, put in an edge between the two endpoints in the relation.
        # NOTE(review): this edge is not filtered by ``schemas`` or ``skip_terms`` -- confirm intended.
        if table.is_association() == 2 and skip_association_tables:
            t1 = table.foreign_keys[0].referenced_columns[0].table
            t2 = table.foreign_keys[1].referenced_columns[0].table
            t1_name = '{}_{}'.format(t1.schema.name, t1.name)
            t2_name = '{}_{}'.format(t2.schema.name, t2.name)
            self.graph.edge(t1_name, t2_name, dir='both', color='gray')
        else:
            source_name = '{}_{}'.format(table.schema.name, table.name)
            for fkey in table.foreign_keys:
                referenced_table = list(fkey.column_map.values())[0].table
                target_name = '{}_{}'.format(referenced_table.schema.name, referenced_table.name)

                # If either end is in a schema we are skipping, do not add an edge.
                if referenced_table.schema.name not in schemas or table.schema.name not in schemas:
                    continue
                # If the target is a term table, and we are not including terms, do not add an edge.
                if DerivaCatalogToGraph._is_vocabulary_table(referenced_table) and skip_terms:
                    continue

                # Add an edge from the current node to the target table.
                self.graph.edge(source_name, target_name)

        return

    def save(self, filename=None, format='pdf', view=False):
        """
        Write the graph to disk.
        :param filename: Target path.  If None, graphviz's own default file name and
            directory are used.
        :param format: Output format.  Any value containing 'gv' saves the DOT source;
            anything else is rendered with graphviz in that format.
        :param view: Open the rendered result in a viewer (rendered formats only).
        """
        if filename is None:
            # Previously this crashed in os.path.abspath(None) despite the None default.
            directory = file_name = None
        else:
            directory, file_name = os.path.split(os.path.abspath(filename))

        if 'gv' in format:
            self.graph.save(filename=file_name, directory=directory)
        else:
            print('dumping graph in file', file_name, format)
            self.graph.render(filename=file_name, directory=directory, view=view, cleanup=True, format=format)

    def _repr_svg_(self):
        """Return the SVG rendering of the underlying graphviz object."""
        return self.graph._repr_svg_()

    @staticmethod
    def _is_vocabulary_table(t):
        """
        Return True if the table looks like a vocabulary (term) table: either it lives in a
        vocabulary schema or it has truthy 'ID', 'Name', 'URI' and 'Synonyms' columns.
        """
        # NOTE(review): this is a substring test, so it accepts any schema name contained in
        # 'vocabulary' ('vocab', 'vocabulary', but also e.g. 'a') -- confirm that is intended.
        if t.schema.name.lower() in 'vocabulary':
            return True
        try:
            return bool(t.columns['ID'] and t.columns['Name'] and t.columns['URI'] and t.columns['Synonyms'])
        except KeyError:
            return False

    def _chaise_uri(self, table):
        """Return the recordset URL attached to the node of ``table``."""
        return self._chaise_base + "{}:{}".format(table.schema.name, table.name)
class GraphvizPainter:
    """
    Class represents a graph visualizer that uses a graphviz visualizing tool.

    Attributes
    ----------
    _digraph: Digraph
        A graphviz's object that represents a graph.
        It is None until one of the ``apply*`` methods has been called.

    Examples
    --------
    >>> import pandas as pd
    >>> from sberpm import DataHolder
    >>> from sberpm.miners import HeuMiner
    >>> from sberpm.visual import GraphvizPainter
    >>>
    >>> # Create data_holder
    >>> df = pd.DataFrame({
    ...     'id_column': [1, 1, 2],
    ...     'activity_column':['st1', 'st2', 'st1'],
    ...     'dt_column':[123456, 123457, 123458]})
    >>> data_holder = DataHolder(df, 'id_column', 'activity_column', 'dt_column')
    >>>
    >>> # Create graph using a miner algorithm
    >>> miner = HeuMiner(data_holder)
    >>> miner.apply()
    >>>
    >>> # Visualize graph
    >>> painter = GraphvizPainter()
    >>> painter.apply(miner.graph)
    >>>
    >>> # Or visualize graph with metrics
    >>> graph = miner.graph
    >>> graph.add_node_metric('count', {'st1': 2, 'st2': 1})
    >>> painter = GraphvizPainter()
    >>> painter.apply(graph, node_style_metric='count')
    >>>
    >>> # Save and show result
    >>> painter.write_graph('graph.svg', 'svg')
    >>> painter.show()  # Works in Jupyter Notebook
    """
    def __init__(self):
        self._digraph = None

    def apply(self,
              graph,
              node_style_metric=None,
              edge_style_metric=None,
              hide_disconnected_nodes=True):
        """
        Visualizes the given graph.
        (Creates graphviz's object that can be displayed or saved to file.)

        Parameters
        ----------
        graph : Graph or ProcessTreeNode
            Graph object.

        node_style_metric: str
            Name of the node's metric that will influence the colour of the nodes.
            If None or given metric is not contained in the given graph, nodes will have the same colour.
            Is not used if graph is a ProcessTreeNode object.

        edge_style_metric: str
            Name of the edge's metric that will influence the thickness of the edges.
            If None or given metric is not contained in the given graph, edges will have the same width.
            When an edge carries this metric, its value is also used as the edge label.
            Is not used if graph is a ProcessTreeNode object.

        hide_disconnected_nodes: bool, default=True
            If True, nodes without any input and output edges will not be displayed.
            Is not used if graph is a ProcessTreeNode object.
        """
        if isinstance(graph, ProcessTreeNode):
            self._apply_process_tree(graph)
            return

        self._digraph = Digraph()

        node_color_dict = GraphvizPainter._calc_nodes_colors_by_metric(
            graph, node_style_metric)
        edge_width_dict = GraphvizPainter._calc_edges_widths_by_metric(
            graph, edge_style_metric)

        for node in graph.get_nodes():
            is_disconnected = (len(node.output_edges) == 0
                               and len(node.input_edges) == 0)
            if hide_disconnected_nodes and is_disconnected:
                continue
            gv_node = _get_gv_node(node, node_color_dict)
            self._add_node_in_digraph(gv_node)

        for edge in graph.get_edges():
            # Attributes passed as None fall back to graphviz's defaults.
            self._digraph.edge(remove_bad_symbols(edge.source_node.id),
                               remove_bad_symbols(edge.target_node.id),
                               penwidth=str(edge_width_dict[edge.id])
                               if edge.id in edge_width_dict else None,
                               label=str(edge.metrics[edge_style_metric])
                               if edge_style_metric in edge.metrics else None)

    def apply_insights(self,
                       graph,
                       edge_style_metric='insights',
                       hide_disconnected_nodes=True):
        """
        Visualizes a graph whose nodes and edges carry their own colours and labels.

        Parameters
        ----------
        graph : Graph
            Graph object. Nodes provide ``id``, ``type``, ``label`` (and ``color`` for tasks);
            edges provide ``id``, ``label`` and ``color``.

        edge_style_metric: str, default='insights'
            Name of the edge's metric that will influence the thickness of the edges.

        hide_disconnected_nodes: bool, default=True
            If True, nodes without any input and output edges will not be displayed,
            except the 'legend_good' and 'legend_bad' nodes.
        """
        self._digraph = Digraph()
        edge_width_dict = GraphvizPainter._calc_edges_widths_by_metric(
            graph, edge_style_metric)

        for node in graph.get_nodes():
            if hide_disconnected_nodes and len(node.output_edges) == 0 and len(node.input_edges) == 0 \
                    and node.id not in ['legend_good', 'legend_bad']:
                continue
            style = 'filled'
            # Reset per node: an unrecognised node type must neither raise
            # UnboundLocalError nor inherit the previous node's look. The None
            # values are assumed to leave the attribute at graphviz's default,
            # as the edge calls in this class already assume.
            shape = None
            color = None
            if node.type == NodeType.TASK:
                shape = 'box'
                color = node.color
            elif node.type == NodeType.START_EVENT:
                color = 'green'
                shape = 'circle'
            elif node.type == NodeType.END_EVENT:
                color = 'red'
                shape = 'circle'
            if color == 'black':
                color = 'white'
            self._digraph.node(node.id,
                               shape=shape,
                               style=style,
                               fillcolor=color,
                               label=node.label)

        for edge in graph.get_edges():
            penwidth = edge_width_dict.get(edge.id)
            if penwidth:
                penwidth = str(penwidth)

            self._digraph.edge(edge.source_node.id,
                               edge.target_node.id,
                               label=edge.label,
                               color=edge.color,
                               penwidth=penwidth)

    def _apply_process_tree(self, root_node):
        """
        Graphviz visualizer for ProcessTreeNode class.

        Parameters
        ----------
        root_node: ProcessTreeNode
        """
        digraph = Digraph()

        # Add nodes
        label_dict = {
            ProcessTreeNodeType.EXCLUSIVE_CHOICE: 'X',
            ProcessTreeNodeType.SEQUENTIAL: '->',
            ProcessTreeNodeType.PARALLEL: '||',
            ProcessTreeNodeType.LOOP: '*',
            ProcessTreeNodeType.FLOWER: '?',
        }
        node2gvnode = dict()
        GraphvizPainter._add_process_tree_nodes(digraph, root_node, label_dict,
                                                node2gvnode)

        # Add edges
        GraphvizPainter._add_process_tree_edges(digraph, root_node,
                                                node2gvnode)

        self._digraph = digraph

    @staticmethod
    def _add_process_tree_nodes(digraph: 'Digraph', node, label_dict,
                                node2gvnode):
        """
        Recursively adds the given tree node and all its descendants to the digraph.

        Parameters
        ----------
        digraph: Digraph
            Graphviz object the nodes are added to.
        node: ProcessTreeNode
            Current tree node.
        label_dict: dict
            Maps operator node types to the symbol used as their label.
        node2gvnode: dict
            Filled in place: maps every visited tree node to its graphviz node id.
        """
        # Generated ids use the number of nodes registered so far as a suffix.
        generated_id = lambda: node.type + '_' + str(len(node2gvnode))

        if node.type == ProcessTreeNodeType.SINGLE_ACTIVITY:
            shape = 'box'
            if node.label is not None:
                node_id = node.label
                label = node.label
                color = 'white'
            else:
                # Activity without a label: drawn as an empty black box.
                node_id = generated_id()
                label = ''
                color = 'black'
        else:
            node_id = generated_id()
            label = label_dict[node.type]
            shape = 'circle'
            color = 'white'
        node2gvnode[node] = node_id
        digraph.node(node_id,
                     label,
                     shape=shape,
                     fillcolor=color,
                     style='filled')
        for n in node.children:
            GraphvizPainter._add_process_tree_nodes(digraph, n, label_dict,
                                                    node2gvnode)

    @staticmethod
    def _add_process_tree_edges(digraph: 'Digraph', node, node2gvnode):
        """
        Recursively adds parent -> child edges for the given tree node and its descendants.

        Parameters
        ----------
        digraph: Digraph
            Graphviz object the edges are added to.
        node: ProcessTreeNode
            Current tree node.
        node2gvnode: dict
            Maps tree nodes to graphviz node ids (see _add_process_tree_nodes).
        """
        n1 = node2gvnode[node]
        for node2 in node.children:
            n2 = node2gvnode[node2]
            digraph.edge(n1, n2)
        for node2 in node.children:
            GraphvizPainter._add_process_tree_edges(digraph, node2,
                                                    node2gvnode)

    def _add_node_in_digraph(self, gv_node):
        """
        Adds a GvNode to the graph.

        Parameters
        ----------
        gv_node: GvNode
            Represents a node object with graphviz's parameters.
        """
        self._digraph.node(gv_node.id,
                           shape=gv_node.shape,
                           fillcolor=gv_node.fillcolor,
                           style=gv_node.style,
                           label=gv_node.label)

    def write_graph(self, filename, format, prog='dot'):
        """
        Saves a graph visualization to file.

        Parameters
        ----------
        filename : str
            Name of the file to save the result to.

        format : {'gv', 'svg', 'png', 'pdf'}
            Format of the file.

        prog : {'dot', 'neato', ...}, default='dot'
            Graphviz's engine used to render the visualization.
        """
        self._digraph.engine = prog
        binary_data = self._digraph.pipe(format=format)
        with open(filename, mode='wb') as f:
            f.write(binary_data)

    def show(self):
        """
        Shows visualization of the graph in Jupyter Notebook.

        Returns
        -------
        digraph : IPython.core.display.HTML
            Graph in HTML format.
        """
        return HTML(self._digraph._repr_svg_())

    @staticmethod
    def _add_metrics_to_node_label(label, metrics):
        """
        Adds information about node's metrics to its label.

        Parameters
        -------------
        label: str
            Label of the node.
        metrics: dict of {str: number}
            Metrics of this node.

        Returns
        -------------
        label: str
            Modified node's label: one '<name>: <value rounded to 3 digits>' line
            per metric is appended.
        """
        # '\\n' is a literal backslash + 'n', i.e. graphviz's own line-break escape.
        return label + ''.join(
            '\\n' + metric_name + ': ' + str(round(metric_value, 3))
            for metric_name, metric_value in metrics.items())

    @staticmethod
    def _has_missing_values(values):
        """
        Returns True if any of the given metric values is None or NaN.
        """
        # None is tested first because np.isnan raises TypeError on it.
        return any(value is None or np.isnan(value) for value in values)

    @staticmethod
    def _calc_nodes_colors_by_metric(graph, node_style_metric):
        """
        Calculate nodes' colours according to given metric.

        Parameters
        -------------
        graph: Graph
            Graph object.
        node_style_metric
            Name of the metric.

        Returns
        -------------
        node_color_dict: dict of (str: str)
            Key: node name, value: its colour according to the metric
            (None for nodes that do not have the metric).
            The larger the metric value, the darker the colour.
            If something goes wrong, an empty dict is returned.
        """
        if node_style_metric is None:
            return {}
        if not graph.contains_node_metric(node_style_metric):
            print(
                f'WARNING: graph does not contain node metric "{node_style_metric}". '
                f'Nodes will have the same colour.')
            return {}
        nodes = graph.get_nodes()
        metric_values = [
            node.metrics[node_style_metric] for node in nodes
            if node_style_metric in node.metrics
        ]
        if not metric_values:
            # No node carries the metric: nothing to scale (min() would raise).
            return {}
        if GraphvizPainter._has_missing_values(metric_values):
            print(
                f"WARNING: metric \"{node_style_metric}\" contains None values, "
                f"impossible to use it for changing the nodes' style")
            return {}

        node_color_dict = {}
        min_value = min(metric_values)
        max_value = max(metric_values)
        if min_value == max_value:
            # All values equal: no gradient possible (and avoids division by zero).
            return {}
        darkest_color = 100  # 0 is the darkest
        lightest_color = 250  # 255 is the lightest

        for node in nodes:
            if node_style_metric in node.metrics:
                node_metric_value = node.metrics[node_style_metric]
                # Linear map: min_value -> lightest_color, max_value -> darkest_color.
                node_color_int = int(lightest_color -
                                     (lightest_color - darkest_color) *
                                     (node_metric_value - min_value) /
                                     (max_value - min_value))
                node_color_dict[node.id] = _get_hex_color(node_color_int)
            else:
                node_color_dict[node.id] = None
        return node_color_dict

    @staticmethod
    def _calc_edges_widths_by_metric(graph, edge_style_metric):
        """
        Calculate edges' width according to given metric.

        Parameters
        -------------
        graph: Graph
            Graph object.
        edge_style_metric
            Name of the metric.

        Returns
        -------------
        edge_width_dict: dict of (str: float)
            Key: edge id, value: its pen width according to the metric, scaled
            linearly into [0.1, 5]. Edges that do not have the metric are omitted.
            If something goes wrong, an empty dict is returned.
        """
        if edge_style_metric is None:
            return {}
        if not graph.contains_edge_metric(edge_style_metric):
            print(
                f'WARNING: graph does not contain edge metric "{edge_style_metric}". '
                f'Edges will have the same width.')
            return {}
        metric_values = [
            edge.metrics[edge_style_metric] for edge in graph.get_edges()
            if edge_style_metric in edge.metrics
        ]
        if not metric_values:
            # No edge carries the metric: nothing to scale (min() would raise).
            return {}
        if GraphvizPainter._has_missing_values(metric_values):
            print(
                f"WARNING: metric \"{edge_style_metric}\" contains None values, "
                f"impossible to use it for changing the edges' style")
            return {}

        edge_width_dict = {}
        min_value = min(metric_values)
        max_value = max(metric_values)
        if min_value == max_value:
            # All values equal: no scaling possible (and avoids division by zero).
            return {}
        min_penwidth = 0.1
        max_penwidth = 5

        for edge in graph.get_edges():
            if edge_style_metric in edge.metrics:
                edge_metric_value = edge.metrics[edge_style_metric]
                score = (edge_metric_value - min_value) / (max_value -
                                                           min_value)
                edge_width_dict[edge.id] = min_penwidth + (
                    max_penwidth - min_penwidth) * score
        return edge_width_dict