class TensorflowGraphToDot:
    """Render the ops that feed a TensorFlow op as a Graphviz digraph.

    The constructor collects every op returned by
    ``graph_editor.get_backward_walk_ops(op)``, records each op's input
    names, and draws one Graphviz node per op and one edge per input.
    Ops whose name contains any of the ``suppressed_nodes`` substrings are
    hidden; edges that would pass through a hidden op are re-attached to
    that op's own inputs.
    """

    def __init__(self, op, suppressed_nodes=None):
        """Build the digraph for ``op``.

        :param op: op handed to ``get_backward_walk_ops`` as the walk start.
        :param suppressed_nodes: iterable of substrings; an op is hidden when
            its name contains any of them. ``None`` hides nothing.
        """
        # Imported lazily so the module can be loaded without these packages.
        from graphviz import Digraph
        import tensorflow.contrib.graph_editor as ge

        self.dot = Digraph()
        # node name -> names of the node's inputs, in walk order.
        self.nodes = collections.OrderedDict()
        for node in ge.get_backward_walk_ops(op):
            self.nodes[node.node_def.name] = node.node_def.input

        # A ``None`` default avoids sharing one list object between instances.
        self.suppressed_nodes = [] if suppressed_nodes is None else suppressed_nodes

        for node_name, input_names in self.nodes.items():
            self.draw_node(node_name)
            for input_name in input_names:
                self.draw_edge(input_name, node_name)

    def is_suppressed(self, node_name):
        """Return True when ``node_name`` contains any suppressed substring."""
        return any(s in node_name for s in self.suppressed_nodes)

    def draw_edge(self, from_node_name, to_node_name, _path=frozenset()):
        """Draw an edge, routing around suppressed source nodes.

        If the source is suppressed, the edge is drawn from each of the
        source's inputs instead (recursively). Nothing is drawn when the
        target is suppressed.

        :param from_node_name: name of the edge's tail node.
        :param to_node_name: name of the edge's head node.
        :param _path: internal; suppressed nodes already expanded on the
            current recursion path, used to stop on cycles.
        """
        if self.is_suppressed(from_node_name):
            if from_node_name in _path:
                # Cycle made only of suppressed nodes: stop instead of
                # recursing until the interpreter's recursion limit.
                return
            if from_node_name in self.nodes:
                next_path = _path | {from_node_name}
                for next_node_name in self.nodes[from_node_name]:
                    self.draw_edge(next_node_name, to_node_name, next_path)
        elif not self.is_suppressed(to_node_name):
            self.dot.edge(from_node_name, to_node_name)

    def draw_node(self, node_name):
        """Add ``node_name`` to the digraph unless it is suppressed."""
        if not self.is_suppressed(node_name):
            self.dot.node(node_name, label=node_name)

    def _repr_svg_(self):
        """Return the digraph's SVG representation (used by Jupyter)."""
        return self.dot._repr_svg_()
def _repr_svg_(self):
    """Return an SVG rendering of the graph for display in Jupyter.

    One node is emitted per index in ``range(self.size)``; every entry of
    ``self.edges[i]`` becomes an edge from ``i`` to the entry's ``target``,
    labelled with the entry's ``weight``.
    """
    picture = Digraph(engine='fdp')

    # Declare all vertices first so isolated ones are still rendered.
    for vertex in range(self.size):
        picture.node(str(vertex))

    for source in range(self.size):
        tail = str(source)
        for link in self.edges[source]:
            picture.edge(tail, str(link.target), label=str(link.weight))

    return picture._repr_svg_()
def _repr_svg_(self):
    """Return an SVG rendering of the graph for display in Jupyter.

    ``self.edges`` is indexed as ``self.edges[i][j]``; every entry that is
    not equal to 0 becomes an edge from ``i`` to ``j`` labelled with the
    entry's value.
    """
    picture = Digraph(engine='fdp')

    # Declare all vertices first so isolated ones are still rendered.
    for vertex in range(self.size):
        picture.node(str(vertex))

    for row in range(self.size):
        for col in range(self.size):
            weight = self.edges[row][col]
            if weight == 0:
                # A zero entry means "no edge".
                continue
            picture.edge(str(row), str(col), label=str(weight))

    return picture._repr_svg_()
class DerivaCatalogToGraph:
    """Draw the tables and foreign keys of a catalog model with Graphviz.

    Each schema becomes an (invisible) cluster subgraph, each table a node
    linking to its Chaise recordset page, and each foreign key an edge.
    """

    def __init__(self, catalog, engine='dot'):
        """Create an empty graph bound to ``catalog``.

        :param catalog: object providing ``getCatalogModel()``,
            ``get_server_uri()`` and ``catalog_id``.
        :param engine: Graphviz layout engine passed to ``Digraph``.
        """
        self.graph = Digraph(
            engine=engine,
            format='pdf',
            edge_attr=None,
            strict=True,
        )
        self.catalog = catalog
        self._model = catalog.getCatalogModel()
        # Prefix of the Chaise recordset URL; "schema:table" is appended per node.
        self._chaise_base = "https://{}/chaise/recordset/#{}/".format(
            urlparse(catalog.get_server_uri()).netloc, self.catalog.catalog_id)
        self.graph.attr('graph', rankdir='LR')
        self.graph.attr('graph', overlap='false', splines='true')

    def clear(self):
        """Remove everything drawn so far."""
        self.graph.clear()

    def view(self):
        """Render the graph and open it in the default viewer."""
        self.graph.view()

    def catalog_to_graph(self, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Convert a catalog to a DOT based graph.

        :param schemas: List of schemas that should be included. Use the whole
            catalog (minus '_acl_admin', 'public' and 'WWW') if None.
        :param skip_terms: Do not include term tables in the graph.
        :param skip_association_tables: Collapse association tables so that only
            edges between endpoints are used.
        :return: None
        """
        if schemas is None:
            schemas = [s.name for s in self._model.schemas.values()
                       if s.name not in ['_acl_admin', 'public', 'WWW']]

        for schema in schemas:
            self.schema_to_graph(schema,
                                 skip_terms=skip_terms,
                                 schemas=schemas,
                                 skip_association_tables=skip_association_tables)

    def schema_to_graph(self, schema_name, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Create a graph for the specified schema.

        :param schema_name: Name of the schema in the model to be used.
        :param schemas: List of additional schemas whose tables may be edge
            endpoints. ``schema_name`` itself is always included.
        :param skip_terms: Do not draw vocabulary (term) tables.
        :param skip_association_tables: Do not draw association tables; draw a
            single edge between their two endpoints instead.
        :return: None
        """
        # Work on a copy, and make sure the schema being drawn is itself
        # allowed as an edge endpoint; otherwise a direct call with the
        # default argument would filter out every foreign-key edge.
        schemas = [] if schemas is None else list(schemas)
        if schema_name not in schemas:
            schemas.append(schema_name)

        schema = self._model.schemas[schema_name]

        # Put nodes for each schema in a separate subgraph.
        with self.graph.subgraph(name='cluster_' + schema_name,
                                 node_attr={'shape': 'box'}) as schema_graph:
            schema_graph.attr(style='invis')
            for table in schema.tables.values():
                node_name = '{}_{}'.format(schema_name, table.name)
                node_label = '{}:{}'.format(schema_name, table.name)
                if DerivaCatalogToGraph._is_vocabulary_table(table):
                    if not skip_terms:
                        schema_graph.node(node_name, label=node_label,
                                          shape='ellipse', URL=self._chaise_uri(table))
                # Skip over current table if it is an association table and option is set.
                elif not (table.is_association() and skip_association_tables):
                    schema_graph.node(node_name, label=node_label,
                                      shape='box', URL=self._chaise_uri(table))
                else:
                    print('Skipping node', node_name)

        # We have all the nodes out now, so run over and add edges.
        for table in schema.tables.values():
            self.foreign_key_defs_to_graph(table,
                                           skip_terms=skip_terms,
                                           schemas=schemas,
                                           skip_association_tables=skip_association_tables)

    def foreign_key_defs_to_graph(self, table, skip_terms=False, skip_association_tables=False, schemas=None):
        """
        Add edges for each foreign key relationship in the specified table.

        :param table: Table whose foreign keys are drawn.
        :param skip_terms: Do not draw edges that point at vocabulary tables.
        :param skip_association_tables: Replace a binary association table by
            one bidirectional gray edge between its two endpoints.
        :param schemas: Only draw an edge when both the table's schema and the
            referenced table's schema are in this list. ``None`` disables the
            filter.
        :return: None
        """
        # If table is an association table, put in an edge between the two
        # endpoints in the relation.
        if table.is_association() == 2 and skip_association_tables:
            t1 = table.foreign_keys[0].referenced_columns[0].table
            t2 = table.foreign_keys[1].referenced_columns[0].table
            t1_name = '{}_{}'.format(t1.schema.name, t1.name)
            t2_name = '{}_{}'.format(t2.schema.name, t2.name)
            self.graph.edge(t1_name, t2_name, dir='both', color='gray')
            return

        source_name = '{}_{}'.format(table.schema.name, table.name)
        for fkey in table.foreign_keys:
            referenced_table = list(fkey.column_map.values())[0].table

            # If either end is in a schema we are skipping, do not add an edge.
            if schemas is not None and (referenced_table.schema.name not in schemas
                                        or table.schema.name not in schemas):
                continue

            # If the target is a term table, and we are not including terms,
            # do not add an edge.
            if skip_terms and DerivaCatalogToGraph._is_vocabulary_table(referenced_table):
                continue

            # Add an edge from the current node to the target table.
            target_name = '{}_{}'.format(referenced_table.schema.name, referenced_table.name)
            self.graph.edge(source_name, target_name)

    def save(self, filename=None, format='pdf', view=False):
        """Write the graph to disk.

        :param filename: Output path. When None, ``None`` is forwarded to
            Graphviz for both the file name and the directory.
        :param format: If it contains 'gv' the DOT source is saved, otherwise
            the graph is rendered in this format.
        :param view: Open the rendered file afterwards (render only).
        """
        if filename is None:
            # os.path.abspath(None) raises TypeError, so skip the path split.
            directory, basename = None, None
        else:
            directory, basename = os.path.split(os.path.abspath(filename))

        if 'gv' in format:
            self.graph.save(filename=basename, directory=directory)
        else:
            print('dumping graph in file', basename, format)
            self.graph.render(filename=basename, directory=directory,
                              view=view, cleanup=True, format=format)

    def _repr_svg_(self):
        """Return the graph's SVG representation (used by Jupyter)."""
        return self.graph._repr_svg_()

    @staticmethod
    def _is_vocabulary_table(t):
        """Return True when ``t`` looks like a vocabulary (term) table.

        A table qualifies when its schema is named 'vocab' or 'vocabulary'
        (case-insensitive), or when it has all of the columns 'ID', 'Name',
        'URI' and 'Synonyms'.
        """
        # NOTE(review): the previous test was ``name in 'vocabulary'``, a
        # substring check that also matched names such as '' or 'a'. The two
        # names accepted here are assumed to be the intended ones - confirm.
        if t.schema.name.lower() in ('vocab', 'vocabulary'):
            return True
        try:
            return bool(t.columns['ID'] and t.columns['Name']
                        and t.columns['URI'] and t.columns['Synonyms'])
        except KeyError:
            return False

    def _chaise_uri(self, table):
        """Return the Chaise recordset URL for ``table``."""
        return self._chaise_base + "{}:{}".format(table.schema.name, table.name)
class GraphvizPainter:
    """
    Class represents a graph visualizer that uses a graphviz visualizing tool.

    Attributes
    ----------
    _digraph: Digraph
        A graphviz's object that represents a graph.
        It is None until one of the ``apply*`` methods has been called.

    Examples
    --------
    >>> import pandas as pd
    >>> from sberpm import DataHolder
    >>> from sberpm.miners import HeuMiner
    >>> from sberpm.visual import GraphvizPainter
    >>>
    >>> # Create data_holder
    >>> df = pd.DataFrame({
    ...     'id_column': [1, 1, 2],
    ...     'activity_column':['st1', 'st2', 'st1'],
    ...     'dt_column':[123456, 123457, 123458]})
    >>> data_holder = DataHolder(df, 'id_column', 'activity_column', 'dt_column')
    >>>
    >>> # Create graph using a miner algorithm
    >>> miner = HeuMiner(data_holder)
    >>> miner.apply()
    >>>
    >>> # Visualize graph
    >>> painter = GraphvizPainter()
    >>> painter.apply(miner.graph)
    >>>
    >>> # Or visualize graph with metrics
    >>> graph = miner.graph
    >>> graph.add_node_metric('count', {'st1': 2, 'st2': 1})
    >>> painter = GraphvizPainter()
    >>> painter.apply(graph, node_style_metric='count')
    >>>
    >>> # Save and show result
    >>> painter.write_graph('graph.svg', 'svg')
    >>> painter.show()  # Works in Jupyter Notebook
    """

    def __init__(self):
        self._digraph = None

    def apply(self, graph, node_style_metric=None, edge_style_metric=None, hide_disconnected_nodes=True):
        """
        Visualizes the given graph.
        (Creates graphviz's object that can be displayed or saved to file.)

        Parameters
        ----------
        graph : Graph or ProcessTreeNode
            Graph object.

        node_style_metric: str
            Name of the node's metric that will influence the colour of the nodes.
            If None or given metric is not contained in the given graph, nodes will have the same colour.
            Is not used if graph is a ProcessTreeNode object.

        edge_style_metric: str
            Name of the edge's metric that will influence the thickness of the edges.
            If None or given metric is not contained in the given graph, edges will have the same width.
            Is not used if graph is a ProcessTreeNode object.

        hide_disconnected_nodes: bool, default=True
            If True, nodes without any input and output edges will not be displayed.
            Is not used if graph is a ProcessTreeNode object.
        """
        if isinstance(graph, ProcessTreeNode):
            self._apply_process_tree(graph)
            return

        self._digraph = Digraph()

        node_color_dict = GraphvizPainter._calc_nodes_colors_by_metric(
            graph, node_style_metric)
        edge_width_dict = GraphvizPainter._calc_edges_widths_by_metric(
            graph, edge_style_metric)

        for node in graph.get_nodes():
            if hide_disconnected_nodes and len(node.output_edges) == 0 and len(node.input_edges) == 0:
                continue
            self._add_node_in_digraph(_get_gv_node(node, node_color_dict))

        for edge in graph.get_edges():
            width = edge_width_dict.get(edge.id)
            # Attributes are passed as None when there is nothing to set.
            self._digraph.edge(
                remove_bad_symbols(edge.source_node.id),
                remove_bad_symbols(edge.target_node.id),
                penwidth=None if width is None else str(width),
                label=str(edge.metrics[edge_style_metric]) if edge_style_metric in edge.metrics else None)

    def apply_insights(self, graph, edge_style_metric='insights', hide_disconnected_nodes=True):
        """
        Visualizes a graph using the colours and labels stored on its nodes and edges.

        Parameters
        ----------
        graph : Graph
            Graph object whose nodes provide ``type``, ``label`` (and ``color``
            for task nodes) and whose edges provide ``label`` and ``color``.

        edge_style_metric: str, default='insights'
            Name of the edge's metric that will influence the thickness of the edges.

        hide_disconnected_nodes: bool, default=True
            If True, nodes without any input and output edges will not be displayed,
            except the nodes with ids 'legend_good' and 'legend_bad'.
        """
        self._digraph = Digraph()

        edge_width_dict = GraphvizPainter._calc_edges_widths_by_metric(
            graph, edge_style_metric)

        for node in graph.get_nodes():
            if hide_disconnected_nodes and len(node.output_edges) == 0 and len(node.input_edges) == 0 \
                    and node.id not in ['legend_good', 'legend_bad']:
                continue

            style = 'filled'
            # Reset per node so that a node of an unhandled type neither
            # raises UnboundLocalError nor inherits the previous node's look.
            shape = None
            color = None
            if node.type == NodeType.TASK:
                shape = 'box'
                color = node.color
            elif node.type == NodeType.START_EVENT:
                color = 'green'
                shape = 'circle'
            elif node.type == NodeType.END_EVENT:
                color = 'red'
                shape = 'circle'

            if color == 'black':
                color = 'white'

            self._digraph.node(node.id, shape=shape, style=style, fillcolor=color, label=node.label)

        for edge in graph.get_edges():
            penwidth = edge_width_dict.get(edge.id)
            if penwidth:
                penwidth = str(penwidth)
            self._digraph.edge(edge.source_node.id, edge.target_node.id,
                               label=edge.label, color=edge.color, penwidth=penwidth)

    def _apply_process_tree(self, root_node):
        """
        Graphviz visualizer for ProcessTreeNode class.

        Parameters
        ----------
        root_node: ProcessTreeNode
        """
        digraph = Digraph()

        # Add nodes
        label_dict = {
            ProcessTreeNodeType.EXCLUSIVE_CHOICE: 'X',
            ProcessTreeNodeType.SEQUENTIAL: '->',
            ProcessTreeNodeType.PARALLEL: '||',
            ProcessTreeNodeType.LOOP: '*',
            ProcessTreeNodeType.FLOWER: '?',
        }
        node2gvnode = dict()
        GraphvizPainter._add_process_tree_nodes(digraph, root_node, label_dict, node2gvnode)

        # Add edges
        GraphvizPainter._add_process_tree_edges(digraph, root_node, node2gvnode)

        self._digraph = digraph

    @staticmethod
    def _add_process_tree_nodes(digraph: 'Digraph', node, label_dict, node2gvnode):
        """
        Recursively adds ``node`` and its descendants to ``digraph``.

        Parameters
        ----------
        digraph: Digraph
            Graph the nodes are added to.
        node: ProcessTreeNode
            Root of the (sub)tree to add.
        label_dict: dict
            Label to draw for every non-activity node type.
        node2gvnode: dict
            Filled in place: tree node -> id of its graphviz node.
        """
        if node.type == ProcessTreeNodeType.SINGLE_ACTIVITY:
            if node.label is not None:
                node_id = node.label
                label = node.label
                shape = 'box'
                color = 'white'
            else:
                # Activity without a label: drawn as an empty black box.
                node_id = node.type + '_' + str(len(node2gvnode))
                label = ''
                shape = 'box'
                color = 'black'
        else:
            # Operator node: the running count keeps the ids unique.
            node_id = node.type + '_' + str(len(node2gvnode))
            label = label_dict[node.type]
            shape = 'circle'
            color = 'white'
        node2gvnode[node] = node_id
        digraph.node(node_id, label, shape=shape, fillcolor=color, style='filled')
        for n in node.children:
            GraphvizPainter._add_process_tree_nodes(digraph, n, label_dict, node2gvnode)

    @staticmethod
    def _add_process_tree_edges(digraph: 'Digraph', node, node2gvnode):
        """
        Recursively adds an edge from ``node`` to each of its children.

        Parameters
        ----------
        digraph: Digraph
            Graph the edges are added to.
        node: ProcessTreeNode
            Root of the (sub)tree to process.
        node2gvnode: dict
            Tree node -> id of its graphviz node.
        """
        n1 = node2gvnode[node]
        for node2 in node.children:
            digraph.edge(n1, node2gvnode[node2])
        for node2 in node.children:
            GraphvizPainter._add_process_tree_edges(digraph, node2, node2gvnode)

    def _add_node_in_digraph(self, gv_node):
        """
        Adds a GvNode to the graph.

        Parameters
        ----------
        gv_node: GvNode
            Represents a node object with graphviz's parameters.
        """
        self._digraph.node(gv_node.id, shape=gv_node.shape, fillcolor=gv_node.fillcolor, style=gv_node.style,
                           label=gv_node.label)

    def write_graph(self, filename, format, prog='dot'):
        """
        Saves a graph visualization to file.

        Parameters
        ----------
        filename : str
            Name of the file to save the result to.

        format : {'gv', 'svg', 'png', 'pdf'}
            Format of the file.

        prog : {'dot', 'neato', ...}, default='dot'
            Graphviz's engine used to render the visualization.
        """
        self._digraph.engine = prog
        binary_data = self._digraph.pipe(format=format)
        with open(filename, mode='wb') as f:
            f.write(binary_data)

    def show(self):
        """
        Shows visualization of the graph in Jupyter Notebook.

        Returns
        -------
        digraph : IPython.core.display.HTML
            Graph in HTML format.
        """
        return HTML(self._digraph._repr_svg_())

    @staticmethod
    def _add_metrics_to_node_label(label, metrics):
        """
        Adds information about node's metrics to its label.

        Parameters
        -------------
        label: str
            Label of the node.
        metrics: dict of {str: number}
            Metrics of this node.

        Returns
        -------------
        label: str
            Modified node's label.
        """
        # '\\n' is a literal backslash followed by 'n', not a newline character.
        for metric_name, metric_value in metrics.items():
            label += '\\n' + metric_name + ': ' + str(round(metric_value, 3))
        return label

    @staticmethod
    def _contains_missing(values):
        """
        Returns True if any value is None or NaN.

        NaN is detected with ``value != value``, so no value has to be
        converted first; unlike ``numpy.isnan`` this also accepts None.
        """
        return any(value is None or value != value for value in values)

    @staticmethod
    def _calc_nodes_colors_by_metric(graph, node_style_metric):
        """
        Calculate nodes' colours according to given metric.

        Parameters
        -------------
        graph: Graph
            Graph object.
        node_style_metric
            Name of the metric.

        Returns
        -------------
        node_color_dict: dict of (str: str)
            Key: node name, value: its colour according to the metric
            (None for a node that does not have the metric).
            If something goes wrong, an empty dict is returned.
        """
        if node_style_metric is None:
            return {}
        if not graph.contains_node_metric(node_style_metric):
            print(
                f'WARNING: graph does not contain node metric "{node_style_metric}". '
                f'Nodes will have the same colour.')
            return {}

        nodes = graph.get_nodes()
        metric_values = [
            node.metrics[node_style_metric] for node in nodes
            if node_style_metric in node.metrics
        ]
        if GraphvizPainter._contains_missing(metric_values):
            print(
                f"WARNING: metric \"{node_style_metric}\" contains None values, "
                f"impossible to use it for changing the nodes' style")
            return {}
        if not metric_values:
            # No node carries the metric: nothing to scale.
            return {}

        min_value = min(metric_values)
        max_value = max(metric_values)
        if min_value == max_value:
            # All values equal: scaling would divide by zero.
            return {}

        darkest_color = 100  # 0 is the darkest
        lightest_color = 250  # 255 is the lightest
        node_color_dict = {}
        for node in nodes:
            if node_style_metric in node.metrics:
                node_metric_value = node.metrics[node_style_metric]
                # Linear map: min_value -> lightest, max_value -> darkest.
                node_color_int = int(lightest_color - (lightest_color - darkest_color) *
                                     (node_metric_value - min_value) / (max_value - min_value))
                node_color_dict[node.id] = _get_hex_color(node_color_int)
            else:
                node_color_dict[node.id] = None
        return node_color_dict

    @staticmethod
    def _calc_edges_widths_by_metric(graph, edge_style_metric):
        """
        Calculate edges' width according to given metric.

        Parameters
        -------------
        graph: Graph
            Graph object.
        edge_style_metric
            Name of the metric.

        Returns
        -------------
        edge_width_dict: dict of (edge id: float)
            Key: edge id, value: its pen width according to the metric,
            between 0.1 and 5. Edges without the metric are left out.
            If something goes wrong, an empty dict is returned.
        """
        if edge_style_metric is None:
            return {}
        if not graph.contains_edge_metric(edge_style_metric):
            print(
                f'WARNING: graph does not contain edge metric "{edge_style_metric}". '
                f'Edges will have the same width.')
            return {}

        metric_values = [
            edge.metrics[edge_style_metric] for edge in graph.get_edges()
            if edge_style_metric in edge.metrics
        ]
        if GraphvizPainter._contains_missing(metric_values):
            print(
                f"WARNING: metric \"{edge_style_metric}\" contains None values, "
                f"impossible to use it for changing the edges' style")
            return {}
        if not metric_values:
            # No edge carries the metric: nothing to scale.
            return {}

        min_value = min(metric_values)
        max_value = max(metric_values)
        if min_value == max_value:
            # All values equal: scaling would divide by zero.
            return {}

        min_penwidth = 0.1
        max_penwidth = 5
        edge_width_dict = {}
        for edge in graph.get_edges():
            if edge_style_metric in edge.metrics:
                edge_metric_value = edge.metrics[edge_style_metric]
                # Linear map: min_value -> min_penwidth, max_value -> max_penwidth.
                score = (edge_metric_value - min_value) / (max_value - min_value)
                edge_width_dict[edge.id] = min_penwidth + (
                    max_penwidth - min_penwidth) * score
        return edge_width_dict