def from_json(obj: dict, ont: Ontology) -> 'SSD':
    """Build an SSD from its JSON (dict) representation.

    Reads ``obj['mappings']``, ``obj['attributes']``, ``obj['semanticModel']``
    (its ``nodes`` and ``links``) and ``obj['name']``.

    * A node whose ``type`` is ``'DataNode'`` becomes a data node labelled
      with the ``name`` of the attribute it is mapped to, and is also
      recorded as an ``SSDAttribute``.
    * Any other node becomes a class node labelled with
      ``ont.simplify_uri(prefix + label)``.
    * Every link is added with ``GraphLinkType.UNSPECIFIED`` and the label
      ``ont.simplify_uri(prefix + label)``.

    Labels are stored in the graph as encoded bytes.

    Raises:
        AssertionError: if an attribute does not have exactly one column id
            equal to its own id.
        KeyError: if an expected key is missing or a node/attribute id
            cannot be resolved.
    """
    graph = Graph(True, True, True)
    attr_id_of_node = {m['node']: m['attribute'] for m in obj['mappings']}
    graph_id_of = {}

    # Index the raw attributes by id; each one must be backed by exactly one
    # column whose id is the attribute id itself.
    attr_by_id = {}
    for raw in obj['attributes']:
        column_ids = raw['columnIds']
        assert len(column_ids) == 1 and column_ids[0] == raw['id']
        attr_by_id[raw['id']] = raw

    ssd_attrs = []
    for node in obj['semanticModel']['nodes']:
        if node['type'] == 'DataNode':
            kind = GraphNodeType.DATA_NODE
            label = attr_by_id[attr_id_of_node[node['id']]]['name']
            ssd_attrs.append(SSDAttribute(node['id'], label))
        else:
            kind = GraphNodeType.CLASS_NODE
            label = ont.simplify_uri(node['prefix'] + node['label'])

        # Remember which graph id the JSON node id was assigned.
        graph_id_of[node['id']] = graph.add_new_node(kind, label.encode()).id

    for link in obj['semanticModel']['links']:
        link_label = ont.simplify_uri(link['prefix'] + link['label'])
        graph.add_new_link(
            GraphLinkType.UNSPECIFIED,
            link_label.encode(),
            graph_id_of[link['source']],
            graph_id_of[link['target']],
        )

    return SSD(obj['name'], ssd_attrs, graph, ont)
def build(self, g: Graph) -> GraphExplorer:
    """Copy ``g`` into a fresh ``GraphExplorer``, explore it, and return it.

    Every node of ``g`` is added with ``GraphNodeHop(0)`` and its original
    type and label; every link is added with its original type, label and
    endpoint ids. ``self.explore`` is then called on the explorer before it
    is returned.
    """
    # TODO: can make it more efficient by giving estimation to graph explorer
    explorer = GraphExplorer()

    for src_node in g.iter_nodes():
        explorer.real_add_new_node(
            GraphNodeHop(0), src_node.type, src_node.label)

    for src_link in g.iter_links():
        explorer.add_new_link(
            src_link.type, src_link.label,
            src_link.source_id, src_link.target_id)

    self.explore(explorer)
    return explorer
def preserved_structure(
        gold_sm: Graph, pred_sm: Graph,
        gold_triples: Set[Tuple[int, bytes, Union[bytes, int]]]
) -> Tuple[Dict[int, bool], Dict[int, Optional[int]]]:
    """Mark which links of ``pred_sm`` appear in ``gold_triples``.

    ``pred_sm`` is aligned with ``gold_sm`` via ``align_graph`` (using
    ``DataNodeMode.IGNORE_DATA_NODE``). For each candidate bijection, every
    outgoing link of every class node in ``pred_sm`` is turned into a triple
    ``(mapped source id, link label, mapped target id or data-node label)``
    and tested for membership in ``gold_triples``. The bijection with the
    highest number of matching links wins; on ties the earliest one is kept.

    Returns:
        ``(link2label, prime2x)`` where ``link2label`` maps a link id of
        ``pred_sm`` to whether its triple is in ``gold_triples`` and
        ``prime2x`` is the winning bijection's mapping.
    """
    alignment = align_graph(gold_sm, pred_sm, DataNodeMode.IGNORE_DATA_NODE)

    top_score = -1
    top_link2label = None
    top_bijection = None

    # build example from this candidate model
    for candidate in alignment['_bijections']:
        to_gold = candidate.prime2x
        matched = {}
        for class_node in pred_sm.iter_class_nodes():
            for out_link in class_node.iter_outgoing_links():
                target = out_link.get_target_node()
                # Class-node targets are compared through the bijection,
                # data-node targets by their label.
                if target.is_class_node():
                    obj = to_gold[out_link.target_id]
                else:
                    obj = target.label
                triple = (to_gold[out_link.source_id], out_link.label, obj)
                matched[out_link.id] = triple in gold_triples

        n_matched = sum(matched.values())
        if n_matched > top_score:
            top_score = n_matched
            top_link2label = matched
            top_bijection = candidate

    # NOTE(review): if alignment['_bijections'] is empty, top_bijection is
    # still None here and this line raises AttributeError.
    return top_link2label, top_bijection.prime2x
def convert_graph(graph: Graph):
    """Convert ``graph`` into an index of ``Node`` objects keyed by node id.

    ``data_node_mode`` is not a parameter: it is read from the surrounding
    scope and selects how data nodes are treated.

    * ``data_node_mode == 2``: links whose target is a data node are not
      attached to any node.
    * ``DataNodeMode.IGNORE_DATA_NODE``: data nodes are removed from the
      returned index.
    * ``DataNodeMode.IGNORE_LABEL_DATA_NODE``: data nodes are relabelled
      ``'DATA_NODE<k>'``, where ``k`` counts, per parent node, the data-node
      links seen so far with the same link label.

    Returns:
        Dict mapping node id to the converted ``Node``.
    """
    node_index: Dict[int, Node] = {}

    for src in graph.iter_nodes():
        kind = Node.DATA_NODE if src.is_data_node() else Node.CLASS_NODE
        node_index[src.id] = Node(src.id, kind, src.label)

    for src_link in graph.iter_links():
        # NOTE(review): this branch compares against the literal 2 while the
        # branches below compare against DataNodeMode members -- confirm
        # that 2 is the intended mode here.
        if data_node_mode == 2:
            if node_index[src_link.target_id].type == Node.DATA_NODE:
                # ignore data node
                continue

        new_link = Link(src_link.id, src_link.label,
                        src_link.source_id, src_link.target_id)
        Node.add_outgoing_link(node_index[src_link.source_id], new_link)
        Node.add_incoming_link(node_index[src_link.target_id], new_link)

    if data_node_mode == DataNodeMode.IGNORE_DATA_NODE:
        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated.
        data_nodes = [
            nd for nd in node_index.values() if nd.type == Node.DATA_NODE
        ]
        for nd in data_nodes:
            del node_index[nd.id]

    if data_node_mode == DataNodeMode.IGNORE_LABEL_DATA_NODE:
        # we convert label of node to DATA_NODE
        parents: Set[Node] = set()
        data_nodes = [
            nd for nd in node_index.values() if nd.type == Node.DATA_NODE
        ]
        for nd in data_nodes:
            assert len(nd.incoming_links) == 1
            parents.add(node_index[nd.incoming_links[0].source_id])

        for parent in parents:
            seen_per_label = {}
            for out_link in parent.outgoing_links:
                child = node_index[out_link.target_id]
                if child.type != Node.DATA_NODE:
                    continue
                count = seen_per_label.get(out_link.label, 0) + 1
                seen_per_label[out_link.label] = count
                child.label = 'DATA_NODE' + str(count)

    return node_index
def clear_serene_footprint(self, remove_unknown: bool = True) -> 'SSD':
    """Rebuild ``self.graph`` without the ``serene:*`` helper nodes and links.

    The node labelled ``b"serene:All"`` is always dropped. When
    ``remove_unknown`` is true, the node labelled ``b"serene:Unknown"`` and
    every node it links to (asserted to be data nodes) are dropped as well.
    Links labelled ``b"serene:connect"`` are never copied; links labelled
    ``b"serene:unknown"`` are skipped only when ``remove_unknown`` is true.

    If nothing has to be dropped the graph is left untouched. In every case
    ``self`` is returned.

    Raises:
        AssertionError: if ``serene:Unknown`` links to a non-data node.
        KeyError: if a copied link refers to a dropped node.
    """
    all_node = None
    unknown_node = None
    for node in self.graph.iter_nodes():
        if node.label == b"serene:All":
            all_node = node
        elif node.label == b"serene:Unknown":
            unknown_node = node

    dropped_ids = set()
    if all_node is not None:
        dropped_ids.add(all_node.id)

    if remove_unknown and unknown_node is not None:
        unknown_id = unknown_node.id
        dropped_ids.add(unknown_id)
        # Everything hanging off serene:Unknown goes away with it.
        for link in self.graph.iter_links():
            if link.source_id == unknown_id:
                assert link.get_target_node().is_data_node()
                dropped_ids.add(link.target_id)

    if not dropped_ids:
        # no serene footprint to remove
        return self

    cleaned = Graph(True, True, True)
    new_id_of = {}
    for node in self.graph.iter_nodes():
        if node.id not in dropped_ids:
            new_id_of[node.id] = cleaned.add_new_node(node.type, node.label).id

    for link in self.graph.iter_links():
        is_connect = link.label == b"serene:connect"
        is_unknown = remove_unknown and link.label == b"serene:unknown"
        if is_connect or is_unknown:
            continue
        cleaned.add_new_link(
            link.type, link.label,
            new_id_of[link.source_id], new_id_of[link.target_id])

    self.graph = cleaned
    return self
def mask_dnode(self, g: Graph) -> Graph:
    """deprecated

    Return a copy of ``g`` in which every node that is not a class node is
    relabelled ``b"DataNode"``. Class-node labels, node types and all links
    are copied unchanged, and the copy is expected to assign the same ids as
    ``g``.

    Raises:
        AssertionError: if a copied node or link receives a different id
            than its original (only when assertions are enabled).
    """
    g2 = Graph(True, True, True, g.get_n_nodes(), g.get_n_links())

    for n in g.iter_nodes():
        label = n.label if n.type == GraphNodeType.CLASS_NODE else b"DataNode"
        # The insertion must happen outside the assert: under ``python -O``
        # assert statements are stripped, which would otherwise skip the
        # insertion itself and return an empty graph.
        new_node = g2.add_new_node(n.type, label)
        assert new_node.id == n.id

    for e in g.iter_links():
        new_link = g2.add_new_link(e.type, e.label, e.source_id, e.target_id)
        assert new_link.id == e.id

    return g2
def render_factor_graph(model_or_factors: Union[LogLinearModel, List[Factor]],
                        vars: List[TripleLabel], fpath: str):
    """Render factor graph for debugging

    ``model_or_factors`` is either a ``LogLinearModel``, in which case its
    factors are obtained with ``get_factors(vars)``, or the factors
    themselves. One graph node is created per variable and per factor. Each
    factor is linked to its unobserved variables, and each variable whose
    triple has a parent is linked from that parent's label. The graph is
    written with ``render2pdf(fpath)``.

    Side effect: a ``myid`` attribute is set on every variable and factor.
    """
    if isinstance(model_or_factors, LogLinearModel):
        factors = model_or_factors.get_factors(vars)
    else:
        factors = model_or_factors

    def get_fnode_lbl(fnode: Union[TripleLabel, Factor]) -> bytes:
        # Factors are labelled by class name, variables by the endpoints of
        # their triple's link.
        if isinstance(fnode, Factor):
            return fnode.__class__.__name__.encode('utf-8')

        link = fnode.triple.link
        src = link.get_source_node()
        dst = link.get_target_node()
        text = "%s:%s--%s:%s" % (src.id, src.label.decode('utf-8'), dst.id,
                                 dst.label.decode('utf-8'))
        return text.encode('utf-8')

    class Node(GraphNode):
        def __init__(self, fnode: Union[TripleLabel, Factor]) -> None:
            super().__init__()
            self.fnode = fnode

        def get_dot_format(self, max_text_width: int):
            printed = self.get_printed_label(max_text_width)
            escaped = printed.encode('unicode_escape').decode()
            if isinstance(self.fnode, Variable):
                template = '"%s"[style="filled",color="white",fillcolor="gold",label="%s"];'
            else:
                template = '"%s"[shape="plaintext",style="filled",fillcolor="lightgray",label="%s"];'
            return template % (self.id, escaped)

    class Link(GraphLink):
        var2factor = "var2factor"
        var2var = "var2var"

        def __init__(self, link_type: str) -> None:
            super().__init__()
            self.link_type = link_type

        def get_dot_format(self, max_text_width: int):
            printed = self.get_printed_label(max_text_width)
            escaped = printed.encode('unicode_escape').decode()
            if self.link_type == Link.var2factor:
                template = '"%s" -> "%s"[dir=none,color="brown",fontcolor="black",label="%s"];'
            else:
                template = '"%s" -> "%s"[color="brown",style="dashed",fontcolor="black",label="%s"];'
            return template % (self.source_id, self.target_id, escaped)

    g = Graph()

    # build graphs
    # Position in the (vars, factors) enumeration doubles as the node id.
    fnode2id: Dict[Union[Variable, Factor], int] = _(
        vars, factors).enumerate().imap(
            lambda pair: (pair[1], pair[0])).todict()
    _(vars, factors).forall(lambda item: g.real_add_new_node(
        Node(item), GraphNodeType.CLASS_NODE, get_fnode_lbl(item)))

    for factor in factors:
        factor_id = fnode2id[factor]
        for variable in factor.unobserved_variables:
            g.real_add_new_link(
                Link(Link.var2factor), GraphLinkType.UNSPECIFIED, b"",
                fnode2id[variable], factor_id)

    for variable in vars:
        parent = variable.triple.parent
        if parent is not None:
            g.real_add_new_link(
                Link(Link.var2var), GraphLinkType.UNSPECIFIED, b"",
                fnode2id[parent.label], fnode2id[variable])

    for variable in vars:
        var_id = fnode2id[variable]
        variable.myid = "%s: %s" % (var_id, g.get_node_by_id(var_id).label)

    for factor in factors:
        factor.myid = fnode2id[factor]

    g.render2pdf(fpath)