Example #1
0
    def from_json(obj: dict, ont: Ontology) -> 'SSD':
        """Build an SSD from its JSON dictionary form.

        Args:
            obj: dictionary providing the keys read below: 'name',
                'mappings', 'attributes' and 'semanticModel' (which holds
                'nodes' and 'links').
            ont: ontology whose `simplify_uri` is applied to the
                prefix + label of every class node and every link.

        Returns:
            An SSD built from the model name, one SSDAttribute per data
            node, the constructed graph and `ont`.

        Raises:
            AssertionError: if an attribute does not list exactly one
                column id equal to its own id.
        """
        graph = Graph(True, True, True)
        # semantic-model node id -> id of the attribute mapped onto it
        attr_id_of_node = {m['node']: m['attribute'] for m in obj['mappings']}
        graph_id_of = {}

        # Index the raw attributes by id, checking the one-column invariant.
        attr_by_id = {}
        for item in obj['attributes']:
            assert (len(item['columnIds']) == 1
                    and item['columnIds'][0] == item['id'])
            attr_by_id[item['id']] = item

        ssd_attrs = []
        model = obj['semanticModel']
        for node in model['nodes']:
            if n_is_data := (node['type'] == 'DataNode'):
                kind = GraphNodeType.DATA_NODE
                # data nodes are labelled with the name of their attribute
                label = attr_by_id[attr_id_of_node[node['id']]]['name']
                ssd_attrs.append(SSDAttribute(node['id'], label))
            else:
                kind = GraphNodeType.CLASS_NODE
                label = ont.simplify_uri(node['prefix'] + node['label'])

            # remember which graph id each model node received
            graph_id_of[node['id']] = graph.add_new_node(
                kind, label.encode()).id

        for link in model['links']:
            label = ont.simplify_uri(link['prefix'] + link['label'])
            graph.add_new_link(GraphLinkType.UNSPECIFIED, label.encode(),
                               graph_id_of[link['source']],
                               graph_id_of[link['target']])

        return SSD(obj['name'], ssd_attrs, graph, ont)
Example #2
0
    def build(self, g: Graph) -> GraphExplorer:
        """Copy `g` into a new GraphExplorer and run `self.explore` on it.

        Every node of `g` is added with a `GraphNodeHop(0)` plus its type
        and label; every link is added with its type, label and endpoint
        ids.

        Returns:
            The explorer after `self.explore` has been called on it.
        """
        # TODO: can make it more efficient by giving estimation to graph explorer
        explorer = GraphExplorer()

        for src_node in g.iter_nodes():
            explorer.real_add_new_node(
                GraphNodeHop(0),
                src_node.type,
                src_node.label,
            )

        for src_link in g.iter_links():
            explorer.add_new_link(
                src_link.type,
                src_link.label,
                src_link.source_id,
                src_link.target_id,
            )

        self.explore(explorer)
        return explorer
Example #3
0
def preserved_structure(
    gold_sm: Graph, pred_sm: Graph, gold_triples: Set[Tuple[int, bytes,
                                                            Union[bytes, int]]]
) -> Tuple[Dict[int, bool], Dict[int, Optional[int]]]:
    """Flag which links of `pred_sm` also appear in the gold triples.

    The two graphs are aligned with `align_graph` in
    `DataNodeMode.IGNORE_DATA_NODE` mode. For each bijection it returns,
    every outgoing link of every class node in `pred_sm` is turned into a
    triple (mapped source, link label, target) and looked up in
    `gold_triples`. A class-node target is mapped through the bijection's
    `prime2x`; any other target contributes its own label.

    Returns:
        A pair: the link-id -> bool flags of the bijection with the most
        matching links (the first one wins on ties), and that bijection's
        `prime2x`.

    NOTE(review): when the alignment yields no bijection, nothing is
    selected and the final attribute access raises AttributeError.
    """
    candidates = align_graph(gold_sm, pred_sm,
                             DataNodeMode.IGNORE_DATA_NODE)['_bijections']
    top_score = -1
    top_bijection = None
    top_flags = None

    # score every candidate bijection by the number of links it preserves
    for candidate in candidates:
        flags = {}
        for class_node in pred_sm.iter_class_nodes():
            for edge in list(class_node.iter_outgoing_links()):
                target = edge.get_target_node()
                target_key = (candidate.prime2x[edge.target_id]
                              if target.is_class_node() else target.label)
                source_key = candidate.prime2x[edge.source_id]
                flags[edge.id] = (source_key, edge.label,
                                  target_key) in gold_triples

        n_preserved = sum(flags.values())
        if n_preserved > top_score:
            top_score, top_bijection, top_flags = (n_preserved, candidate,
                                                   flags)

    return top_flags, top_bijection.prime2x
Example #4
0
    def convert_graph(graph: Graph):
        """Convert `graph` into a dict of linked `Node` objects keyed by id.

        The behaviour depends on `data_node_mode`, which is read from the
        enclosing scope:

        * `DataNodeMode.IGNORE_DATA_NODE`: links whose target is a data
          node are not created, and data nodes are removed from the result.
        * `DataNodeMode.IGNORE_LABEL_DATA_NODE`: each data node is
          relabelled 'DATA_NODE<k>', where k counts, per source node and
          per link label, the data-node targets seen so far.
        * any other value: nodes and links are copied unchanged.

        Returns:
            Dict mapping node id to the `Node` built for it.

        Raises:
            AssertionError: in IGNORE_LABEL_DATA_NODE mode, if a data node
                does not have exactly one incoming link.
        """
        node_index: Dict[int, Node] = {}

        for v in graph.iter_nodes():
            node_type = Node.DATA_NODE if v.is_data_node() else Node.CLASS_NODE
            node_index[v.id] = Node(v.id, node_type, v.label)

        # Use the named mode (not a bare integer) so that skipping links and
        # deleting nodes below are driven by the same condition.
        ignore_data_nodes = data_node_mode == DataNodeMode.IGNORE_DATA_NODE

        for edge in graph.iter_links():
            if (ignore_data_nodes
                    and node_index[edge.target_id].type == Node.DATA_NODE):
                # ignore data node
                continue

            link = Link(edge.id, edge.label, edge.source_id, edge.target_id)
            Node.add_outgoing_link(node_index[edge.source_id], link)
            Node.add_incoming_link(node_index[edge.target_id], link)

        if ignore_data_nodes:
            # collect ids first: the dict cannot shrink while being iterated
            data_node_ids = [
                node.id for node in node_index.values()
                if node.type == Node.DATA_NODE
            ]
            for node_id in data_node_ids:
                del node_index[node_id]

        if data_node_mode == DataNodeMode.IGNORE_LABEL_DATA_NODE:
            # we convert label of node to DATA_NODE
            leaf_source_nodes: Set[Node] = set()
            for v in node_index.values():
                if v.type == Node.DATA_NODE:
                    assert len(v.incoming_links) == 1
                    leaf_source_nodes.add(
                        node_index[v.incoming_links[0].source_id])

            for node in leaf_source_nodes:
                # link label -> number of data-node targets seen so far
                link_label_count = {}
                for link in node.outgoing_links:
                    target = node_index[link.target_id]
                    if target.type == Node.DATA_NODE:
                        count = link_label_count.get(link.label, 0) + 1
                        link_label_count[link.label] = count
                        target.label = 'DATA_NODE' + str(count)

        return node_index
Example #5
0
    def clear_serene_footprint(self, remove_unknown: bool = True) -> 'SSD':
        """Rebuild `self.graph` without the serene helper nodes and links.

        The node labelled b"serene:All" is always dropped. When
        `remove_unknown` is true, the node labelled b"serene:Unknown" and
        the targets of its outgoing links are dropped as well. Links
        labelled b"serene:connect" are never copied; links labelled
        b"serene:unknown" are skipped only when `remove_unknown` is true.

        Returns:
            `self`. The graph is replaced only when at least one node was
            dropped; otherwise it is left untouched.

        Raises:
            AssertionError: if a link leaving the serene:Unknown node does
                not point to a data node.

        NOTE(review): a copied link that touches a dropped node (i.e. one
        not filtered out by its label) raises KeyError on the id lookup.
        """
        rebuilt = Graph(True, True, True)

        all_node = None
        unknown_node = None
        for node in self.graph.iter_nodes():
            if node.label == b"serene:All":
                all_node = node
            elif node.label == b"serene:Unknown":
                unknown_node = node

        dropped = set()
        if all_node is not None:
            dropped.add(all_node.id)

        if remove_unknown and unknown_node is not None:
            dropped.add(unknown_node.id)
            # everything hanging off the Unknown node goes away with it
            for link in self.graph.iter_links():
                if link.source_id != unknown_node.id:
                    continue
                assert link.get_target_node().is_data_node()
                dropped.add(link.target_id)

        if not dropped:
            # no serene footprint to remove
            return self

        # old node id -> id of its copy in the rebuilt graph
        new_id_of = {}
        for node in self.graph.iter_nodes():
            if node.id not in dropped:
                new_id_of[node.id] = rebuilt.add_new_node(
                    node.type, node.label).id

        for link in self.graph.iter_links():
            if link.label == b"serene:connect" or (
                    remove_unknown and link.label == b"serene:unknown"):
                continue
            rebuilt.add_new_link(link.type, link.label,
                                 new_id_of[link.source_id],
                                 new_id_of[link.target_id])

        self.graph = rebuilt
        return self
Example #6
0
 def mask_dnode(self, g: Graph) -> Graph:
     """deprecated

     Return a copy of `g` in which every node that is not a class node is
     relabelled b"DataNode". Class nodes keep their label, and links are
     copied unchanged.

     Raises:
         AssertionError: if a copied node or link does not receive the
             same id as its original.
     """
     g2 = Graph(True, True, True, g.get_n_nodes(), g.get_n_links())
     for n in g.iter_nodes():
         label = n.label if n.type == GraphNodeType.CLASS_NODE else b"DataNode"
         # Insert outside the assert: assert statements are removed under
         # `python -O`, which would otherwise leave the copy empty.
         new_node = g2.add_new_node(n.type, label)
         assert new_node.id == n.id
     for e in g.iter_links():
         new_link = g2.add_new_link(e.type, e.label, e.source_id,
                                    e.target_id)
         assert new_link.id == e.id
     return g2
Example #7
0
def render_factor_graph(model_or_factors: Union[LogLinearModel, List[Factor]],
                        vars: List[TripleLabel], fpath: str):
    """Render factor graph for debugging

    Args:
        model_or_factors: either a LogLinearModel, whose factors are
            obtained with `get_factors(vars)`, or the list of factors
            itself.
        vars: the variables to draw; each one becomes a node.
        fpath: passed to `render2pdf` of the graph that is built.

    Side effects:
        Sets a `myid` attribute on every variable ("<node id>: <node
        label>") and on every factor (its node id).
    """
    if isinstance(model_or_factors, LogLinearModel):
        factors = model_or_factors.get_factors(vars)
    else:
        factors = model_or_factors

    def get_fnode_lbl(fnode: Union[TripleLabel, Factor]) -> bytes:
        # Factors are labelled with their class name; variables with the
        # "id:label" of the source and target nodes of their triple's link.
        if isinstance(fnode, Factor):
            label = fnode.__class__.__name__
        else:
            s = fnode.triple.link.get_source_node()
            t = fnode.triple.link.get_target_node()
            label = "%s:%s--%s:%s" % (s.id, s.label.decode('utf-8'), t.id,
                                      t.label.decode('utf-8'))

        return label.encode('utf-8')

    class Node(GraphNode):
        # Graph node that remembers the variable or factor it stands for.
        def __init__(self, fnode: Union[TripleLabel, Factor]) -> None:
            super().__init__()
            self.fnode = fnode

        def get_dot_format(self, max_text_width: int):
            label = self.get_printed_label(max_text_width).encode(
                'unicode_escape').decode()
            # variables are drawn gold-filled, everything else as gray text
            if isinstance(self.fnode, Variable):
                return '"%s"[style="filled",color="white",fillcolor="gold",label="%s"];' % (
                    self.id, label)

            return '"%s"[shape="plaintext",style="filled",fillcolor="lightgray",label="%s"];' % (
                self.id, label)

    class Link(GraphLink):
        # Graph link tagged with one of the two link kinds below.
        var2factor = "var2factor"
        var2var = "var2var"

        def __init__(self, link_type: str) -> None:
            super().__init__()
            self.link_type = link_type

        def get_dot_format(self, max_text_width: int):
            label = self.get_printed_label(max_text_width).encode(
                'unicode_escape').decode()
            # var2factor links are undirected; the others are dashed
            if self.link_type == Link.var2factor:
                return '"%s" -> "%s"[dir=none,color="brown",fontcolor="black",label="%s"];' % (
                    self.source_id, self.target_id, label)
            return '"%s" -> "%s"[color="brown",style="dashed",fontcolor="black",label="%s"];' % (
                self.source_id, self.target_id, label)

    g = Graph()

    # build graphs
    # fnode2id maps each variable/factor to its position in the enumeration
    # of (vars, factors); nodes are added to the graph in that same order.
    fnode2id: Dict[Union[Variable, Factor], int] = _(
        vars, factors).enumerate().imap(lambda v: (v[1], v[0])).todict()
    _(vars, factors).forall(lambda fnode: g.real_add_new_node(
        Node(fnode), GraphNodeType.CLASS_NODE, get_fnode_lbl(fnode)))

    # one link from each unobserved variable of a factor to that factor
    for factor in factors:
        for var in factor.unobserved_variables:
            g.real_add_new_link(Link(Link.var2factor),
                                GraphLinkType.UNSPECIFIED, b"", fnode2id[var],
                                fnode2id[factor])
    # one link from the label of a variable's parent triple to the variable
    for var in vars:
        if var.triple.parent is not None:
            g.real_add_new_link(Link(Link.var2var), GraphLinkType.UNSPECIFIED,
                                b"", fnode2id[var.triple.parent.label],
                                fnode2id[var])

    # expose the graph ids on the objects themselves
    for var in vars:
        var.myid = "%s: %s" % (fnode2id[var], g.get_node_by_id(
            fnode2id[var]).label)
    for factor in factors:
        factor.myid = fnode2id[factor]

    g.render2pdf(fpath)