Esempio n. 1
0
def _get_counterpart_candidates(graph: Graph,
                                mapping: GraphMapping,
                                edge: Edge,
                                already_mapped: Set[Node],
                                known_src: bool = True):
    if known_src:
        src = mapping.m_node[edge.src]
        dst = None
        entity = mapping.m_ent.get(edge.dst.entity, None)
    else:
        src = None
        dst = mapping.m_node[edge.dst]
        entity = mapping.m_ent.get(edge.src.entity, None)

    candidates: List[Node] = []
    for e in graph.iter_edges(src=src, dst=dst, label=edge.label):
        cand = e.dst if known_src else e.src
        if cand.label != edge.dst.label:
            continue

        if cand in already_mapped:
            continue

        if entity is not None and entity is not cand.entity:
            continue

        candidates.append(cand)

    return candidates
Esempio n. 2
0
def _create_symbolic_copy(graph: Graph) -> Tuple[Graph, GraphMapping]:
    """
    Build a structural copy of `graph` in which every entity and node carries `SYMBOLIC_VALUE`.

    Args:
        graph: The graph to copy.

    Returns:
        Tuple[Graph, GraphMapping]: The copy, along with the mapping from the entities and nodes of `graph`
            to the freshly created entities and nodes of the copy.
    """
    original_to_copy = GraphMapping()

    #  Entities go first, as every copied node refers to the copy of its entity.
    for orig_entity in graph.iter_entities():
        original_to_copy.m_ent[orig_entity] = Entity(value=SYMBOLIC_VALUE)

    for orig_node in graph.iter_nodes():
        copied_entity = original_to_copy.m_ent[orig_node.entity]
        original_to_copy.m_node[orig_node] = Node(label=orig_node.label,
                                                  entity=copied_entity,
                                                  value=SYMBOLIC_VALUE)

    copied_nodes = set(original_to_copy.m_node.values())

    #  Re-create every edge between the copies of its end-points, keeping the label.
    copied_edges = set()
    for orig_edge in graph.iter_edges():
        copied_edges.add(
            Edge(src=original_to_copy.m_node[orig_edge.src],
                 dst=original_to_copy.m_node[orig_edge.dst],
                 label=orig_edge.label))

    symbolic_graph = Graph.from_nodes_and_edges(nodes=copied_nodes,
                                                edges=copied_edges)
    return symbolic_graph, original_to_copy
Esempio n. 3
0
def _init_state_and_worklist(
        query: Graph, graph: Graph, candidate_mappings: GraphMultiMapping,
        _worklist_order: Optional[List[Node]] = None
) -> Tuple[List[Node], SearchState]:
    """
    Helper function to initialize the search state and the worklist. The worklist starts off with an order that
    tries to maximize the information gathered from the initial assignments.

    Query nodes with exactly one candidate are assigned right away (along with their entities); all the other
    query nodes are put on the worklist.

    Args:
        query: The query graph; its edges determine the degree-based ordering of the worklist.
        graph: The graph the query is matched against. Not used by this helper.
        candidate_mappings: The candidate nodes for every query node.
        _worklist_order: For debugging purposes. If not `None`, the worklist is ordered by the position of its
            nodes in this list instead of by degree. Every worklist node has to be present in it, otherwise
            `list.index` raises a `ValueError`.

    Returns:
        Tuple[List[Node], SearchState]: The ordered worklist, and the search state constructed from the
            worklist, the initial mapping and `candidate_mappings`.
    """
    current_mapping = GraphMapping()
    worklist = []
    for k, v in candidate_mappings.m_node.items():
        if len(v) == 1:
            #  Only one option, so the assignment is forced.
            n_v = next(iter(v))
            current_mapping.m_node[k] = n_v
            current_mapping.m_ent[k.entity] = n_v.entity
        else:
            worklist.append(k)

    if _worklist_order is None:
        #  Set the initial order of nodes in the worklist based on the in/out degrees of the nodes
        #  Assigning nodes with high degrees first enables quick pruning of the space for the other nodes.
        #  NOTE : Should be cached ideally, but keeping it simple here.
        degree_counts = collections.Counter()
        for e in query.iter_edges():
            degree_counts[e.src] += 1
            degree_counts[e.dst] += 1

        #  The sort is stable, so nodes with equal degree keep their relative order.
        worklist.sort(key=lambda x: -degree_counts[x])
    else:
        #  An explicit order was requested, the degrees are not needed at all.
        worklist.sort(key=_worklist_order.index)

    state = SearchState(worklist, current_mapping, candidate_mappings)

    return worklist, state
Esempio n. 4
0
def _propagate_unit_nodes(
    candidates: GraphMultiMapping,
    query: Graph,
    graph: Graph,
    processed: Optional[Set[Node]],
) -> bool:
    """
    Unit propagation over the candidate mappings.

    A query node with exactly one candidate (a unit node) pins down what its neighbours can be mapped to:
    for every query edge touching the unit node, the neighbour on the other side can only be mapped to nodes
    reachable from the unit node's counterpart via an edge with the same label. Narrowing the neighbours
    may create new unit nodes, which are handled in turn until nothing is left to process.

    Args:
        candidates: The candidate mappings; `candidates.m_node` is narrowed in-place.
        query: The query graph
        graph: The graph the query is to be processed against.
        processed: The nodes which have already been processed and hence should be ignored. Updated in-place
            with the unit nodes handled by this call. A fresh set is used if `None`.

    Returns:
        bool: `True` if successful, `False` if some node was left without any candidate.
    """
    if processed is None:
        processed = set()

    def _narrow(q_node, g_nodes):
        #  Restrict the candidates of q_node to g_nodes and report how many remain.
        existing = candidates.m_node[q_node]
        if existing is None:
            candidates.m_node[q_node] = g_nodes
        else:
            existing.intersection_update(g_nodes)

        return len(candidates.m_node[q_node])

    #  Seed with the unit nodes that have not been handled by an earlier call.
    pending = collections.deque()
    for q_node, g_nodes in candidates.m_node.items():
        if g_nodes is None or len(g_nodes) != 1 or q_node in processed:
            continue

        pending.append(q_node)

    while pending:
        q_unit = pending.popleft()
        if q_unit in processed:
            continue

        processed.add(q_unit)
        g_unit = next(iter(candidates.m_node[q_unit]))

        #  Outgoing edges of the unit node constrain the nodes they point to.
        for q_edge in query.iter_edges(src=q_unit):
            q_dst = q_edge.dst
            reachable = set()
            for g_edge in graph.iter_edges(src=g_unit, label=q_edge.label):
                g_dst = g_edge.dst
                if g_dst.label != q_dst.label:
                    continue

                if q_dst.value is SYMBOLIC_VALUE or q_dst.value == g_dst.value:
                    reachable.add(g_dst)

            remaining = _narrow(q_dst, reachable)
            if remaining == 0:
                return False

            if remaining == 1:
                pending.append(q_dst)

        #  Incoming edges of the unit node constrain the nodes they originate from.
        for q_edge in query.iter_edges(dst=q_unit):
            q_src = q_edge.src
            reaching = set()
            for g_edge in graph.iter_edges(dst=g_unit, label=q_edge.label):
                g_src = g_edge.src
                if g_src.label != q_src.label:
                    continue

                if q_src.value is SYMBOLIC_VALUE or q_src.value == g_src.value:
                    reaching.add(g_src)

            remaining = _narrow(q_src, reaching)
            if remaining == 0:
                return False

            if remaining == 1:
                pending.append(q_src)

    return True
Esempio n. 5
0
def _get_candidate_mappings(
    query: Graph,
    graph: Graph,
    partial_mapping: Optional[GraphMapping] = None,
    entity_groups_query: Optional[Dict[Entity, int]] = None,
    entity_groups_graph: Optional[Dict[Entity, int]] = None
) -> Optional[GraphMultiMapping]:
    """
    Compute the search space for the isomorphisms of `query` in `graph`: for every entity and node of `query`,
    the entities and nodes of `graph` it may be mapped to. `None` is returned as soon as some node of `query`
    turns out to have no candidate at all.

    Args:
        query: Query graph.
        graph: Graph to get the isomorphism mappings from query.
        partial_mapping: An existing mapping from entities and nodes of `query` to entities and nodes of `graph`.
        entity_groups_query: Entity group info for `query`. Only entities belonging to the same group can be matched.
        entity_groups_graph: Entity group info for `graph`. Only entities belonging to the same group can be matched.
            Has to be supplied whenever `entity_groups_query` is.

    Returns:
        Optional[GraphMultiMapping]: The candidate mappings. `None` if no valid mapping exists.
    """

    def _profile_covers(q_out, q_in, g_node) -> bool:
        #  A graph node is viable only if, for every edge label, it has at least as many outgoing (incoming)
        #  edges of that label as the query node does. The incoming side is only computed if the outgoing
        #  side already checks out.
        g_out = collections.Counter(
            e.label for e in graph.iter_edges(src=g_node))
        if not all(g_out[lbl] >= need for lbl, need in q_out.items()):
            return False

        g_in = collections.Counter(
            e.label for e in graph.iter_edges(dst=g_node))
        return all(g_in[lbl] >= need for lbl, need in q_in.items())

    search_space = GraphMultiMapping()
    search_space.m_ent.update({ent: None for ent in query.iter_entities()})
    search_space.m_node.update({node: None for node in query.iter_nodes()})

    if partial_mapping is not None and not _init_candidate_mappings(
            search_space, partial_mapping):
        return None

    #  Stage 1 : Initial unit propagation.
    #  Starting from whatever is already forced (e.g. by the partial mapping), let every query node with a single
    #  candidate restrict the candidates of its neighbours, repeating for any new single-candidate node.
    handled_units = set()
    if not _propagate_unit_nodes(
            search_space, query, graph, processed=handled_units):
        return None

    #  Stage 2 : Seed the nodes that are still unconstrained, then filter by neighbour profile.
    for q_node in query.iter_nodes():
        if search_space.m_node[q_node] is not None:
            continue

        #  Gather every graph node agreeing on label, on value (unless symbolic) and on entity (if known).
        wanted_value = None if q_node.value is SYMBOLIC_VALUE else q_node.value
        allowed_entities = search_space.m_ent.get(q_node.entity,
                                                  None) or [None]
        pool = set()
        for g_entity in allowed_entities:
            pool.update(
                graph.iter_nodes(label=q_node.label,
                                 entity=g_entity,
                                 value=wanted_value))

        search_space.m_node[q_node] = pool

        #  Edge-label counts of the query node, split by direction.
        q_out = collections.Counter(
            e.label for e in query.iter_edges(src=q_node))
        q_in = collections.Counter(
            e.label for e in query.iter_edges(dst=q_node))

        survivors = [
            g_node for g_node in search_space.m_node[q_node]
            if _profile_covers(q_out, q_in, g_node)
        ]
        if not survivors:
            return None

        search_space.m_node[q_node].intersection_update(survivors)

    #  Stage 3 : Another round of unit propagation, now that every node has a candidate set.
    if not _propagate_unit_nodes(
            search_space, query, graph, processed=handled_units):
        return None

    #  Stage 4 : Drop candidates whose entity group differs from that of the query node (group 0 if unlisted).
    if entity_groups_query is not None:
        assert entity_groups_graph is not None, "Entity groups have to be supplied for both query and graph."
        pruned = {}
        for q_node, g_nodes in search_space.m_node.items():
            pruned[q_node] = {
                g_node
                for g_node in g_nodes
                if entity_groups_query.get(q_node.entity, 0) ==
                entity_groups_graph.get(g_node.entity, 0)
            }

        search_space.m_node = pruned
        for g_nodes in search_space.m_node.values():
            if len(g_nodes) == 0:
                return None

    #  Stage 5 : Use Hopcroft-Karp maximum matching for bipartite-graphs to verify if a one-to-one mapping is possible
    #  TODO : Do if needed, doesn't affect correctness

    return search_space
Esempio n. 6
0
def _get_subgraph_mappings_recursive(
        worklist: List[Node],
        query: Graph,
        graph: Graph,
        state: SearchState,
        _depth: int = 0) -> Iterator[GraphMapping]:
    """
    The recursive driver of the subgraph isomorphism finder.

    Tries every candidate of the query node at position `_depth` of `worklist`. A candidate is accepted if it
    is consistent with the current entity mapping and with the edges of the query node; the assignment is then
    recorded in `state`, the search recurses on position `_depth + 1`, and the assignment is undone afterwards.
    If nothing at this depth led to a solution, a failure depth is computed and stored in `state.return_depth`
    so that the calls further up the stack can unwind to it.

    Args:
        worklist: The query nodes to assign, in decision order. May be reordered in-place when a conflict
            is analyzed.
        query: The query graph.
        graph: The graph in which the counterparts of the query nodes are searched.
        state: The mutable search state. This function reads and updates its current mapping, success
            record and return depth.
        _depth: The current recursive depth, starts off with zero.

    Yields:
        GraphMapping: A copy of the current mapping, every time all the nodes on the worklist are assigned.
    """
    if _depth == len(worklist):
        #  Return a copy to safeguard from in-place editing
        yield state.current_mapping.copy()
        #  Executed once the generator is resumed: flag every decision depth as having led to a solution.
        for i in range(len(worklist)):
            state.success_record[i] = True

        return

    current_mapping = state.current_mapping
    cur_node: Node = worklist[_depth]
    mapped_entity: Optional[Entity] = current_mapping.m_ent.get(
        cur_node.entity, None)
    #  No mapping for the entity of cur_node yet; this flag is handed to perform_assignment/undo_assignment below.
    entity_assigned_here: bool = mapped_entity is None
    #  Deepest earlier decision point found to conflict with a candidate tried here; -1 if none was found.
    failure_depth: int = -1
    state.success_record[_depth] = False

    for graph_node in state.get_candidates(cur_node):
        ok = True

        #  Check consistency with the current entity mapping
        if (not entity_assigned_here
            ) and mapped_entity is not graph_node.entity:
            #  The decision point where the entity was actually assigned is a candidate for conflict analysis
            failure_depth = max(
                failure_depth,
                state.get_entity_assignment_depth(mapped_entity))
            continue

        elif entity_assigned_here and state.entity_already_mapped(
                graph_node.entity):
            #  The decision point where the entity was actually assigned is a candidate for conflict analysis
            failure_depth = max(
                failure_depth,
                state.get_entity_assignment_depth(graph_node.entity))
            continue

        #  Check consistency of the edge profile
        #  In principle, we could do something similar to unit propagation, which would update the mappings
        #  for all the other nodes, but that entails creation of temporary objects to a large extent,
        #  so we stick with on-demand checks. However, this would be desirable in C++. Resources on BCP in
        #  modern SAT solvers should be useful.
        for edge in query.iter_edges(src=cur_node):
            if edge.dst in current_mapping.m_node:
                #  Check if the edge is present in graph as well
                dst_mapped = current_mapping.m_node[edge.dst]
                if not graph.has_edge(
                        src=graph_node, dst=dst_mapped, label=edge.label):
                    #  The decision point where the node was assigned to is a candidate for conflict analysis
                    failure_depth = max(
                        failure_depth,
                        state.get_node_assignment_depth(dst_mapped))
                    ok = False
                    break

            else:
                #  Check if the edge is present for one of the candidates of dst
                if all(not graph.has_edge(
                        src=graph_node, dst=cand, label=edge.label)
                       for cand in state.get_candidates(edge.dst)):
                    #  Hard to say which decision point would have done this, so nothing on that front
                    #  TODO : Think about this
                    #  Being conservative for now
                    failure_depth = max(failure_depth, _depth - 1)
                    ok = False
                    break

        #  Move on to the next candidate if the check failed.
        if not ok:
            continue

        #  Do a similar check for the edges with dst as node
        for edge in query.iter_edges(dst=cur_node):
            if edge.src in current_mapping.m_node:
                #  Check if the edge is present in graph as well
                src_mapped = current_mapping.m_node[edge.src]
                if not graph.has_edge(
                        src=src_mapped, dst=graph_node, label=edge.label):
                    #  The decision point where the node was assigned to is a candidate for conflict analysis
                    failure_depth = max(
                        failure_depth,
                        state.get_node_assignment_depth(src_mapped))
                    ok = False
                    break

            else:
                #  Check if the edge is present for one of the candidates of src
                if all(not graph.has_edge(
                        src=cand, dst=graph_node, label=edge.label)
                       for cand in state.get_candidates(edge.src)):
                    #  Hard to say which decision point would have done this, so nothing on that front
                    #  TODO : Think about this
                    #  Being conservative for now
                    failure_depth = max(failure_depth, _depth - 1)
                    ok = False
                    break

        #  Move on to the next candidate if the check failed.
        if not ok:
            continue

        #  Update the mapping and move on to the next item on the worklist
        state.perform_assignment(_depth,
                                 cur_node,
                                 graph_node,
                                 entity_assigned=entity_assigned_here)
        yield from _get_subgraph_mappings_recursive(worklist,
                                                    query,
                                                    graph,
                                                    state,
                                                    _depth=_depth + 1)

        #  Rollback the assignment
        state.undo_assignment(_depth,
                              cur_node,
                              graph_node,
                              entity_assigned=entity_assigned_here)

        #  NOTE(review): -2 looks like the sentinel for "no unwinding requested" - confirm against SearchState.
        if state.return_depth != -2:
            if _depth > state.return_depth:
                #  Pop the call stack further as the root cause of the conflict downstream is further up the call stack
                return

            else:
                #  We are at the right depth, reset.
                state.return_depth = -2

    if not state.success_record[_depth]:
        #  No combination of decisions from this point onwards yielded a solution.
        #  Perform conflict analysis to find the latest decision point which could influence the current point.
        #  Then pop the stack till that point. Also modify the worklist to push this decision point earlier so this
        #  conflict is solved first before making any decisions for other nodes.

        #  Was a viable candidate consumed at a previous decision point?
        for n in state.get_original_candidates(cur_node):
            if state.node_already_mapped(n):
                failure_depth = max(failure_depth,
                                    state.get_node_assignment_depth(n))

        state.return_depth = failure_depth
        if failure_depth == _depth - 1:
            #  The culprit is the immediate parent, which simply moves on to its next candidate; no unwinding needed.
            state.return_depth = -2
        else:
            if failure_depth >= 0:
                #  Swap the worklist items, so that the current node is decided right after the conflicting decision
                worklist[failure_depth +
                         1], worklist[_depth] = worklist[_depth], worklist[
                             failure_depth + 1]
Esempio n. 7
0
def get_greatest_common_universal_supergraph(
    query: Graph,
    graphs: List[Graph],
    all_mappings: Optional[Dict[Graph, List[GraphMapping]]] = None,
) -> Tuple[Graph, GraphMapping]:
    """
    Returns the universal supergraph corresponding to greatest lower bound of all the maximal universal supergraphs of
    `query` w.r.t `graphs` in the partial order of universal supergraphs of `query` w.r.t `graphs`.

    The supergraph is grown edge by edge from one exemplar occurrence of `query` in `graphs[0]`. An edge is only
    added if it can be matched in every graph for every known occurrence of `query` in that graph.

    Args:
        query: The query graph to find the universal supergraph for.
        graphs: The graphs w.r.t which the universal supergraph is to be computed.
        all_mappings: Mapping of graphs to subgraph isomorphism mappings of `query` for each graph in `graphs`.
            If None, they are computed by iterating over the result of `get_subgraph_mappings` till exhaustion,
            and the graphs which do not contain `query` are discarded.

    Returns:
        Tuple[Graph, GraphMapping]: The universal supergraph along with mappings to query.
            The mapping only contains nodes already present in `query`.
    """

    if all_mappings is None:
        computed = {g: list(query.get_subgraph_mappings(g)) for g in graphs}
        #  Only the graphs which actually contain the query are of interest
        graphs = [g for g in graphs if len(computed[g]) != 0]
        all_mappings = {g: computed[g] for g in graphs}
        assert len(all_mappings
                   ) > 0, "Did not find any graph which contains the query."

    #  The first mapping of the first graph serves as the exemplar along which the supergraph is grown.
    #  As the universal supergraph has to agree with all the mappings of all the graphs, it suffices to grow
    #  along one of them and use the remaining ones to validate every incremental update.
    base_graph = graphs[0]
    exemplar = all_mappings[base_graph][0].copy()

    #  Express all the mappings in terms of the exemplar's nodes rather than the query's.
    work_mappings: Dict[Graph, List[GraphMapping]] = {}
    for g, g_mappings in all_mappings.items():
        work_mappings[g] = [
            m.apply_mapping(exemplar, only_keys=True) for m in g_mappings
        ]

    known_nodes: Set[Node] = set(exemplar.m_node.values())
    orig_known_nodes: Set[Node] = known_nodes.copy()
    known_edges: Set[Edge] = set()
    for q_edge in query.iter_edges():
        known_edges.add(
            Edge(src=exemplar.m_node[q_edge.src],
                 dst=exemplar.m_node[q_edge.dst],
                 label=q_edge.label))

    assert all(e.src in known_nodes and e.dst in known_nodes
               for e in known_edges)

    #  Edges of the base graph which are not known yet, but touch at least one known node
    worklist: Deque[Edge] = collections.deque()
    for e in base_graph.iter_edges():
        if e in known_edges:
            continue

        if e.src in known_nodes or e.dst in known_nodes:
            worklist.append(e)

    #  For every working mapping, the set of nodes it already uses
    already_mapped_dict: Dict[GraphMapping, Set[Node]] = {}
    for mappings in work_mappings.values():
        for m in mappings:
            already_mapped_dict[m] = set(m.m_node.values())

    while worklist:
        edge = worklist.popleft()
        if edge in known_edges:
            continue

        src_known = edge.src in known_nodes
        dst_known = edge.dst in known_nodes

        if src_known and dst_known:
            #  No new node involved. The edge is added iff it is present in every graph w.r.t every mapping.
            present_everywhere = all(
                g.has_edge(src=m.m_node[edge.src],
                           dst=m.m_node[edge.dst],
                           label=edge.label)
                for g, mappings in work_mappings.items() for m in mappings)
            if present_everywhere:
                known_edges.add(edge)

            continue

        if not (src_known or dst_known):
            #  Nothing to anchor this edge to.
            continue

        #  Exactly one end-point is known. The other one can be added only if every mapping of every graph has
        #  at least one counter-part for it, connected to the counter-part of the known end-point via an edge
        #  with the same label and direction.
        new_node = edge.dst if src_known else edge.src
        counterparts_dict = {}
        extendable = True
        for graph, mappings in work_mappings.items():
            for mapping in mappings:
                found = _get_counterpart_candidates(
                    graph,
                    mapping,
                    edge,
                    already_mapped_dict[mapping],
                    known_src=src_known)
                if len(found) == 0:
                    #  One mapping cannot be extended, so the edge is of no use overall.
                    extendable = False
                    break

                counterparts_dict[mapping] = found

            if not extendable:
                break

        if not extendable:
            #  Can't do anything with this edge. Move on to the next item on the worklist.
            continue

        #  The edge is safe to add. Extend all the mappings with the new node and update the book-keeping.
        work_mappings = _get_new_work_mappings(work_mappings,
                                               counterparts_dict,
                                               already_mapped_dict, new_node)

        known_nodes.add(new_node)
        known_edges.add(edge)
        #  The new node may make further edges of the base graph eligible.
        worklist.extend(base_graph.iter_edges(src=new_node))
        worklist.extend(base_graph.iter_edges(dst=new_node))

    #  Graph-level tags and tagged edges are handled along the same lines.
    common_tags = set.intersection(*(set(g.iter_tags()) for g in graphs))
    common_tagged_edges: Set[TaggedEdge] = set()
    tagged_candidates = [
        t for t in base_graph.iter_tagged_edges()
        if t.src in known_nodes and t.dst in known_nodes
    ]

    for tagged_edge in tagged_candidates:
        #  Keep the tagged edge only if it is present in every graph for every mapping.
        present_everywhere = all(
            g.has_tagged_edge(src=m.m_node[tagged_edge.src],
                              dst=m.m_node[tagged_edge.dst],
                              tag=tagged_edge.tag)
            for g, mappings in work_mappings.items() for m in mappings)
        if present_everywhere:
            common_tagged_edges.add(tagged_edge)

    #  All the nodes, edges, tags and tagged edges of the supergraph are known at this point.
    #  Assemble the symbolic supergraph and re-express its mapping in terms of the query.
    concrete_supergraph = Graph.from_nodes_and_edges(nodes=known_nodes,
                                                     edges=known_edges)
    universal_supergraph, mapping_wrt_exemplar = _create_symbolic_copy(
        concrete_supergraph)

    mapping_for_query_nodes = mapping_wrt_exemplar.slice(
        nodes=orig_known_nodes)
    mapping_wrt_query = mapping_for_query_nodes.apply_mapping(
        exemplar.reverse(), only_keys=True)

    #  Add in the tags and tagged edges.
    universal_supergraph.add_tags(common_tags)
    universal_supergraph.add_tagged_edges(
        TaggedEdge(src=mapping_wrt_exemplar.m_node[t.src],
                   dst=mapping_wrt_exemplar.m_node[t.dst],
                   tag=t.tag) for t in common_tagged_edges)

    return universal_supergraph, mapping_wrt_query