def _get_counterpart_candidates(graph: Graph,
                                mapping: GraphMapping,
                                edge: Edge,
                                already_mapped: Set[Node],
                                known_src: bool = True) -> List[Node]:
    if known_src:
        src = mapping.m_node[edge.src]
        dst = None
        #  The unknown end-point whose counterparts we are looking for is edge.dst.
        target = edge.dst
        entity = mapping.m_ent.get(edge.dst.entity, None)
    else:
        src = None
        dst = mapping.m_node[edge.dst]
        #  The unknown end-point whose counterparts we are looking for is edge.src.
        target = edge.src
        entity = mapping.m_ent.get(edge.src.entity, None)

    candidates: List[Node] = []
    for e in graph.iter_edges(src=src, dst=dst, label=edge.label):
        cand = e.dst if known_src else e.src
        #  The candidate must match the label of the unknown end-point
        #  (edge.dst when known_src is True, edge.src otherwise).
        if cand.label != target.label:
            continue
        if cand in already_mapped:
            continue
        if entity is not None and entity is not cand.entity:
            continue

        candidates.append(cand)

    return candidates

def _create_symbolic_copy(graph: Graph) -> Tuple[Graph, GraphMapping]:
    mapping = GraphMapping()
    for entity in graph.iter_entities():
        mapping.m_ent[entity] = Entity(value=SYMBOLIC_VALUE)

    for node in graph.iter_nodes():
        mapping.m_node[node] = Node(label=node.label,
                                    entity=mapping.m_ent[node.entity],
                                    value=SYMBOLIC_VALUE)

    new_graph = Graph.from_nodes_and_edges(
        nodes=set(mapping.m_node.values()),
        edges={Edge(src=mapping.m_node[e.src], dst=mapping.m_node[e.dst], label=e.label)
               for e in graph.iter_edges()})

    return new_graph, mapping

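#  Hedged usage sketch for `_create_symbolic_copy`, using only the Node/Entity/Edge/Graph
#  constructors already used in this module. The integer labels and concrete values below are
#  hypothetical placeholders for whatever label/value domain the module actually uses.
def _demo_create_symbolic_copy() -> None:
    ent = Entity(value="e0")
    a = Node(label=0, entity=ent, value=1)
    b = Node(label=1, entity=ent, value=2)
    g = Graph.from_nodes_and_edges(nodes={a, b},
                                   edges={Edge(src=a, dst=b, label=0)})

    sym_g, mapping = _create_symbolic_copy(g)

    #  The copy preserves topology and labels, but every entity/node value becomes SYMBOLIC_VALUE.
    assert mapping.m_node[a].label == a.label
    assert mapping.m_node[a].value is SYMBOLIC_VALUE
    assert mapping.m_ent[ent].value is SYMBOLIC_VALUE
    assert sym_g.has_edge(src=mapping.m_node[a], dst=mapping.m_node[b], label=0)
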
def _init_state_and_worklist(query: Graph,
                             graph: Graph,
                             candidate_mappings: GraphMultiMapping,
                             _worklist_order: Optional[List[Node]]) -> Tuple[List[Node], SearchState]:
    """
    Helper function to initialize the search state and the worklist. The worklist starts off with
    an order that tries to maximize the information gathered from the initial assignments.

    Args:
        query: The query graph.
        graph: The graph the query is matched against.
        candidate_mappings: The candidate mappings constraining the search space.
        _worklist_order: An explicit node ordering to use instead of the degree-based one.
            For debugging purposes.

    Returns:
        Tuple[List[Node], SearchState]: The initialized worklist and search state.
    """
    current_mapping = GraphMapping()
    worklist = []
    for k, v in candidate_mappings.m_node.items():
        if len(v) == 1:
            n_v = next(iter(v))
            current_mapping.m_node[k] = n_v
            current_mapping.m_ent[k.entity] = n_v.entity
        else:
            worklist.append(k)

    #  Set the initial order of nodes in the worklist based on the in/out degrees of the nodes.
    #  Assigning nodes with high degrees first enables quick pruning of the space for the other nodes.
    #  NOTE : Should be cached ideally, but keeping it simple here.
    degree_counts = collections.Counter()
    for e in query.iter_edges():
        degree_counts[e.src] += 1
        degree_counts[e.dst] += 1

    if _worklist_order is None:
        worklist = sorted(worklist, key=lambda x: -degree_counts[x])
    else:
        worklist = sorted(worklist, key=lambda x: _worklist_order.index(x))

    state = SearchState(worklist, current_mapping, candidate_mappings)
    return worklist, state

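#  Illustrative sketch of the ordering above, under assumptions flagged here: `b` is a unit node and
#  is fixed in the initial mapping, while `a` (degree 2) is ordered before `c` (degree 1) in the
#  worklist. All labels are hypothetical placeholders, and `query` is reused as the `graph` argument
#  purely because this helper does not inspect it directly.
def _demo_worklist_order() -> None:
    q_ent, g_ent = Entity(value=SYMBOLIC_VALUE), Entity(value=SYMBOLIC_VALUE)
    a = Node(label=0, entity=q_ent, value=SYMBOLIC_VALUE)
    b = Node(label=1, entity=q_ent, value=SYMBOLIC_VALUE)
    c = Node(label=2, entity=q_ent, value=SYMBOLIC_VALUE)
    query = Graph.from_nodes_and_edges(nodes={a, b, c},
                                       edges={Edge(src=a, dst=b, label=0),
                                              Edge(src=a, dst=c, label=1)})

    g_b = Node(label=1, entity=g_ent, value=SYMBOLIC_VALUE)
    g_a1, g_a2 = (Node(label=0, entity=g_ent, value=SYMBOLIC_VALUE) for _ in range(2))
    g_c1, g_c2 = (Node(label=2, entity=g_ent, value=SYMBOLIC_VALUE) for _ in range(2))

    candidate_mappings = GraphMultiMapping()
    candidate_mappings.m_node.update({a: {g_a1, g_a2}, b: {g_b}, c: {g_c1, g_c2}})

    worklist, state = _init_state_and_worklist(query, query, candidate_mappings, None)
    #  The unit node `b` is pre-assigned; the rest are ordered by descending degree.
    assert worklist == [a, c]
    assert state.current_mapping.m_node[b] is g_b
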
def _propagate_unit_nodes(candidates: GraphMultiMapping,
                          query: Graph,
                          graph: Graph,
                          processed: Optional[Set[Node]] = None) -> bool:
    """
    The unit-propagation procedure. If a node is forced to be assigned to a single node, use the
    edge-profile of that node to establish mappings for its neighbours. This may result in more
    unit nodes, for which we repeat the process.

    Args:
        candidates: The candidate mappings to use.
        query: The query graph.
        graph: The graph the query is to be processed against.
        processed: The nodes which have already been processed and hence should be ignored.

    Returns:
        bool: `True` if successful, `False` if inconsistencies were discovered.
    """
    if processed is None:
        processed = set()

    worklist = collections.deque(k for k, v in candidates.m_node.items()
                                 if v is not None and len(v) == 1 and k not in processed)

    while len(worklist) > 0:
        n_query = worklist.popleft()
        if n_query in processed:
            continue

        processed.add(n_query)
        n_graph = next(iter(candidates.m_node[n_query]))

        #  Use edge-profiles to narrow down possibilities for other nodes.
        for edge in query.iter_edges(src=n_query):
            dst = edge.dst
            label = edge.label
            dst_candidates = {e.dst for e in graph.iter_edges(src=n_graph, label=label)
                              if e.dst.label == dst.label and
                              (dst.value is SYMBOLIC_VALUE or dst.value == e.dst.value)}

            #  Compare with the existing set of mappings.
            if candidates.m_node[dst] is None:
                candidates.m_node[dst] = dst_candidates
            else:
                candidates.m_node[dst].intersection_update(dst_candidates)

            new_len = len(candidates.m_node[dst])
            if new_len == 0:
                return False
            elif new_len == 1:
                worklist.append(dst)

        for edge in query.iter_edges(dst=n_query):
            src = edge.src
            label = edge.label
            src_candidates = {e.src for e in graph.iter_edges(dst=n_graph, label=label)
                              if e.src.label == src.label and
                              (src.value is SYMBOLIC_VALUE or src.value == e.src.value)}

            #  Compare with the existing set of mappings.
            if candidates.m_node[src] is None:
                candidates.m_node[src] = src_candidates
            else:
                candidates.m_node[src].intersection_update(src_candidates)

            new_len = len(candidates.m_node[src])
            if new_len == 0:
                return False
            elif new_len == 1:
                worklist.append(src)

    return True

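#  A minimal sketch of unit propagation in isolation, under the same hypothetical labels as the
#  sketches above: pinning `qa` to `ga` forces `qa`'s only 0-labelled successor `qb` into the
#  singleton {gb}, since only the edge ga -> gb carries label 0.
def _demo_unit_propagation() -> None:
    q_ent, g_ent = Entity(value=SYMBOLIC_VALUE), Entity(value=SYMBOLIC_VALUE)
    qa = Node(label=0, entity=q_ent, value=SYMBOLIC_VALUE)
    qb = Node(label=1, entity=q_ent, value=SYMBOLIC_VALUE)
    query = Graph.from_nodes_and_edges(nodes={qa, qb},
                                       edges={Edge(src=qa, dst=qb, label=0)})

    ga = Node(label=0, entity=g_ent, value=SYMBOLIC_VALUE)
    gb = Node(label=1, entity=g_ent, value=SYMBOLIC_VALUE)
    gc = Node(label=1, entity=g_ent, value=SYMBOLIC_VALUE)
    graph = Graph.from_nodes_and_edges(nodes={ga, gb, gc},
                                       edges={Edge(src=ga, dst=gb, label=0),
                                              Edge(src=ga, dst=gc, label=1)})

    candidates = GraphMultiMapping()
    candidates.m_node.update({qa: {ga}, qb: None})

    #  `qa` is a unit node; propagation narrows `qb` down via ga's 0-labelled out-edges.
    assert _propagate_unit_nodes(candidates, query, graph, processed=set())
    assert candidates.m_node[qb] == {gb}
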
def _get_candidate_mappings(query: Graph,
                            graph: Graph,
                            partial_mapping: Optional[GraphMapping] = None,
                            entity_groups_query: Optional[Dict[Entity, int]] = None,
                            entity_groups_graph: Optional[Dict[Entity, int]] = None
                            ) -> Optional[GraphMultiMapping]:
    """
    Given a `query` to check against a graph, this procedure returns the candidate mappings from the
    entities and nodes of `query` to the entities and nodes of `graph` respectively. This essentially
    establishes the search space for the isomorphisms. If there is no valid mapping, `None` is returned.

    Args:
        query: Query graph.
        graph: Graph to get the isomorphism mappings from query.
        partial_mapping: An existing mapping from entities and nodes of `query` to entities and
            nodes of `graph`.
        entity_groups_query: Entity group info for `query`. Only entities belonging to the same
            group can be matched.
        entity_groups_graph: Entity group info for `graph`. Only entities belonging to the same
            group can be matched.

    Returns:
        Optional[GraphMultiMapping]: The candidate mappings. `None` if no valid mapping exists.
    """
    candidates = GraphMultiMapping()
    candidates.m_ent.update({ent: None for ent in query.iter_entities()})
    candidates.m_node.update({node: None for node in query.iter_nodes()})

    if partial_mapping is not None:
        if not _init_candidate_mappings(candidates, partial_mapping):
            return None

    #  Stage 1 : Initial Unit Propagation
    #  Decide as much of the mapping as possible, starting with the partial mapping. If a node in `query`
    #  is forced to be assigned to a particular node in `graph`, called a `unit` node, use the edge-profile
    #  of that unit node to establish mappings of its neighbours. This may produce more `unit` nodes,
    #  for which we repeat the process.
    processed = set()
    if not _propagate_unit_nodes(candidates, query, graph, processed=processed):
        return None

    #  Stage 2 : Use neighbour profiles to find candidates for non-mapped nodes.
    for n_query in query.iter_nodes():
        if candidates.m_node[n_query] is not None:
            continue

        #  Not assigned yet. Get all the nodes matching the label, value and entity, if any.
        label = n_query.label
        value = None if n_query.value is SYMBOLIC_VALUE else n_query.value
        entities = candidates.m_ent.get(n_query.entity, None) or [None]
        cands = set()
        for entity in entities:
            cands.update(graph.iter_nodes(label=label, entity=entity, value=value))

        candidates.m_node[n_query] = cands

        #  Verify that the neighbour profiles of the candidates for n_query are consistent with the
        #  neighbour profile of n_query. A neighbour profile is simply a dictionary with counts for each
        #  edge type with the src/dst as n_query. The consistency criterion enforces that the number of
        #  edges of a certain type emanating from a candidate should be at least as large as the number
        #  of edges of that type emanating from n_query.
        query_profile_src = collections.Counter(e.label for e in query.iter_edges(src=n_query))
        query_profile_dst = collections.Counter(e.label for e in query.iter_edges(dst=n_query))

        filtered_candidates = []
        for n_graph in candidates.m_node[n_query]:
            profile_src = collections.Counter(e.label for e in graph.iter_edges(src=n_graph))
            if any(profile_src[k] < v for k, v in query_profile_src.items()):
                continue

            profile_dst = collections.Counter(e.label for e in graph.iter_edges(dst=n_graph))
            if any(profile_dst[k] < v for k, v in query_profile_dst.items()):
                continue

            filtered_candidates.append(n_graph)

        if len(filtered_candidates) == 0:
            return None

        candidates.m_node[n_query].intersection_update(filtered_candidates)

    #  Stage 3 : Perform a final unit propagation.
    if not _propagate_unit_nodes(candidates, query, graph, processed=processed):
        return None

    #  Stage 4 : Final pruning using entity groups, if any.
    if entity_groups_query is not None:
        assert entity_groups_graph is not None, "Entity groups have to be supplied for both query and graph."
        candidates.m_node = {
            k: {n for n in v
                if entity_groups_query.get(k.entity, 0) == entity_groups_graph.get(n.entity, 0)}
            for k, v in candidates.m_node.items()
        }

        if any(len(v) == 0 for v in candidates.m_node.values()):
            return None

    #  Stage 5 : Use Hopcroft-Karp maximum matching for bipartite graphs to verify that a one-to-one
    #  mapping is possible.
    #  TODO : Do if needed, doesn't affect correctness.

    return candidates

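#  Hedged end-to-end sketch of the candidate computation on the same toy shapes as the sketches
#  above. It assumes `iter_nodes` treats entity=None / value=None as "unconstrained", which is how
#  Stage 2 invokes it; labels remain hypothetical placeholders.
def _demo_candidate_mappings() -> None:
    q_ent, g_ent = Entity(value=SYMBOLIC_VALUE), Entity(value=SYMBOLIC_VALUE)
    qa = Node(label=0, entity=q_ent, value=SYMBOLIC_VALUE)
    qb = Node(label=1, entity=q_ent, value=SYMBOLIC_VALUE)
    query = Graph.from_nodes_and_edges(nodes={qa, qb},
                                       edges={Edge(src=qa, dst=qb, label=0)})

    ga = Node(label=0, entity=g_ent, value=SYMBOLIC_VALUE)
    gb = Node(label=1, entity=g_ent, value=SYMBOLIC_VALUE)
    graph = Graph.from_nodes_and_edges(nodes={ga, gb},
                                       edges={Edge(src=ga, dst=gb, label=0)})

    candidates = _get_candidate_mappings(query, graph)
    #  Stages 1-3 pin both query nodes down to singleton candidate sets here.
    assert candidates is not None
    assert candidates.m_node[qa] == {ga} and candidates.m_node[qb] == {gb}
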
def _get_subgraph_mappings_recursive(worklist: List[Node],
                                     query: Graph,
                                     graph: Graph,
                                     state: SearchState,
                                     _depth: int = 0) -> Iterator[GraphMapping]:
    """
    The recursive driver of the subgraph isomorphism finder.

    Args:
        worklist: The query nodes still to be assigned, in assignment order.
        query: The query graph.
        graph: The graph the query is matched against.
        state: The current search state.
        _depth: The current recursion depth, starts off at zero.

    Returns:
        Iterator[GraphMapping]: An iterator over the discovered mappings.
    """
    if _depth == len(worklist):
        #  Return a copy to safeguard from in-place editing.
        yield state.current_mapping.copy()
        for i in range(len(worklist)):
            state.success_record[i] = True

        return

    current_mapping = state.current_mapping
    cur_node: Node = worklist[_depth]
    mapped_entity: Optional[Entity] = current_mapping.m_ent.get(cur_node.entity, None)
    entity_assigned_here: bool = mapped_entity is None
    failure_depth: int = -1
    state.success_record[_depth] = False

    for graph_node in state.get_candidates(cur_node):
        ok = True

        #  Check consistency with the current entity mapping.
        if (not entity_assigned_here) and mapped_entity is not graph_node.entity:
            #  The decision point where the entity was actually assigned is a candidate for conflict analysis.
            failure_depth = max(failure_depth, state.get_entity_assignment_depth(mapped_entity))
            continue
        elif entity_assigned_here and state.entity_already_mapped(graph_node.entity):
            #  The decision point where the entity was actually assigned is a candidate for conflict analysis.
            failure_depth = max(failure_depth, state.get_entity_assignment_depth(graph_node.entity))
            continue

        #  Check consistency of the edge profile.
        #  In principle, we could do something similar to unit propagation, which would update the mappings
        #  for all the other nodes, but that entails creation of temporary objects to a large extent,
        #  so we stick with on-demand checks. However, this would be desirable in C++. Resources on BCP in
        #  modern SAT solvers should be useful.
        for edge in query.iter_edges(src=cur_node):
            if edge.dst in current_mapping.m_node:
                #  Check that the edge is present in the graph as well.
                dst_mapped = current_mapping.m_node[edge.dst]
                if not graph.has_edge(src=graph_node, dst=dst_mapped, label=edge.label):
                    #  The decision point where the node was assigned is a candidate for conflict analysis.
                    failure_depth = max(failure_depth, state.get_node_assignment_depth(dst_mapped))
                    ok = False
                    break
            else:
                #  Check that the edge is present for at least one of the candidates of dst.
                if all(not graph.has_edge(src=graph_node, dst=cand, label=edge.label)
                       for cand in state.get_candidates(edge.dst)):
                    #  Hard to say which decision point would have caused this, so nothing on that front.
                    #  TODO : Think about this.
                    #  Being conservative for now.
                    failure_depth = max(failure_depth, _depth - 1)
                    ok = False
                    break

        #  Move on to the next candidate if the check failed.
        if not ok:
            continue

        #  Do a similar check for the edges with cur_node as dst.
        for edge in query.iter_edges(dst=cur_node):
            if edge.src in current_mapping.m_node:
                #  Check that the edge is present in the graph as well.
                src_mapped = current_mapping.m_node[edge.src]
                if not graph.has_edge(src=src_mapped, dst=graph_node, label=edge.label):
                    #  The decision point where the node was assigned is a candidate for conflict analysis.
                    failure_depth = max(failure_depth, state.get_node_assignment_depth(src_mapped))
                    ok = False
                    break
            else:
                #  Check that the edge is present for at least one of the candidates of src.
                if all(not graph.has_edge(src=cand, dst=graph_node, label=edge.label)
                       for cand in state.get_candidates(edge.src)):
                    #  Hard to say which decision point would have caused this, so nothing on that front.
                    #  TODO : Think about this.
                    #  Being conservative for now.
                    failure_depth = max(failure_depth, _depth - 1)
                    ok = False
                    break

        #  Move on to the next candidate if the check failed.
        if not ok:
            continue

        #  Update the mapping and move on to the next item on the worklist.
        state.perform_assignment(_depth, cur_node, graph_node, entity_assigned=entity_assigned_here)

        yield from _get_subgraph_mappings_recursive(worklist, query, graph, state, _depth=_depth + 1)

        #  Rollback the assignment.
        state.undo_assignment(_depth, cur_node, graph_node, entity_assigned=entity_assigned_here)

        if state.return_depth != -2:
            if _depth > state.return_depth:
                #  Pop the call stack further as the root cause of the conflict downstream is further up
                #  the call stack.
                return
            else:
                #  We are at the right depth, reset.
                state.return_depth = -2

    if not state.success_record[_depth]:
        #  No combination of decisions from this point onwards yielded a solution.
        #  Perform conflict analysis to find the latest decision point which could influence the current
        #  point. Then pop the stack till that point. Also modify the worklist to push this decision point
        #  earlier so this conflict is solved first before making any decisions for other nodes.

        #  Was a viable candidate consumed at a previous decision point?
        for n in state.get_original_candidates(cur_node):
            if state.node_already_mapped(n):
                failure_depth = max(failure_depth, state.get_node_assignment_depth(n))

        state.return_depth = failure_depth
        if failure_depth == _depth - 1:
            state.return_depth = -2
        else:
            if failure_depth >= 0:
                #  Swap the worklist items.
                worklist[failure_depth + 1], worklist[_depth] = worklist[_depth], worklist[failure_depth + 1]

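#  Hedged sketch of how the pieces above fit together. The public entry point in this module is
#  presumably `Graph.get_subgraph_mappings` (it is used by `get_greatest_common_universal_supergraph`
#  below); this helper drives the internals directly for illustration.
def _demo_subgraph_search(query: Graph, graph: Graph) -> List[GraphMapping]:
    candidates = _get_candidate_mappings(query, graph)
    if candidates is None:
        #  The candidate computation already proved that no isomorphism exists.
        return []

    worklist, state = _init_state_and_worklist(query, graph, candidates, None)
    return list(_get_subgraph_mappings_recursive(worklist, query, graph, state))
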
def get_greatest_common_universal_supergraph(query: Graph,
                                             graphs: List[Graph],
                                             all_mappings: Optional[Dict[Graph, List[GraphMapping]]] = None,
                                             ) -> Tuple[Graph, GraphMapping]:
    """
    Returns the universal supergraph corresponding to the greatest lower bound of all the maximal
    universal supergraphs of `query` w.r.t `graphs` in the partial order of universal supergraphs
    of `query` w.r.t `graphs`.

    Args:
        query: The query graph to find the universal supergraph for.
        graphs: The graphs w.r.t which the universal supergraph is to be computed.
        all_mappings: Mapping of graphs to subgraph isomorphism mappings of `query` for each graph in
            `graphs`. If None, they are computed by iterating over the result of
            `get_subgraph_mappings` till exhaustion.

    Returns:
        Tuple[Graph, GraphMapping]: The universal supergraph along with the mapping w.r.t `query`.
            The mapping only contains nodes already present in `query`.
    """
    if all_mappings is None:
        all_mappings = {g: list(query.get_subgraph_mappings(g)) for g in graphs}

    #  Filter out the graphs in which the query is not present at all.
    graphs = [g for g in graphs if len(all_mappings[g]) != 0]
    all_mappings = {g: all_mappings[g] for g in graphs}
    assert len(all_mappings) > 0, "Did not find any graph which contains the query."

    #  We will use the first mapping for the first graph to incrementally construct the desired supergraph.
    #  The rationale is that since the universal supergraph needs to be consistent with all the mappings of
    #  all the graphs, we can use one mapping to grow the graph while using the others to check correctness
    #  of every incremental update.
    exemplar = all_mappings[graphs[0]][0].copy()

    #  Instead of tracking mappings w.r.t the query, track them w.r.t the exemplar mapping.
    work_mappings: Dict[Graph, List[GraphMapping]] = {
        g: [m.apply_mapping(exemplar, only_keys=True) for m in g_mappings]
        for g, g_mappings in all_mappings.items()
    }

    known_nodes: Set[Node] = set(exemplar.m_node.values())
    orig_known_nodes: Set[Node] = known_nodes.copy()
    known_edges: Set[Edge] = {Edge(src=exemplar.m_node[e.src], dst=exemplar.m_node[e.dst], label=e.label)
                              for e in query.iter_edges()}
    assert all(e.src in known_nodes and e.dst in known_nodes for e in known_edges)

    #  Maintain a worklist of edges with at least one end-point already known.
    worklist: Deque[Edge] = collections.deque(e for e in graphs[0].iter_edges()
                                              if e not in known_edges and
                                              (e.src in known_nodes or e.dst in known_nodes))

    #  Also keep track of all the nodes mapped in every mapping.
    already_mapped_dict: Dict[GraphMapping, Set[Node]] = {m: set(m.m_node.values())
                                                          for mappings in work_mappings.values()
                                                          for m in mappings}

    while len(worklist) > 0:
        edge = worklist.popleft()
        if edge in known_edges:
            continue

        if edge.src in known_nodes and edge.dst in known_nodes:
            #  Both end-points already present; simply check for the presence of this edge
            #  in all the graphs and w.r.t all the mappings for every graph.
            for graph, mappings in work_mappings.items():
                if any(not graph.has_edge(src=m.m_node[edge.src], dst=m.m_node[edge.dst], label=edge.label)
                       for m in mappings):
                    break
            else:
                #  Did not break, so we can safely add this edge.
                known_edges.add(edge)

        elif edge.src in known_nodes:
            #  edge.dst is not known yet. Check if a counterpart of edge.dst exists for every mapping for
            #  every graph, such that there is an incoming edge of label edge.label with the counterpart
            #  of edge.src as the src.
            success = True
            counterparts_dict = {}
            for graph, mappings in work_mappings.items():
                for mapping in mappings:
                    already_mapped = already_mapped_dict[mapping]
                    #  Get the possible counterparts.
                    candidates = _get_counterpart_candidates(graph, mapping, edge, already_mapped,
                                                             known_src=True)
                    if len(candidates) == 0:
                        #  Can't extend this mapping, so this edge is not useful overall.
                        #  Exit out of all the loops.
                        success = False
                        break
                    else:
                        counterparts_dict[mapping] = candidates

                if not success:
                    break

            if not success:
                #  Can't do anything with this edge. Move on to the next item on the worklist.
                continue

            #  Can safely add this edge to the current supergraph. Adjust the meta-data being tracked.
            #  Specifically, extend all the mappings with the node corresponding to edge.dst.
            work_mappings = _get_new_work_mappings(work_mappings, counterparts_dict,
                                                   already_mapped_dict, edge.dst)
            known_nodes.add(edge.dst)
            known_edges.add(edge)
            worklist.extend(graphs[0].iter_edges(src=edge.dst))
            worklist.extend(graphs[0].iter_edges(dst=edge.dst))

        elif edge.dst in known_nodes:
            #  Like above, but edge.src is unknown in this case.
            success = True
            counterparts_dict = {}
            for graph, mappings in work_mappings.items():
                for mapping in mappings:
                    already_mapped = already_mapped_dict[mapping]
                    #  Get the possible counterparts.
                    candidates = _get_counterpart_candidates(graph, mapping, edge, already_mapped,
                                                             known_src=False)
                    if len(candidates) == 0:
                        #  Can't extend this mapping, so this edge is not useful overall.
                        #  Exit out of all the loops.
                        success = False
                        break
                    else:
                        counterparts_dict[mapping] = candidates

                if not success:
                    break

            if not success:
                #  Can't do anything with this edge. Move on to the next item on the worklist.
                continue

            #  Can safely add this edge to the current supergraph. Adjust the meta-data being tracked.
            #  Specifically, extend all the mappings with the node corresponding to edge.src.
            work_mappings = _get_new_work_mappings(work_mappings, counterparts_dict,
                                                   already_mapped_dict, edge.src)
            known_nodes.add(edge.src)
            known_edges.add(edge)
            worklist.extend(graphs[0].iter_edges(src=edge.src))
            worklist.extend(graphs[0].iter_edges(dst=edge.src))

    #  Similarly, try to extend the supergraph with graph-level tags and tagged edges as well.
    common_tags = set.intersection(*(set(g.iter_tags()) for g in graphs))
    common_tagged_edges: Set[TaggedEdge] = set()
    worklist_tagged = [e for e in graphs[0].iter_tagged_edges()
                       if e.src in known_nodes and e.dst in known_nodes]

    for tagged_edge in worklist_tagged:
        src = tagged_edge.src
        dst = tagged_edge.dst
        tag = tagged_edge.tag
        #  Check if this tagged edge is present in every graph for every mapping.
        for graph, mappings in work_mappings.items():
            if any(not graph.has_tagged_edge(src=m.m_node[src], dst=m.m_node[dst], tag=tag)
                   for m in mappings):
                break
        else:
            common_tagged_edges.add(tagged_edge)

    #  At this point, we have all the nodes, edges, tags and tagged edges belonging to the supergraph.
    #  We now assemble the greatest common universal supergraph and the graph mapping w.r.t the query.
    universal_supergraph, mapping_wrt_exemplar = _create_symbolic_copy(
        Graph.from_nodes_and_edges(nodes=known_nodes, edges=known_edges))
    mapping_wrt_query = mapping_wrt_exemplar.slice(nodes=orig_known_nodes).apply_mapping(
        exemplar.reverse(), only_keys=True)

    #  Add in the tags and tagged edges.
    universal_supergraph.add_tags(common_tags)
    universal_supergraph.add_tagged_edges(TaggedEdge(src=mapping_wrt_exemplar.m_node[e.src],
                                                     dst=mapping_wrt_exemplar.m_node[e.dst],
                                                     tag=e.tag)
                                          for e in common_tagged_edges)

    return universal_supergraph, mapping_wrt_query

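#  Hedged end-to-end sketch: both graphs below embed the single-node query and share the a -> b edge
#  shape, so the greatest common universal supergraph should grow from the lone query node to include
#  that edge (with symbolic values). Labels and entity/node values are hypothetical placeholders.
def _demo_universal_supergraph() -> None:
    q_ent = Entity(value=SYMBOLIC_VALUE)
    qa = Node(label=0, entity=q_ent, value=SYMBOLIC_VALUE)
    query = Graph.from_nodes_and_edges(nodes={qa}, edges=set())

    graphs = []
    for _ in range(2):
        ent = Entity(value=SYMBOLIC_VALUE)
        a = Node(label=0, entity=ent, value=SYMBOLIC_VALUE)
        b = Node(label=1, entity=ent, value=SYMBOLIC_VALUE)
        graphs.append(Graph.from_nodes_and_edges(nodes={a, b},
                                                 edges={Edge(src=a, dst=b, label=0)}))

    supergraph, mapping = get_greatest_common_universal_supergraph(query, graphs)
    #  The returned mapping covers exactly the original query nodes, while the supergraph has grown
    #  to include the extra edge common to both graphs.
    assert qa in mapping.m_node
    assert len(list(supergraph.iter_edges())) == 1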