def build(cls, strengthening_constraint: Graph) -> 'Intelligence':
    all_uids: Set[str] = set()
    selected: Dict[str, Set[Union[Node, str]]] = collections.defaultdict(set)
    not_selected: Dict[str, Set[Union[Node, str]]] = collections.defaultdict(set)

    for tag in strengthening_constraint.iter_tags():
        # Tags correspond to SelectConst invocations.
        # Format is (SELECTED/NOT_SELECTED)@(val)@(uid).
        # Values are guaranteed to be strings in the RLang domain.
        selected_bool, value, uid = tag.split('@')
        selected_bool = selected_bool == "SELECTED"
        all_uids.add(uid)
        if selected_bool:
            selected[uid].add(value)
        else:
            not_selected[uid].add(value)

    for tagged_edge in strengthening_constraint.iter_tagged_edges():
        # Tagged edges should be self-edges for the RLang domain.
        # Format is (SELECTED/NOT_SELECTED)@(uid).
        node = tagged_edge.src
        selected_bool, uid = tagged_edge.tag.split('@')
        selected_bool = selected_bool == "SELECTED"
        all_uids.add(uid)
        if selected_bool:
            selected[uid].add(node)
        else:
            not_selected[uid].add(node)

    return Intelligence(all_uids=all_uids, selected=selected, not_selected=not_selected)
def extract_paths(graph: Graph, input_entities: List[Entity], output_entity: Entity):
    path_dict: Dict[Entity, List[Path]] = {ent: [] for ent in input_entities}

    for node in itertools.chain(*(graph.iter_nodes(entity=ent) for ent in input_entities)):
        # Find all the paths from node to an output node, without any other input or output
        # nodes in between. An entry is the set of visited nodes, the current node to
        # explore, and the current set of edges.
        entry: Tuple[Set[Node], Node, List[Edge]] = ({node}, node, [])
        worklist = collections.deque([entry])
        paths: List[Path] = []
        while len(worklist) > 0:
            visited, cur_node, edges = worklist.popleft()
            for edge in graph.iter_edges(src=cur_node):
                dst = edge.dst
                if dst in visited or dst.entity in input_entities:
                    continue

                if dst.entity is output_entity:
                    paths.append((visited | {dst}, edges + [edge]))
                else:
                    worklist.append((visited | {dst}, dst, edges + [edge]))

        path_dict[node.entity].extend(paths)

    return path_dict
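# A minimal usage sketch for extract_paths (hypothetical, not part of the original module).
# It assumes Graph, Node, Edge, Entity and SYMBOLIC_VALUE can be imported from gauss.graphs,
# mirroring how the tests in this repo construct graphs.
def _demo_extract_paths():
    from gauss.graphs import Edge, Entity, Graph, Node, SYMBOLIC_VALUE

    inp_ent = Entity(value=SYMBOLIC_VALUE)
    mid_ent = Entity(value=SYMBOLIC_VALUE)
    out_ent = Entity(value=SYMBOLIC_VALUE)
    a = Node(label=0, entity=inp_ent, value=SYMBOLIC_VALUE)
    b = Node(label=1, entity=mid_ent, value=SYMBOLIC_VALUE)
    c = Node(label=2, entity=out_ent, value=SYMBOLIC_VALUE)

    g = Graph()
    for n in (a, b, c):
        g.add_node(n)
    g.add_edge(Edge(a, b, 0))
    g.add_edge(Edge(b, c, 0))

    # Exactly one path from the input entity to the output entity: a -> b -> c.
    path_dict = extract_paths(g, input_entities=[inp_ent], output_entity=out_ent)
    (visited, edges), = path_dict[inp_ent]
    assert visited == {a, b, c} and len(edges) == 2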
def equality_transitive_closure(graph: Graph,
                                equality_label: int,
                                join_nodes: Optional[Set[Node]] = None,
                                valid_combinations: Optional[Set[int]] = None):
    worklist = collections.deque(e for e in graph.iter_edges(label=equality_label))
    seen_edges = set(worklist)
    while len(worklist) > 0:
        edge_item = worklist.popleft()
        added = set()
        if join_nodes is None or edge_item.src in join_nodes:
            for e in graph.iter_edges(dst=edge_item.src):
                if valid_combinations is None or e.label in valid_combinations:
                    added.add(Edge(e.src, edge_item.dst, e.label))

        if join_nodes is None or edge_item.dst in join_nodes:
            for e in graph.iter_edges(src=edge_item.dst):
                if valid_combinations is None or e.label in valid_combinations:
                    added.add(Edge(edge_item.src, e.dst, e.label))

        added -= seen_edges
        if len(added) > 0:
            graph.add_nodes_and_edges(edges=added)
            for e in added:
                if e.label == equality_label:
                    worklist.append(e)
                seen_edges.add(e)
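# A minimal usage sketch for equality_transitive_closure (hypothetical, not part of the
# original module). Label 0 plays the role of the equality label here; all labels and
# values are arbitrary illustrative choices. Assumes the gauss.graphs exports used by the
# tests in this repo.
def _demo_equality_transitive_closure():
    from gauss.graphs import DEFAULT_ENTITY, Edge, Graph, Node, SYMBOLIC_VALUE

    a = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    b = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    c = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)

    g = Graph()
    for n in (a, b, c):
        g.add_node(n)
    g.add_edge(Edge(a, b, 5))  # some non-equality relationship
    g.add_edge(Edge(b, c, 0))  # equality edge between b and c

    # The incoming edge (a, b, 5) is mirrored across the equality edge (b, c, 0),
    # so the closure adds (a, c, 5).
    equality_transitive_closure(g, equality_label=0)
    assert any(e.src is a and e.dst is c and e.label == 5 for e in g.iter_edges())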
def get_strengthening_constraint(self, input_graph: Graph) -> Graph:
    strengthened_input_graph = Graph()
    for constraint in self.constraints.values():
        strengthened_input_graph.merge(constraint.get_strengthening_constraint(input_graph))

    return strengthened_input_graph
def _get_canonical_query_plans(self,
                               sequence: List[str],
                               transformation: Transformation) -> Dict[Skeleton, Set[QueryPlan]]:
    meta_plan = self._meta_plans[transformation]
    blueprint_item_lists = self._get_blueprint_item_lists(sequence, meta_plan, _d=len(sequence))
    canonical_transformation = meta_plan.canonical_transformations[len(sequence)]
    mapping = next(canonical_transformation.get_subgraph_mappings(transformation))

    skeletons_to_plans: Dict[Skeleton, Set[QueryPlan]] = collections.defaultdict(set)
    for blueprint_item_list in blueprint_item_lists:
        # Break down the overall transformation in terms of the unit plans contained in the
        # blueprint items. Store the connections between them as a graph mapping.
        connections = GraphMapping()
        connections.update(mapping)
        graph = Graph()
        for item in blueprint_item_list:
            graph.merge(item.unit.transformation)
            connections = connections.apply_mapping(item.canonical_mapping, only_keys=True)
            if item.border_mapping:
                connections.update(item.border_mapping)
                connections = connections.apply_mapping(connections, only_values=True)

        # Assemble the query plan.
        query_plan = QueryPlan(transformation,
                               units=[item.unit.transformation for item in blueprint_item_list],
                               all_connections=connections,
                               strengthenings=[item.unit.strengthenings[component_name]
                                               for component_name, item in zip(sequence, blueprint_item_list)])

        # Obtain the skeletons for which this query plan would work.
        # External inputs are negative integers. See gauss.synthesis.skeleton for details.
        ent_to_idx = {ent: -idx for idx, ent in enumerate(transformation.get_input_entities(), 1)}
        possible_arg_ints_lists = []
        for component_name, (idx, item) in zip(sequence, enumerate(blueprint_item_list, 1)):
            # Get the mapped entities to the inputs of this unit's transformation,
            # and look up their idx values.
            arg_ints = [ent_to_idx[connections.m_ent[ent]]
                        for ent in item.unit.transformation.get_input_entities()]
            # Get all the permutations as well.
            arg_ints_list = [arg_num_mapping.apply_list(arg_ints)
                             for arg_num_mapping in item.unit.component_entries[component_name].argument_mappings]
            possible_arg_ints_lists.append(arg_ints_list)
            ent_to_idx[item.unit.transformation.get_output_entity()] = idx

        # The skeletons are then simply all the combinations.
        for arg_ints_list in itertools.product(*possible_arg_ints_lists):
            skeleton = Skeleton(list(zip(sequence, arg_ints_list)))
            skeletons_to_plans[skeleton].add(query_plan)

    return skeletons_to_plans
def _get_explanation_expr_str(graph: Graph, node: Node,
                              node_label_dict: Dict[int, str],
                              edge_label_dict: Dict[int, str]) -> Optional[str]:
    args = collections.defaultdict(list)
    for edge in graph.iter_edges(dst=node):
        label = edge_label_dict[edge.label]
        if label.startswith("CUM") or label == "COLUMN" or label == "ROW":
            continue

        if node_label_dict[edge.src.label] == "INTERM":
            args[label].append(_get_explanation_expr_str(graph, edge.src,
                                                         node_label_dict, edge_label_dict))
        else:
            args[label].append(str(edge.src.value))

    if len(args) == 0:
        return None

    if "EQUAL" in args:
        return args["EQUAL"][0]

    key = next(iter(args.keys()))
    arg_str = ", ".join(args[key])
    return f"({key.upper()}({arg_str}))"
def _adapt_query_plan(self, plan: QueryPlan, query: Query):
    # The given plan is assumed to be a canonical query plan.
    # Also, the transformation in plan should be the same as the transformation in query.
    # This should be guaranteed by construction of the query plan.

    # Create a fresh copy of the plan where the transformation is the actual subgraph.
    adapted_plan = plan.deepcopy().adapt(new_transformation=query.subgraph,
                                         mapping_old_to_new=query.mapping)

    equality_label = self._domain.get_equality_edge_label()
    if equality_label is None:
        return None

    # Propagate known values amongst the nodes with the equality edge.
    influence: Dict[Node, Set[Node]] = collections.defaultdict(set)
    for k, v in adapted_plan.all_connections.m_node.items():
        influence[v].add(k)

    seen = set()
    worklist = collections.deque(adapted_plan.transformation.iter_nodes())
    while len(worklist) > 0:
        node = worklist.popleft()
        if node in seen:
            continue

        seen.add(node)
        if node.value is SYMBOLIC_VALUE:
            continue

        # Connected nodes inherit the value.
        for n in influence[node]:
            if n.value is SYMBOLIC_VALUE:
                n.value = node.value
                worklist.append(n)

        # Equality edges with src and dst as node also propagate the values.
        for unit in adapted_plan.units:
            for e in unit.iter_edges(src=node, label=equality_label):
                if e.dst.value is SYMBOLIC_VALUE:
                    e.dst.value = node.value
                    worklist.append(e.dst)

            for e in unit.iter_edges(dst=node, label=equality_label):
                if e.src.value is SYMBOLIC_VALUE:
                    e.src.value = node.value
                    worklist.append(e.src)

    # We may have wrecked the internal data-structures of the unit transformations by
    # changing values directly. Create shallow copies which force a rebuild.
    adapted_plan.units = [Transformation.build_from_graph(Graph.from_nodes_and_edges(unit.get_all_nodes(),
                                                                                     unit.get_all_edges()),
                                                          unit.get_input_entities(),
                                                          unit.get_output_entity())
                          for unit in adapted_plan.units]

    return adapted_plan
def create_symbolic_copy(graph: Graph) -> Tuple[Graph, GraphMapping]:
    mapping = GraphMapping()
    for entity in graph.iter_entities():
        mapping.m_ent[entity] = Entity(value=SYMBOLIC_VALUE)

    for node in graph.iter_nodes():
        mapping.m_node[node] = Node(label=node.label,
                                    entity=mapping.m_ent[node.entity],
                                    value=SYMBOLIC_VALUE)

    new_graph = Graph.from_nodes_and_edges(nodes=set(mapping.m_node.values()),
                                           edges={Edge(src=mapping.m_node[e.src],
                                                       dst=mapping.m_node[e.dst],
                                                       label=e.label)
                                                  for e in graph.iter_edges()})
    return new_graph, mapping
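# A minimal usage sketch for create_symbolic_copy (hypothetical, not part of the original
# module): concrete values are erased in the copy while labels and edge structure are
# preserved. Assumes the gauss.graphs exports used by the tests in this repo.
def _demo_create_symbolic_copy():
    from gauss.graphs import DEFAULT_ENTITY, Edge, Graph, Node, SYMBOLIC_VALUE

    g = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
    n2 = Node(label=1, entity=DEFAULT_ENTITY, value=20)
    g.add_node(n1)
    g.add_node(n2)
    g.add_edge(Edge(n1, n2, 0))

    copy, mapping = create_symbolic_copy(g)
    assert all(n.value is SYMBOLIC_VALUE for n in copy.iter_nodes())
    assert mapping.m_node[n1].label == 0 and mapping.m_node[n2].label == 1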
def get_strengthening_constraint(self, input_graph: Graph) -> Graph:
    common_tags = None
    common_edges = None
    common_tagged_edges = None

    for plan, partial_mappings in self._checks.items():
        strengthening, s_mapping = plan.strengthenings[self.depth]
        s_edges = strengthening.get_all_edges()
        s_tagged_edges = set(strengthening.iter_tagged_edges())
        plan_tags = set(strengthening.iter_tags())
        plan_tagged_edges = None
        plan_edges = None

        for partial_mapping in partial_mappings:
            mapping_wrt_inp_graph = partial_mapping.apply_mapping(s_mapping, only_keys=True)
            for m in strengthening.get_subgraph_mappings(input_graph,
                                                         partial_mapping=mapping_wrt_inp_graph):
                if plan_tagged_edges is None:
                    plan_tagged_edges = {TaggedEdge(m.m_node[e.src], m.m_node[e.dst], e.tag)
                                         for e in s_tagged_edges}
                    plan_edges = {Edge(m.m_node[e.src], m.m_node[e.dst], e.label)
                                  for e in s_edges}
                else:
                    plan_tagged_edges.intersection_update(TaggedEdge(m.m_node[e.src], m.m_node[e.dst], e.tag)
                                                          for e in s_tagged_edges)
                    plan_edges.intersection_update(Edge(m.m_node[e.src], m.m_node[e.dst], e.label)
                                                   for e in s_edges)

        if common_tags is None:
            common_tags = plan_tags or set()
            common_tagged_edges = plan_tagged_edges or set()
            common_edges = plan_edges or set()
        else:
            common_tags.intersection_update(plan_tags or set())
            common_tagged_edges.intersection_update(plan_tagged_edges or set())
            common_edges.intersection_update(plan_edges or set())

    nodes = {e.src for e in common_tagged_edges}
    nodes.update(e.dst for e in common_tagged_edges)
    nodes.update(e.src for e in common_edges)
    nodes.update(e.dst for e in common_edges)

    result = Graph.from_nodes_and_edges(nodes=nodes, edges=common_edges)
    result.add_tagged_edges(common_tagged_edges)
    result.add_tags(common_tags)

    return result
def test_1(self):
    from gauss.graphs.python.subgraph import _get_candidate_mappings

    g1 = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g1.add_node(n1)
    g1.add_node(n2)
    g1.add_edge(Edge(n1, n2, 0))

    g2 = Graph()
    n3 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g2.add_node(n3)

    m1 = _get_candidate_mappings(g2, g1)
    m2 = _get_candidate_mappings(g1, g2)
    self.assertIsNotNone(m1)
    self.assertIsNone(m2)
    self.assertIn(n3, m1.m_node)
    self.assertSetEqual({n1}, m1.m_node[n3])
def _extract_unit_plans(self, component_name: str, witness_entry: WitnessEntry):
    graph = witness_entry.graph
    input_entities = witness_entry.get_input_entities()
    output_entity = witness_entry.get_output_entity()

    placeholder_dict = {}
    # A placeholder node can represent any node belonging to an entity.
    # This helps coalesce equivalent query plans.
    for ent in itertools.chain(input_entities, [output_entity]):
        placeholder_dict[ent] = PlaceholderNode(entity=ent)

    path_dict: Dict[Entity, List[Path]] = extract_paths(graph, input_entities, output_entity)

    # Find queries by taking exactly one path, and placeholder nodes for the
    # input entities not present in the path.
    for path_ent, paths in path_dict.items():
        remaining_entities = [ent for ent in input_entities if ent is not path_ent]
        for path in paths:
            path_nodes, path_edges = path
            nodes = list(path_nodes) + [placeholder_dict[ent] for ent in remaining_entities]
            edges = path_edges

            # Get the corresponding subgraph.
            subgraph = Graph.from_nodes_and_edges(nodes=set(nodes), edges=set(edges))
            self._record_unit_meta_query_plan(component_name, subgraph, input_entities, output_entity)

    # Include empty transformations to help with evolution (the second stage).
    # An empty transformation plays the role of a *wildcard* plan, that is, any
    # transformation is valid. We add all empty transformations with input nodes spanning
    # all distinct input node types (including placeholders) and the output being the
    # placeholder node.
    label_canonical_node: Dict[Entity, Dict[int, Node]] = collections.defaultdict(dict)
    for ent in input_entities:
        for node in graph.iter_nodes(entity=ent):
            label_canonical_node[ent][node.label] = node

    canonical_nodes: List[List[Node]] = [list(v.values()) for v in label_canonical_node.values()]
    canonical_nodes.append([placeholder_dict[output_entity]])
    for subgraph_nodes in itertools.product(*canonical_nodes):
        subgraph = Graph.from_nodes_and_edges(nodes=subgraph_nodes, edges=[])
        self._record_unit_meta_query_plan(component_name, subgraph, input_entities,
                                          output_entity, empty=True)
def _get_involved_nodes(graph: Graph, node: Node,
                        node_label_dict: Dict[int, str],
                        edge_label_dict: Dict[int, str]) -> Set[Node]:
    result = set()
    for edge in graph.iter_edges(dst=node):
        label = edge_label_dict[edge.label]
        if label.startswith("CUM") or label == "COLUMN" or label == "ROW":
            continue

        result.add(edge.src)
        result.update(_get_involved_nodes(graph, edge.src, node_label_dict, edge_label_dict))

    return result
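# A minimal usage sketch for _get_involved_nodes (hypothetical, not part of the original
# module). The label dictionaries below are illustrative stand-ins for the domain's real
# ones; COLUMN/ROW/CUM* edges are skipped by the traversal.
def _demo_get_involved_nodes():
    from gauss.graphs import DEFAULT_ENTITY, Edge, Graph, Node, SYMBOLIC_VALUE

    a = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    b = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    c = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)

    g = Graph()
    for n in (a, b, c):
        g.add_node(n)
    g.add_edge(Edge(a, c, 0))  # "ARG" edge: a is involved in computing c
    g.add_edge(Edge(b, c, 1))  # "COLUMN" edge: ignored by the traversal

    node_label_dict = {0: "CELL"}
    edge_label_dict = {0: "ARG", 1: "COLUMN"}
    assert _get_involved_nodes(g, c, node_label_dict, edge_label_dict) == {a}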
def prepare_solution(self, output: Any, output_graph: Graph) -> Solution:
    if self.problem.input_names is not None:
        int_to_names: Dict[int, str] = {-idx: name
                                        for idx, name in enumerate(self.problem.input_names, 1)}
    else:
        int_to_names: Dict[int, str] = {-idx: f"inp{idx}"
                                        for idx in range(1, len(self.problem.inputs) + 1)}

    int_to_names[self.skeleton.length] = self.problem.output_name

    graph = Graph()
    for g in self.graphs:
        graph.merge(g)

    # Perform transitive closure w.r.t. the nodes corresponding to the intermediate outputs
    # and take the induced subgraph containing all nodes except those join nodes.
    if self.skeleton.length > 1:
        join_nodes = set.union(*(set(self.int_to_graph[i].iter_nodes())
                                 for i in range(1, self.skeleton.length)))
        self.domain.perform_transitive_closure(graph, join_nodes=join_nodes)
        graph = graph.induced_subgraph(keep_nodes=set(graph.iter_nodes()) - join_nodes)

    return self.domain.prepare_solution(self.problem.inputs,
                                        output,
                                        graph,
                                        self.problem.graph_inputs,
                                        output_graph,
                                        self.enumeration_items,
                                        arguments=[arg_ints for (comp_name, arg_ints) in self.skeleton],
                                        int_to_names=int_to_names,
                                        int_to_obj=self.int_to_val)
def test_4(self):
    from gauss.graphs.python.subgraph import _get_candidate_mappings

    query = Graph()
    n11 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n12 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n13 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    query.add_node(n11)
    query.add_node(n12)
    query.add_node(n13)
    query.add_edge(Edge(n11, n12, 0))
    query.add_edge(Edge(n11, n13, 1))

    graph = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n3 = Node(label=3, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)  # 3, not 2
    n4 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n5 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n7 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    graph.add_node(n1)
    graph.add_node(n2)
    graph.add_node(n3)
    graph.add_node(n4)
    graph.add_node(n5)
    graph.add_node(n6)
    graph.add_node(n7)
    graph.add_edge(Edge(n1, n2, 0))
    graph.add_edge(Edge(n1, n3, 1))
    graph.add_edge(Edge(n4, n5, 0))
    graph.add_edge(Edge(n4, n6, 1))
    graph.add_edge(Edge(n4, n7, 1))

    m = _get_candidate_mappings(query, graph)
    self.assertIsNotNone(m)
def test_3(self):
    g1 = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
    n2 = Node(label=0, entity=DEFAULT_ENTITY, value=20)
    n3 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n4 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n5 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g1.add_node(n1)
    g1.add_node(n2)
    g1.add_node(n3)
    g1.add_node(n4)
    g1.add_node(n5)
    g1.add_node(n6)
    g1.add_edge(Edge(n1, n3, 0))
    g1.add_edge(Edge(n3, n5, 1))
    g1.add_edge(Edge(n2, n4, 0))
    g1.add_edge(Edge(n4, n6, 1))

    g2 = Graph()
    n21 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n22 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n23 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g2.add_node(n21)
    g2.add_node(n22)
    g2.add_node(n23)
    g2.add_edge(Edge(n21, n22, 0))
    g2.add_edge(Edge(n22, n23, 1))

    mappings_21 = list(g2.get_subgraph_mappings(g1))
    self.assertEqual(2, len(mappings_21))

    g3 = Graph()
    n31 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
    n32 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n33 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g3.add_node(n31)
    g3.add_node(n32)
    g3.add_node(n33)
    g3.add_edge(Edge(n31, n32, 0))
    g3.add_edge(Edge(n32, n33, 1))

    mappings_31 = list(g3.get_subgraph_mappings(g1))
    self.assertEqual(1, len(mappings_31))
def _solve_for_skeleton_recursive(self,
                                  problem: SynthesisProblem,
                                  skeleton: Skeleton,
                                  query_plans: QueryPlans,
                                  context: SolverContext,
                                  _depth: int = 0) -> Iterator[Tuple[Any, Graph]]:
    domain = self._domain
    component_name, arg_ints = skeleton[_depth]
    inputs, g_inputs = context.get_arguments(depth=_depth)
    inp_entities = [next(iter(g_inp.iter_entities())) for g_inp in g_inputs]

    inp_graph = Graph()
    for g_inp in g_inputs:
        inp_graph.merge(g_inp)

    # Get the strengthening constraint for this depth.
    # Specifically, for every query, get the intersection of the strengthenings of all the
    # query plans for that query at this particular depth. Then take the union of all of
    # these. In other words, this strengthening constraint is a graph containing the nodes,
    # edges, tags and tagged edges that must be satisfied by the graph containing the
    # inputs, that is `inp_graph` in this context. This constraint can then be used by the
    # `enumerate` procedure to speed up the search.
    strengthening_constraint: Graph = context.waypoints[_depth].get_strengthening_constraint(inp_graph)

    enumeration_item: EnumerationItem
    for enumeration_item in domain.enumerate(component_name=component_name,
                                             inputs=inputs,
                                             g_inputs=g_inputs,
                                             constants=problem.constants,
                                             strengthening_constraint=strengthening_constraint):
        output = enumeration_item.output
        c_graph = enumeration_item.graph
        o_graph = enumeration_item.o_graph

        # for g in g_inputs:
        #     assert set(g.iter_nodes()).issubset(set(c_graph.iter_nodes()))

        if problem.timeout is not None and time.time() - self._time_start > problem.timeout:
            raise TimeoutError("Exceeded time limit.")

        out_entity = next(iter(o_graph.iter_entities()))
        c_graph.add_node(PlaceholderNode(entity=out_entity))
        c_graph = Transformation.build_from_graph(c_graph,
                                                  input_entities=inp_entities,
                                                  output_entity=out_entity)

        # Check if the returned graph is consistent with the query plans.
        if not context.check_validity(c_graph, depth=_depth):
            continue

        # Prepare for the next round.
        context.step(output=output, graph=c_graph, output_graph=o_graph,
                     enumeration_item=enumeration_item, depth=_depth)

        if _depth == skeleton.length - 1:
            # This was the last component; prepare the program and return it along with
            # the final output and graph.
            yield output, o_graph
        else:
            # Move on to the next component.
            yield from self._solve_for_skeleton_recursive(problem, skeleton, query_plans,
                                                          context, _depth=_depth + 1)
def test_5(self):
    # Stress-tests the intelligence of back-tracking.
    query = Graph()
    n11 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n12 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n13 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n14 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n15 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n16 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    query.add_node(n11)
    query.add_node(n12)
    query.add_node(n13)
    query.add_node(n14)
    query.add_node(n15)
    query.add_node(n16)
    query.add_edge(Edge(n13, n15, 0))
    query.add_edge(Edge(n13, n15, 1))
    query.add_edge(Edge(n14, n16, 0))
    query.add_edge(Edge(n14, n16, 1))

    graph = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n2 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n3 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n4 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n5 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n7 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n8 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n9 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n10 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    graph.add_node(n1)
    graph.add_node(n2)
    graph.add_node(n3)
    graph.add_node(n4)
    graph.add_node(n5)
    graph.add_node(n6)
    graph.add_node(n7)
    graph.add_node(n8)
    graph.add_node(n9)
    graph.add_node(n10)
    graph.add_edge(Edge(n3, n5, 0))
    graph.add_edge(Edge(n3, n6, 1))
    graph.add_edge(Edge(n4, n5, 1))
    graph.add_edge(Edge(n4, n6, 0))

    mappings = list(query.get_subgraph_mappings(graph,
                                                _worklist_order=[n11, n12, n13, n14, n15, n16]))
    self.assertEqual(0, len(mappings))
def test_4(self):
    query = Graph()
    n11 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n12 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n13 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    query.add_node(n11)
    query.add_node(n12)
    query.add_node(n13)
    query.add_edge(Edge(n11, n12, 0))
    query.add_edge(Edge(n11, n13, 1))

    graph = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n3 = Node(label=3, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)  # 3, not 2
    n4 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n5 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n7 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    graph.add_node(n1)
    graph.add_node(n2)
    graph.add_node(n3)
    graph.add_node(n4)
    graph.add_node(n5)
    graph.add_node(n6)
    graph.add_node(n7)
    graph.add_edge(Edge(n1, n2, 0))
    graph.add_edge(Edge(n1, n3, 1))
    graph.add_edge(Edge(n4, n5, 0))
    graph.add_edge(Edge(n4, n6, 1))
    graph.add_edge(Edge(n4, n7, 1))

    mappings = list(query.get_subgraph_mappings(graph))
    self.assertEqual(2, len(mappings))
def _evolve_meta_plan(self, plan: MetaQueryPlan, depth: int,
                      nlabel_to_unit_plan: Dict[int, Set[UnitMetaPlan]]):
    # Replace an input node of plan with the output of a unit meta-plan (contained in
    # nlabel_to_unit_plan). We thus extend an existing plan with the output of exactly one
    # component, increasing the program depth by exactly one.
    plan_transformation: Transformation = plan.canonical_transformations[depth - 1]
    all_input_nodes: Set[Node] = set(plan_transformation.get_input_nodes())

    for inp_node in all_input_nodes:
        # Even if it is a placeholder node, it can only be extended via the empty
        # transform. This makes sense as no matter what the extension is, it will never
        # "influence" the final output, as there is no path between a placeholder node and
        # the output node. This helps reduce the size of the collection of meta
        # query-plans by a large margin.
        remaining_inputs = all_input_nodes - {inp_node}
        for extender in nlabel_to_unit_plan[int(inp_node.label)]:
            # We can map the other inputs to the inputs of the extender plan. They can
            # also be distinct inputs of their own. The total number of inputs should,
            # however, be less than max_inputs. For the remaining inputs, if they are a
            # placeholder node, they can be mapped to *any* of the input nodes of the
            # extender plan, regardless of the label.
            nlabel_to_inp_node: Dict[int, Set[Node]] = collections.defaultdict(set)
            extender_inputs: Set[Node] = set(extender.transformation.get_input_nodes())
            # Guaranteed to be a single output node by construction.
            extender_output: Node = next(extender.transformation.get_output_nodes())
            for inp in extender_inputs:
                nlabel_to_inp_node[inp.label].add(inp)

            nlabel_to_inp_node[extender_output.label].add(extender_output)

            mapping_possibilities: Dict[Node, Set[Node]] = {inp_node: {extender_output}}
            for inp in remaining_inputs:
                if inp.label == PLACEHOLDER_LABEL:
                    mapping_possibilities[inp] = extender_inputs | {extender_output, inp}
                else:
                    mapping_possibilities[inp] = nlabel_to_inp_node[inp.label] | {inp}

            node_list = list(all_input_nodes)
            for border_node_mapping in itertools.product(*[mapping_possibilities[n] for n in node_list]):
                border_node_mapping: Dict[Node, Node] = dict(zip(node_list, border_node_mapping))
                border_mapping = GraphMapping(m_ent={k.entity: v.entity
                                                     for k, v in border_node_mapping.items()},
                                              m_node=border_node_mapping.copy())

                # Create a deepcopy of the extender for safety.
                copied_extender, copy_mapping = extender.deepcopy()
                copied_extender_inputs: Set[Node] = set(copied_extender.transformation.get_input_nodes())
                copied_extender_output: Node = next(copied_extender.transformation.get_output_nodes())
                border_mapping = border_mapping.apply_mapping(copy_mapping, only_values=True)
                assert border_mapping.m_node != border_node_mapping

                # The new inputs are the inputs of extender, plus the nodes of the current
                # plan which were not bound to any of the inputs of extender.
                # We also decide the order of the nodes/entities right now.
                new_input_nodes: List[Node] = []
                for inp in plan_transformation.get_input_nodes():
                    mapped = border_mapping.m_node[inp]
                    if mapped is inp:
                        new_input_nodes.append(inp)
                    elif mapped is copied_extender_output:
                        new_input_nodes.extend(i for i in copied_extender.transformation.get_input_nodes()
                                               if i not in new_input_nodes)
                    elif mapped not in new_input_nodes:
                        assert mapped in copied_extender_inputs
                        new_input_nodes.append(mapped)

                new_input_entities = [n.entity for n in new_input_nodes]
                # Every entity is associated with one node, so the following should hold.
                assert len(new_input_entities) == len(set(new_input_entities))
                if len(new_input_entities) > self._config.max_inputs:
                    continue

                new_output_entity = plan_transformation.get_output_entity()

                # Obtain the transformation by establishing common edges between the node
                # pairs in border_node_mapping, taking the transitive closure w.r.t.
                # equality, and finally taking the induced subgraph obtained by removing
                # the input nodes of the current plan.
                joint_graph = Graph.from_graph(copied_extender.transformation)
                joint_graph.merge(plan_transformation)

                final_border_mapping = GraphMapping()
                for k, v in border_mapping.m_node.items():
                    if k is not v:
                        final_border_mapping.m_node[k] = v
                        final_border_mapping.m_ent[k.entity] = v.entity
                        for edge in plan_transformation.iter_edges(src=k):
                            joint_graph.add_edge(Edge(v, edge.dst, edge.label))
                        for edge in plan_transformation.iter_edges(dst=k):
                            joint_graph.add_edge(Edge(edge.src, v, edge.label))

                join_nodes: Set[Node] = set(all_input_nodes)
                join_nodes.difference_update(new_input_nodes)
                join_nodes.add(copied_extender_output)
                self._domain.perform_transitive_closure(joint_graph, join_nodes=join_nodes)

                keep_nodes = set(joint_graph.iter_nodes())
                keep_nodes.difference_update(join_nodes)
                new_transformation_subgraph = joint_graph.induced_subgraph(keep_nodes=keep_nodes)
                new_transformation = Transformation.build_from_graph(new_transformation_subgraph,
                                                                     input_entities=new_input_entities,
                                                                     output_entity=new_output_entity)

                # Record the transformation and how it was obtained.
                if new_transformation not in self._meta_plans:
                    # The transformation was never seen before.
                    blueprint = collections.defaultdict(lambda: collections.defaultdict(list))
                    meta_plan = MetaQueryPlan(transformation=new_transformation.deepcopy()[0],
                                              blueprint=blueprint)
                    self._meta_plans[new_transformation] = meta_plan
                else:
                    meta_plan = self._meta_plans[new_transformation]

                if depth not in meta_plan.canonical_transformations:
                    copy, mapping = new_transformation.deepcopy()
                    meta_plan.canonical_transformations[depth] = copy
                    # mapping = mapping.slice(nodes=set(copied_extender.transformation.iter_nodes()))
                    mapping = mapping.reverse()
                else:
                    canonical = meta_plan.canonical_transformations[depth]
                    mapping = next(new_transformation.get_subgraph_mappings(canonical))
                    # mapping = mapping.slice(nodes=set(copied_extender.transformation.iter_nodes()))
                    mapping = mapping.reverse()

                bp_item = MetaQueryPlan.BlueprintItem(depth=depth,
                                                      unit=copied_extender,
                                                      canonical_mapping=mapping,
                                                      sub_plan=plan,
                                                      border_mapping=final_border_mapping)
                for c in copied_extender.component_entries:
                    meta_plan.blueprint[depth][c].append(bp_item)
def test_3(self):
    from gauss.graphs.python.subgraph import _get_candidate_mappings

    g1 = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
    n2 = Node(label=0, entity=DEFAULT_ENTITY, value=20)
    n3 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n4 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n5 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g1.add_node(n1)
    g1.add_node(n2)
    g1.add_node(n3)
    g1.add_node(n4)
    g1.add_node(n5)
    g1.add_node(n6)
    g1.add_edge(Edge(n1, n3, 0))
    g1.add_edge(Edge(n3, n5, 1))
    g1.add_edge(Edge(n2, n4, 0))
    g1.add_edge(Edge(n4, n6, 1))

    g2 = Graph()
    n21 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n22 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n23 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g2.add_node(n21)
    g2.add_node(n22)
    g2.add_node(n23)
    g2.add_edge(Edge(n21, n22, 0))
    g2.add_edge(Edge(n22, n23, 1))

    m21 = _get_candidate_mappings(g2, g1)
    self.assertIsNotNone(m21)
    self.assertSetEqual({n1, n2}, m21.m_node[n21])
    self.assertSetEqual({n3, n4}, m21.m_node[n22])
    self.assertSetEqual({n5, n6}, m21.m_node[n23])

    g3 = Graph()
    n31 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
    n32 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n33 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g3.add_node(n31)
    g3.add_node(n32)
    g3.add_node(n33)
    g3.add_edge(Edge(n31, n32, 0))
    g3.add_edge(Edge(n32, n33, 1))

    m31 = _get_candidate_mappings(g3, g1)
    self.assertIsNotNone(m31)
    self.assertSetEqual({n1}, m31.m_node[n31])
    self.assertSetEqual({n3}, m31.m_node[n32])
    self.assertSetEqual({n5}, m31.m_node[n33])

    g4 = Graph()
    n41 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n42 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n43 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g4.add_node(n41)
    g4.add_node(n42)
    g4.add_node(n43)
    g4.add_edge(Edge(n41, n42, 0))
    g4.add_edge(Edge(n42, n43, 1))

    m41 = _get_candidate_mappings(g4, g1, GraphMapping(m_node={n41: n1}))
    self.assertIsNotNone(m41)
    self.assertSetEqual({n1}, m41.m_node[n41])
    self.assertSetEqual({n3}, m41.m_node[n42])
    self.assertSetEqual({n5}, m41.m_node[n43])
def test_greatest_common_universal_subgraph_1(self):
    g1 = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n3 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n4 = Node(label=3, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g1.add_nodes_and_edges(nodes=[n1, n2, n3, n4])
    g1.add_tags(["TAG_1", "TAG_2"])
    g1.add_tagged_edges([TaggedEdge(n2, n2, "TAG_L1"), TaggedEdge(n3, n3, "TAG_L2")])

    # Linear chain from n1 to n2, n2 to n3, and n3 to n4.
    g1.add_edge(Edge(src=n1, dst=n2, label=10))
    g1.add_edge(Edge(src=n2, dst=n3, label=11))
    g1.add_edge(Edge(src=n3, dst=n4, label=12))

    g2 = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n3 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n4 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n5 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n7 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n8 = Node(label=3, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g2.add_nodes_and_edges(nodes=[n1, n2, n3, n4, n5, n6, n7, n8])
    g2.add_tags(["TAG_2", "TAG_3"])
    g2.add_tagged_edges([TaggedEdge(n2, n2, "TAG_L1"), TaggedEdge(n3, n3, "TAG_L2")])

    # Only one of the label=2 nodes has an edge to a label=3 node.
    g2.add_edge(Edge(src=n1, dst=n2, label=10))
    g2.add_edge(Edge(src=n2, dst=n3, label=11))
    g2.add_edge(Edge(src=n2, dst=n4, label=11))
    g2.add_edge(Edge(src=n2, dst=n5, label=11))
    g2.add_edge(Edge(src=n2, dst=n6, label=11))
    g2.add_edge(Edge(src=n2, dst=n7, label=11))
    g2.add_edge(Edge(src=n7, dst=n8, label=12))

    query = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    query.add_nodes_and_edges(nodes=[n1, n2])
    query.add_edge(Edge(n1, n2, 10))

    supergraph, mapping = query.get_greatest_common_universal_supergraph([g1])

    # We expect the supergraph to be equivalent to g1.
    self.assertEqual(3, supergraph.get_num_edges())
    self.assertEqual(4, supergraph.get_num_nodes())
    self.assertSetEqual({0, 1, 2, 3}, {n.label for n in supergraph.iter_nodes()})
    self.assertSetEqual({10, 11, 12}, {e.label for e in supergraph.iter_edges()})
    self.assertSetEqual({"TAG_1", "TAG_2"}, set(supergraph.iter_tags()))
    self.assertEqual({"TAG_L1", "TAG_L2"}, {e.tag for e in supergraph.iter_tagged_edges()})
    for node in mapping.m_node:
        self.assertIn(node, query.get_all_nodes())

    supergraph, mapping = query.get_greatest_common_universal_supergraph([g1, g2])

    # We expect the supergraph to be the linear chain 0 -> 1 -> 2.
    self.assertEqual(2, supergraph.get_num_edges())
    self.assertEqual(3, supergraph.get_num_nodes())
    self.assertSetEqual({0, 1, 2}, {n.label for n in supergraph.iter_nodes()})
    self.assertSetEqual({10, 11}, {e.label for e in supergraph.iter_edges()})
    self.assertSetEqual({"TAG_2"}, set(supergraph.iter_tags()))
    self.assertEqual({"TAG_L1"}, {e.tag for e in supergraph.iter_tagged_edges()})
    for node in mapping.m_node:
        self.assertIn(node, query.get_all_nodes())
def build(cls, domain: SynthesisDomain, config: EngineConfig, witness_set: WitnessSet) -> 'QueryPlanner':
    q_planner = QueryPlanner(domain=domain, config=config)
    logger_color = logger.opt(colors=True)

    # ------------------------------------------------------------------------------------------------------ #
    # Stage 1 : Build unit plans
    # ------------------------------------------------------------------------------------------------------ #

    logger.debug("Extracting meta query-plans of length 1 from individual components...")
    for component_name in domain.get_available_components():
        for witness_entry in witness_set.entries[component_name]:
            q_planner._extract_unit_plans(component_name, witness_entry)

    # Log some information for post-mortem analysis if necessary.
    # Total plans found.
    logger_color.debug(f"Found <green>{len(q_planner._unit_plans)}</green> unit plans in total.")

    # Plans found per component.
    components_to_units: Dict[str, List[UnitMetaPlan]] = collections.defaultdict(list)
    for unit in q_planner._unit_plans.values():
        for c in unit.component_entries:
            components_to_units[c].append(unit)

    with logutils.temporary_add(f"{config.path}/logs/query_planner/unit_plans.log",
                                level="TRACE", only_sink=True) as logger_:
        logger_ = logger_.opt(raw=True)
        for component_name, units in components_to_units.items():
            logger_color.debug(f"Found <green>{len(units)}</green> unit plans from "
                               f"<blue>{component_name}</blue>.")
            logger_.opt(colors=True).debug(f"Found <green>{len(units)}</green> unit plans from "
                                           f"<blue>{component_name}</blue>.")
            for unit in units:
                logger_.trace(f"Component: {component_name}\n")
                logger_.trace("-----------------\n")
                logger_.trace("Transformation\n")
                logger_.trace("-----------------\n")
                logger_.trace(unit.transformation.to_str(domain))
                logger_.trace("\n-----------------\n")
                logger_.trace("Argument Mappings\n")
                logger_.trace("-----------------\n")
                logger_.trace(unit.component_entries[component_name].argument_mappings)
                logger_.trace("\n========xxx========\n\n")

    # ------------------------------------------------------------------------------------------------------ #
    # Stage 2 : Strengthen unit plans
    # ------------------------------------------------------------------------------------------------------ #

    logger_color.debug("Strengthening Unit Plans...")
    for unit_plan in debug_iter(list(q_planner._unit_plans.values()), desc='Strengthening Unit Plans'):
        query: Transformation = unit_plan.transformation
        for component_name in unit_plan.component_entries.keys():
            # The witness set examples are guaranteed to have a placeholder node for each
            # entity, so the use of placeholder nodes in transformations should not cause
            # issues.
            if unit_plan.empty:
                strengthened, mapping = query.deepcopy()
                strengthened = Graph.from_graph(strengthened)
            else:
                examples: List[Transformation] = witness_set.get_transformations(component_name)
                strengthened, mapping = query.get_greatest_common_universal_supergraph(examples)

            inp_entities = set(query.get_input_entities())
            m_reverse = mapping.reverse()
            keep_nodes = [n for n in strengthened.iter_nodes()
                          if m_reverse.m_ent.get(n.entity, None) in inp_entities]
            strengthened = strengthened.induced_subgraph(keep_nodes=keep_nodes)
            unit_plan.strengthenings[component_name] = (strengthened, mapping)

    logger_color.debug(f"Strengthened <green>{len(q_planner._unit_plans)}</green> unit plans.")

    # ------------------------------------------------------------------------------------------------------ #
    # Stage 3 : Combine unit plans to obtain query plans up to max-depth.
    #           Note that these are not explicitly constructed. Rather, a recursive
    #           formulation is established to construct the query plans quickly at test time.
    # ------------------------------------------------------------------------------------------------------ #

    # Initialize the meta-plans from these unit plans.
    logger.debug("Initializing meta-plans from unit-plans...")
    for transformation, unit_plan in q_planner._unit_plans.items():
        q_planner._meta_plans[transformation] = MetaQueryPlan.initialize_from_unit_plan(unit_plan.deepcopy()[0])

    logger_color.debug(f"Evolving meta query-plans up to a maximum length of "
                       f"<blue>{config.max_length}</blue>")

    # Set up a mapping from the label of the output node of transformations to the meta
    # plan for that transformation. This helps in quickly finding appropriate unit plans
    # to extend a plan with.
    nlabel_to_unit_plan: Dict[int, Set[UnitMetaPlan]] = collections.defaultdict(set)
    for transformation, unit_plan in q_planner._unit_plans.items():
        # Guaranteed to be a single node with that entity.
        output_node = next(transformation.get_output_nodes())
        nlabel_to_unit_plan[output_node.label].add(unit_plan)

    # At every depth, the worklist will be the set of query plans with an entry for
    # depth-1 in the blueprint.
    worklist: Set[MetaQueryPlan] = set(q_planner._meta_plans.values())
    for depth in range(2, config.max_length + 1):
        for unit in worklist:
            q_planner._evolve_meta_plan(unit, depth, nlabel_to_unit_plan)

        worklist = {plan for plan in q_planner._meta_plans.values() if depth in plan.blueprint}
        logger_color.debug(f"Found <green>{len(worklist)}</green> transformations in total "
                           f"at depth <blue>{depth}</blue>.")
        if len(worklist) == 0:
            break

    logger_color.debug(f"Found <green>{len(q_planner._meta_plans)}</green> transformations and meta "
                       f"query plans in total with <blue>max_depth={config.max_length}</blue>.")
    # TODO : Log the transformations to a file.

    # ------------------------------------------------------------------------------------------------------ #
    # Stage 4 : Set up auxiliary data-structures
    # ------------------------------------------------------------------------------------------------------ #

    # Flattened blueprint items enable access to all the items agnostic of the component name.
    for plan in q_planner._meta_plans.values():
        plan.blueprint_items = collections.defaultdict(set)
        for depth, bp_dict in plan.blueprint.items():
            for items_list in bp_dict.values():
                plan.blueprint_items[depth].update(items_list)

    # Sequence tries help in quickly computing candidate sequences given a synthesis problem.
    logger_color.debug("Constructing Sequence Tries...")
    q_planner._compute_sequence_tries()
    logger_color.debug("Sequence Tries constructed for every depth.")

    return q_planner
def init(self):
    domain = PandasLiteSynthesisDomain()
    replay = {k: iter(v) for k, v in self.replay_map.items()}
    graph = Graph()
    g_inputs = self._g_inputs = [self._convert_inp_to_graph(inp) for inp in self.inputs]
    int_to_val = {-idx: inp for idx, inp in enumerate(self.inputs, 1)}
    int_to_graph = {-idx: g_inp for idx, g_inp in enumerate(g_inputs, 1)}

    # Run the generators to extract the programs and graphs for each component call.
    # Merge the individual graphs into the master graph.
    call_strs: List[str] = []
    for idx, (component_name, arg_ints) in enumerate(self.skeleton, 1):
        c_inputs = [int_to_val[i] for i in arg_ints]
        g_c_inputs = [int_to_graph[i] for i in arg_ints]
        output, program, c_graph, output_graph = next(domain.enumerate(component_name, c_inputs,
                                                                       g_c_inputs, replay=replay))
        int_to_val[idx] = output
        int_to_graph[idx] = output_graph
        call_strs.append(program)
        graph.merge(c_graph)

    # Check that the final output is equivalent to the original output specified in the benchmark.
    assert domain.check_equivalent(self.output, int_to_val[self.skeleton.length]), \
        f"Generated output inconsistent with specified output in Pandas benchmark {self.b_id}"

    # Retrofit the value of the output entity to the original output.
    cur_out_entity = next(ent for ent in graph.iter_entities()
                          if ent.value is int_to_val[self.skeleton.length])
    cur_out_entity.value = self.output

    # Perform transitive closure w.r.t. the nodes corresponding to the intermediate outputs
    # and take the induced subgraph containing all nodes except those join nodes.
    if self.skeleton.length > 1:
        join_nodes = set.union(*(set(int_to_graph[i].iter_nodes())
                                 for i in range(1, self.skeleton.length)))
        domain.perform_transitive_closure(graph, join_nodes=join_nodes)
        intent_graph = graph.induced_subgraph(keep_nodes=set(graph.iter_nodes()) - join_nodes)
    else:
        intent_graph = graph

    self._graph = intent_graph

    # Also construct the string representation of the ground-truth program.
    program_list: List[str] = []
    for depth, (call_str, (component_name, arg_ints)) in enumerate(zip(call_strs, self.skeleton), 1):
        arg_strs = [f"inp{-i}" if i < 0 else f"v{i}" for i in arg_ints]
        call_str = call_str.format(**{f"inp{idx}": arg_str
                                      for idx, arg_str in enumerate(arg_strs, 1)})
        if depth == self.skeleton.length:
            program_list.append(call_str)
        else:
            program_list.append(f"v{depth} = {call_str}")

    self.program = "\n".join(program_list)
def extract_queries(problem: SynthesisProblem) -> Dict[Transformation, List[Query]]:
    # Stage 1 : Get all paths from one of the input nodes to an output node without any
    # input/output node in between.
    graph = problem.graph
    inputs = problem.inputs
    output = problem.output
    entities = list(graph.iter_entities())
    input_entities = [next(ent for ent in entities if ent.value is inp) for inp in inputs]
    output_entity = next(ent for ent in entities if ent.value is output)
    arg_numbering = {ent: idx for idx, ent in enumerate(input_entities)}

    placeholder_dict = {}
    # A placeholder node can represent any node belonging to an entity.
    # This helps coalesce equivalent query plans.
    for ent in itertools.chain(input_entities, [output_entity]):
        placeholder_dict[ent] = PlaceholderNode(entity=ent)

    path_dict: Dict[Entity, List[Path]] = extract_paths(graph, input_entities, output_entity)

    canonical_transformations: Dict[Transformation, Transformation] = {}
    # Only keep one transformation for a set of nodes, as satisfying that query means
    # satisfying all the others.
    seen: Set[Tuple[Transformation, FrozenSet[Node]]] = set()
    queries: Dict[Transformation, List[Query]] = collections.defaultdict(list)
    set_input_entities = set(input_entities)

    # Find queries by taking exactly one path, and placeholder nodes for the
    # input entities not present in the path.
    for path_ent, paths in path_dict.items():
        remaining_entities = [ent for ent in set_input_entities if ent is not path_ent]
        for path in paths:
            path_nodes, path_edges = path
            nodes = list(path_nodes) + [placeholder_dict[ent] for ent in remaining_entities]
            edges = path_edges

            # Get the corresponding subgraph.
            subgraph = Graph.from_nodes_and_edges(nodes=set(nodes), edges=set(edges))
            subgraph_transformation = Transformation.build_from_graph(subgraph,
                                                                      input_entities=input_entities,
                                                                      output_entity=output_entity)

            # Compute the symbolic counterpart to group subgraphs together by the
            # underlying transformation.
            symbolic_copy, mapping = create_symbolic_copy(subgraph)
            mapped_input_entities = [mapping.m_ent[i] for i in input_entities]
            mapped_output_entity = mapping.m_ent[output_entity]
            transformation = Transformation.build_from_graph(symbolic_copy,
                                                             input_entities=mapped_input_entities,
                                                             output_entity=mapped_output_entity)

            # Check if the transformation was seen before.
            if transformation not in canonical_transformations:
                canonical_transformations[transformation] = transformation
                seen.add((transformation, frozenset(n for n in subgraph.iter_nodes()
                                                    if n.entity in set_input_entities)))

                mapping = mapping.reverse()
                arg_number_mapping = ArgumentNumberMapping({idx: arg_numbering[mapping.m_ent[ent]]
                                                            for idx, ent in enumerate(mapped_input_entities)})

                # We need a mapping from transformation to the subgraph.
                queries[transformation].append(Query(transformation=transformation,
                                                     subgraph=subgraph_transformation,
                                                     mapping=mapping,
                                                     arg_number_mapping=arg_number_mapping))
            else:
                canonical = canonical_transformations[transformation]
                key = (transformation, frozenset(n for n in subgraph.iter_nodes()
                                                 if n.entity in set_input_entities))

                # Check if the transformation was seen before with the same input nodes.
                # If yes, continue. This is because if a graph satisfies the already seen
                # transformation for these input nodes, it will satisfy this one as well,
                # so there is no point in checking it.
                if key in seen:
                    continue

                seen.add(key)

                # We need a mapping from the canonical transformation to the subgraph.
                mapping = next(canonical.get_subgraph_mappings(transformation)).apply_mapping(mapping.reverse())
                arg_number_mapping = ArgumentNumberMapping({idx: arg_numbering[mapping.m_ent[ent]]
                                                            for idx, ent in enumerate(canonical.get_input_entities())})

                queries[canonical].append(Query(transformation=canonical,
                                                subgraph=subgraph_transformation,
                                                mapping=mapping,
                                                arg_number_mapping=arg_number_mapping))

    return queries
def test_2(self):
    g1 = Graph()
    n1 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
    n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g1.add_node(n1)
    g1.add_node(n2)
    g1.add_edge(Edge(n1, n2, 0))

    g2 = Graph()
    n3 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
    g2.add_node(n3)

    g3 = Graph()
    n4 = Node(label=0, entity=DEFAULT_ENTITY, value=20)
    g3.add_node(n4)

    mappings_21 = list(g2.get_subgraph_mappings(g1))
    mappings_31 = list(g3.get_subgraph_mappings(g1))
    self.assertEqual(1, len(mappings_21))
    self.assertEqual(mappings_21[0].m_node[n3], n1)
    self.assertEqual(mappings_21[0].m_ent[n3.entity], n1.entity)
    self.assertEqual(0, len(mappings_31))