Example #1
    def build(cls, strengthening_constraint: Graph) -> 'Intelligence':
        all_uids: Set[str] = set()
        selected: Dict[str, Set[Union[Node,
                                      str]]] = collections.defaultdict(set)
        not_selected: Dict[str, Set[Union[Node,
                                          str]]] = collections.defaultdict(set)

        for tag in strengthening_constraint.iter_tags():
            #  Tags correspond to SelectConst invocations
            #  Format is (SELECTED/NOT_SELECTED)@(val)@(uid)
            #  Values are guaranteed to be strings in the RLang domain.
            selected_bool, value, uid = tag.split('@')
            selected_bool = selected_bool == "SELECTED"
            all_uids.add(uid)
            if selected_bool:
                selected[uid].add(value)
            else:
                not_selected[uid].add(value)

        for tagged_edge in strengthening_constraint.iter_tagged_edges():
            #  Tagged edges should be self-edges for the RLang domain.
            #  Format is (SELECTED/NOT_SELECTED)@(uid)
            node = tagged_edge.src
            selected_bool, uid = tagged_edge.tag.split('@')
            selected_bool = selected_bool == "SELECTED"
            all_uids.add(uid)

            if selected_bool:
                selected[uid].add(node)
            else:
                not_selected[uid].add(node)

        return Intelligence(all_uids=all_uids,
                            selected=selected,
                            not_selected=not_selected)
Example #2
def extract_paths(graph: Graph, input_entities: List[Entity],
                  output_entity: Entity):
    path_dict: Dict[Entity, List[Path]] = {ent: [] for ent in input_entities}
    for node in itertools.chain(*(graph.iter_nodes(entity=ent)
                                  for ent in input_entities)):
        #  Find all the paths from node to an output node, without any other input or output nodes in between.
        #  An entry is the set of visited nodes, the current node to explore, and the current set of edges.
        entry: Tuple[Set[Node], Node, List[Edge]] = ({node}, node, [])
        worklist = collections.deque([entry])
        paths: List[Path] = []
        while len(worklist) > 0:
            visited, cur_node, edges = worklist.popleft()
            for edge in graph.iter_edges(src=cur_node):
                dst = edge.dst
                if dst in visited or dst.entity in input_entities:
                    continue

                if dst.entity is output_entity:
                    paths.append((visited | {dst}, edges + [edge]))
                else:
                    worklist.append((visited | {dst}, dst, edges + [edge]))

        path_dict[node.entity].extend(paths)

    return path_dict
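
#  A self-contained sketch of the same BFS-style path enumeration over a plain
#  adjacency-list dict (node -> neighbors), standing in for the gauss Graph
#  API; `blocked` plays the role of the input/output node restriction.
import collections

def paths_to_target(adj, start, target, blocked):
    paths = []
    worklist = collections.deque([({start}, start, [])])
    while worklist:
        visited, cur, edges = worklist.popleft()
        for nxt in adj.get(cur, []):
            if nxt in visited or (nxt in blocked and nxt != target):
                continue
            if nxt == target:
                paths.append(edges + [(cur, nxt)])
            else:
                worklist.append((visited | {nxt}, nxt, edges + [(cur, nxt)]))
    return paths

adj = {"a": ["b", "c"], "b": ["out"], "c": ["out"]}
assert len(paths_to_target(adj, "a", "out", blocked={"a", "out"})) == 2
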
def equality_transitive_closure(graph: Graph,
                                equality_label: int,
                                join_nodes: Optional[Set[Node]] = None,
                                valid_combinations: Optional[Set[int]] = None):
    worklist = collections.deque(
        e for e in graph.iter_edges(label=equality_label))
    seen_edges = set(worklist)

    while len(worklist) > 0:
        edge_item = worklist.popleft()
        added = set()
        if join_nodes is None or edge_item.src in join_nodes:
            for e in graph.iter_edges(dst=edge_item.src):
                if valid_combinations is None or e.label in valid_combinations:
                    added.add(Edge(e.src, edge_item.dst, e.label))

        if join_nodes is None or edge_item.dst in join_nodes:
            for e in graph.iter_edges(src=edge_item.dst):
                if valid_combinations is None or e.label in valid_combinations:
                    added.add(Edge(edge_item.src, e.dst, e.label))

        added -= seen_edges
        if len(added) > 0:
            graph.add_nodes_and_edges(edges=added)
            for e in added:
                if e.label == equality_label:
                    worklist.append(e)

                seen_edges.add(e)
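
#  A minimal sketch of the closure above over plain (src, dst, label) tuples:
#  an equality edge (a, b) copies edges incident to a/b across, so both nodes
#  carry the same relationships. Names are illustrative, not the gauss API;
#  the join_nodes/valid_combinations filters are omitted here.
import collections

EQ = "eq"

def equality_closure(edges):
    edges = set(edges)
    worklist = collections.deque(e for e in edges if e[2] == EQ)
    while worklist:
        src, dst, _ = worklist.popleft()
        added = {(s, dst, l) for (s, d, l) in edges if d == src}
        added |= {(src, d, l) for (s, d, l) in edges if s == dst}
        added -= edges
        edges |= added
        worklist.extend(e for e in added if e[2] == EQ)
    return edges

assert ("x", "b", "uses") in equality_closure({("a", "b", EQ), ("x", "a", "uses")})
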
Example #4
    def get_strengthening_constraint(self, input_graph: Graph) -> Graph:
        strengthened_input_graph = Graph()
        for constraint in self.constraints.values():
            strengthened_input_graph.merge(
                constraint.get_strengthening_constraint(input_graph))

        return strengthened_input_graph
    def _get_canonical_query_plans(self,
                                   sequence: List[str],
                                   transformation: Transformation) -> Dict[Skeleton, Set[QueryPlan]]:

        meta_plan = self._meta_plans[transformation]
        blueprint_item_lists = self._get_blueprint_item_lists(sequence,
                                                              meta_plan,
                                                              _d=len(sequence))
        canonical_transformation = meta_plan.canonical_transformations[len(sequence)]
        mapping = next(canonical_transformation.get_subgraph_mappings(transformation))

        skeletons_to_plans: Dict[Skeleton, Set[QueryPlan]] = collections.defaultdict(set)

        for blueprint_item_list in blueprint_item_lists:
            #  Break down the overall transformation in terms of the unit plans contained in the blueprint items.
            #  Store the connections between them as a graph mapping.
            connections = GraphMapping()
            connections.update(mapping)
            graph = Graph()
            for item in blueprint_item_list:
                graph.merge(item.unit.transformation)
                connections = connections.apply_mapping(item.canonical_mapping, only_keys=True)

                if item.border_mapping:
                    connections.update(item.border_mapping)
                    connections = connections.apply_mapping(connections, only_values=True)

            #  Assemble the query plan
            query_plan = QueryPlan(transformation,
                                   units=[item.unit.transformation for item in blueprint_item_list],
                                   all_connections=connections,
                                   strengthenings=[item.unit.strengthenings[component_name]
                                                   for component_name, item in zip(sequence, blueprint_item_list)])

            #  Obtain the skeletons for which this query plan would work.
            #  External inputs are negative integers. See gauss.synthesis.skeleton for details.
            ent_to_idx = {ent: -idx for idx, ent in enumerate(transformation.get_input_entities(), 1)}
            possible_arg_ints_lists = []
            for component_name, (idx, item) in zip(sequence, enumerate(blueprint_item_list, 1)):
                #  Get the mapped entities to the inputs of this unit's transformation, and look up their idx values.
                arg_ints = [ent_to_idx[connections.m_ent[ent]] for ent in item.unit.transformation.get_input_entities()]

                #  Get all the permutations as well.
                arg_ints_list = [arg_num_mapping.apply_list(arg_ints)
                                 for arg_num_mapping in item.unit.component_entries[component_name].argument_mappings]

                possible_arg_ints_lists.append(arg_ints_list)
                ent_to_idx[item.unit.transformation.get_output_entity()] = idx

            #  The skeletons are then simply all the combinations of the possible argument assignments.
            for arg_ints_list in itertools.product(*possible_arg_ints_lists):
                skeleton = Skeleton(list(zip(sequence, arg_ints_list)))
                skeletons_to_plans[skeleton].add(query_plan)

        return skeletons_to_plans
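
#  Sketch of the argument-numbering convention used above: external inputs get
#  negative integers, intermediate outputs positive ones (their depth).
#  Entity names below are hypothetical.
inputs = ["ent_a", "ent_b"]
ent_to_idx = {ent: -idx for idx, ent in enumerate(inputs, 1)}
assert ent_to_idx == {"ent_a": -1, "ent_b": -2}
ent_to_idx["v1"] = 1  # the output of the component at depth 1
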
Example #6
def _get_explanation_expr_str(
        graph: Graph, node: Node, node_label_dict: Dict[int, str],
        edge_label_dict: Dict[int, str]) -> Optional[str]:
    args = collections.defaultdict(list)
    for edge in graph.iter_edges(dst=node):
        label = edge_label_dict[edge.label]
        if label.startswith("CUM") or label == "COLUMN" or label == "ROW":
            continue

        if node_label_dict[edge.src.label] == "INTERM":
            args[label].append(
                _get_explanation_expr_str(graph, edge.src, node_label_dict,
                                          edge_label_dict))
        else:
            args[label].append(str(edge.src.value))

    if len(args) == 0:
        return None

    if "EQUAL" in args:
        return args["EQUAL"][0]

    key = next(iter(args.keys()))
    arg_str = ", ".join(args[key])
    return f"({key.upper()}({arg_str}))"
    def _adapt_query_plan(self, plan: QueryPlan, query: Query):
        #  The given plan is assumed to be a canonical query plan.
        #  Also, the transformation in plan should be the same as the transformation in query. This should be
        #  guaranteed by construction of the query plan.

        #  Create a fresh copy of the plan where the transformation is the actual subgraph.
        adapted_plan = plan.deepcopy().adapt(new_transformation=query.subgraph,
                                             mapping_old_to_new=query.mapping)

        equality_label = self._domain.get_equality_edge_label()

        if equality_label is None:
            return None

        #  Propagate known values amongst the nodes with the equality edge
        influence: Dict[Node, Set[Node]] = collections.defaultdict(set)
        for k, v in adapted_plan.all_connections.m_node.items():
            influence[v].add(k)

        seen = set()
        worklist = collections.deque(adapted_plan.transformation.iter_nodes())
        while len(worklist) > 0:
            node = worklist.popleft()
            if node in seen:
                continue

            seen.add(node)
            if node.value is SYMBOLIC_VALUE:
                continue

            #  Connected nodes inherit the value
            for n in influence[node]:
                if n.value is SYMBOLIC_VALUE:
                    n.value = node.value
                    worklist.append(n)

            #  Equality edges with src and dst as node also propagate the values
            for unit in adapted_plan.units:
                for e in unit.iter_edges(src=node, label=equality_label):
                    if e.dst.value is SYMBOLIC_VALUE:
                        e.dst.value = node.value
                        worklist.append(e.dst)

                for e in unit.iter_edges(dst=node, label=equality_label):
                    if e.src.value is SYMBOLIC_VALUE:
                        e.src.value = node.value
                        worklist.append(e.src)

        #  We may have wrecked the internal data-structures of the unit transformations by changing values directly.
        #  Create shallow copies which force a rebuild
        adapted_plan.units = [Transformation.build_from_graph(Graph.from_nodes_and_edges(unit.get_all_nodes(),
                                                                                         unit.get_all_edges()),
                                                              unit.get_input_entities(),
                                                              unit.get_output_entity())
                              for unit in adapted_plan.units]

        return adapted_plan
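
#  Minimal sketch of the value propagation above: known values flow across
#  equality links until a fixpoint, with plain dicts standing in for nodes and
#  SYMBOLIC for SYMBOLIC_VALUE.
import collections

SYMBOLIC = object()

def propagate(values, eq_pairs):
    neighbors = collections.defaultdict(set)
    for a, b in eq_pairs:
        neighbors[a].add(b)
        neighbors[b].add(a)
    worklist = collections.deque(n for n, v in values.items() if v is not SYMBOLIC)
    while worklist:
        node = worklist.popleft()
        for n in neighbors[node]:
            if values[n] is SYMBOLIC:
                values[n] = values[node]
                worklist.append(n)
    return values

assert propagate({"a": 1, "b": SYMBOLIC, "c": SYMBOLIC},
                 [("a", "b"), ("b", "c")]) == {"a": 1, "b": 1, "c": 1}
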
Example #8
def create_symbolic_copy(graph: Graph) -> Tuple[Graph, GraphMapping]:
    mapping = GraphMapping()
    for entity in graph.iter_entities():
        mapping.m_ent[entity] = Entity(value=SYMBOLIC_VALUE)

    for node in graph.iter_nodes():
        mapping.m_node[node] = Node(label=node.label,
                                    entity=mapping.m_ent[node.entity],
                                    value=SYMBOLIC_VALUE)

    new_graph = Graph.from_nodes_and_edges(nodes=set(mapping.m_node.values()),
                                           edges={
                                               Edge(src=mapping.m_node[e.src],
                                                    dst=mapping.m_node[e.dst],
                                                    label=e.label)
                                               for e in graph.iter_edges()
                                           })

    return new_graph, mapping
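
#  The pattern above (remap entities, then nodes, then rebuild edges through
#  the node mapping) is a standard graph-copy idiom. A plain-dict sketch:
old_to_new = {"n1": "m1", "n2": "m2"}
edges = {("n1", "n2", 0)}
assert {(old_to_new[s], old_to_new[d], l) for (s, d, l) in edges} == {("m1", "m2", 0)}
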
Example #9
    def get_strengthening_constraint(self, input_graph: Graph) -> Graph:
        common_tags = None
        common_edges = None
        common_tagged_edges = None

        for plan, partial_mappings in self._checks.items():
            strengthening, s_mapping = plan.strengthenings[self.depth]
            s_edges = strengthening.get_all_edges()
            s_tagged_edges = set(strengthening.iter_tagged_edges())

            plan_tags = set(strengthening.iter_tags())
            plan_tagged_edges = None
            plan_edges = None
            for partial_mapping in partial_mappings:
                mapping_wrt_inp_graph = partial_mapping.apply_mapping(
                    s_mapping, only_keys=True)
                for m in strengthening.get_subgraph_mappings(
                        input_graph, partial_mapping=mapping_wrt_inp_graph):
                    if plan_tagged_edges is None:
                        plan_tagged_edges = {
                            TaggedEdge(m.m_node[e.src], m.m_node[e.dst], e.tag)
                            for e in s_tagged_edges
                        }
                        plan_edges = {
                            Edge(m.m_node[e.src], m.m_node[e.dst], e.label)
                            for e in s_edges
                        }
                    else:
                        plan_tagged_edges.intersection_update(
                            TaggedEdge(m.m_node[e.src], m.m_node[e.dst], e.tag)
                            for e in s_tagged_edges)
                        plan_edges.intersection_update(
                            Edge(m.m_node[e.src], m.m_node[e.dst], e.label)
                            for e in s_edges)

            if common_tags is None:
                common_tags = plan_tags or set()
                common_tagged_edges = plan_tagged_edges or set()
                common_edges = plan_edges or set()

            else:
                common_tags.intersection_update(plan_tags or set())
                common_tagged_edges.intersection_update(plan_tagged_edges
                                                        or set())
                common_edges.intersection_update(plan_edges or set())

        nodes = {e.src for e in common_tagged_edges}
        nodes.update(e.dst for e in common_tagged_edges)
        nodes.update(e.src for e in common_edges)
        nodes.update(e.dst for e in common_edges)

        result = Graph.from_nodes_and_edges(nodes=nodes, edges=common_edges)
        result.add_tagged_edges(common_tagged_edges)
        result.add_tags(common_tags)
        return result
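
#  The constraint above is an intersection across plans of intersections
#  across subgraph mappings. A minimal set-based sketch with made-up edge sets:
per_plan = [[{1, 2, 3}, {2, 3}], [{2, 3, 4}]]  # plans -> per-mapping edge sets
common = None
for mapping_sets in per_plan:
    plan_edges = set.intersection(*mapping_sets)
    common = plan_edges if common is None else common & plan_edges
assert common == {2, 3}
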
    def test_1(self):
        from gauss.graphs.python.subgraph import _get_candidate_mappings
        g1 = Graph()
        n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        g1.add_node(n1)
        g1.add_node(n2)
        g1.add_edge(Edge(n1, n2, 0))

        g2 = Graph()
        n3 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        g2.add_node(n3)

        m1 = _get_candidate_mappings(g2, g1)
        m2 = _get_candidate_mappings(g1, g2)

        self.assertIsNotNone(m1)
        self.assertIsNone(m2)
        self.assertIn(n3, m1.m_node)
        self.assertSetEqual({n1}, m1.m_node[n3])
    def _extract_unit_plans(self, component_name: str, witness_entry: WitnessEntry):
        graph = witness_entry.graph
        input_entities = witness_entry.get_input_entities()
        output_entity = witness_entry.get_output_entity()

        placeholder_dict = {}
        #  A placeholder node can represent any node belonging to an entity.
        #  This helps coalesce equivalent query plans.
        for ent in itertools.chain(input_entities, [output_entity]):
            placeholder_dict[ent] = PlaceholderNode(entity=ent)

        path_dict: Dict[Entity, List[Path]] = extract_paths(graph, input_entities, output_entity)

        #  Find queries by taking exactly one path, and placeholder nodes for the
        #  input entities not present in the path.
        for path_ent, paths in path_dict.items():
            remaining_entities = [ent for ent in input_entities if ent is not path_ent]
            for path in paths:
                path_nodes, path_edges = path
                nodes = list(path_nodes) + [placeholder_dict[ent] for ent in remaining_entities]
                edges = path_edges

                #  Get the corresponding subgraph.
                subgraph = Graph.from_nodes_and_edges(nodes=set(nodes), edges=set(edges))
                self._record_unit_meta_query_plan(component_name, subgraph, input_entities, output_entity)

        #  Include empty transformations to help with evolution (the second stage).
        #  An empty transformation plays the role of a *wildcard* plan, that is, any transformation is valid.
        #  We add all empty transformations with input nodes spanning all distinct input node types
        #  (including placeholders) and the output being the placeholder node.
        label_canonical_node: Dict[Entity, Dict[int, Node]] = collections.defaultdict(dict)
        for ent in input_entities:
            for node in graph.iter_nodes(entity=ent):
                label_canonical_node[ent][node.label] = node

        canonical_nodes: List[List[Node]] = [list(v.values()) for v in label_canonical_node.values()]
        canonical_nodes.append([placeholder_dict[output_entity]])
        for subgraph_nodes in itertools.product(*canonical_nodes):
            subgraph = Graph.from_nodes_and_edges(nodes=subgraph_nodes, edges=[])
            self._record_unit_meta_query_plan(component_name, subgraph, input_entities, output_entity,
                                              empty=True)
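
#  The cartesian product above picks one canonical node per input entity plus
#  the output placeholder; a plain-list sketch with hypothetical node names:
import itertools
canonical = [["a1", "a2"], ["b1"], ["OUT"]]
assert list(itertools.product(*canonical)) == [("a1", "b1", "OUT"),
                                               ("a2", "b1", "OUT")]
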
Example #12
def _get_involved_nodes(graph: Graph, node: Node, node_label_dict: Dict[int,
                                                                        str],
                        edge_label_dict: Dict[int, str]) -> Set[Node]:
    result = set()
    for edge in graph.iter_edges(dst=node):
        label = edge_label_dict[edge.label]
        if label.startswith("CUM") or label == "COLUMN" or label == "ROW":
            continue

        result.add(edge.src)
        result.update(
            _get_involved_nodes(graph, edge.src, node_label_dict,
                                edge_label_dict))

    return result
Example #13
    def prepare_solution(self, output: Any, output_graph: Graph) -> Solution:
        if self.problem.input_names is not None:
            int_to_names: Dict[int, str] = {
                -idx: name
                for idx, name in enumerate(self.problem.input_names, 1)
            }
        else:
            int_to_names: Dict[int, str] = {
                -idx: f"inp{idx}"
                for idx in range(1, len(self.problem.inputs) + 1)
            }

        int_to_names[self.skeleton.length] = self.problem.output_name

        graph = Graph()
        for g in self.graphs:
            graph.merge(g)

        #  Perform transitive closure w.r.t. the nodes corresponding to the intermediate outputs
        #  and take the induced subgraph containing all nodes except those join nodes.
        if self.skeleton.length > 1:
            join_nodes = set.union(*(set(self.int_to_graph[i].iter_nodes())
                                     for i in range(1, self.skeleton.length)))
            self.domain.perform_transitive_closure(graph,
                                                   join_nodes=join_nodes)
            graph = graph.induced_subgraph(keep_nodes=set(graph.iter_nodes()) -
                                           join_nodes)

        return self.domain.prepare_solution(
            self.problem.inputs,
            output,
            graph,
            self.problem.graph_inputs,
            output_graph,
            self.enumeration_items,
            arguments=[arg_ints for (comp_name, arg_ints) in self.skeleton],
            int_to_names=int_to_names,
            int_to_obj=self.int_to_val)
    def test_4(self):
        from gauss.graphs.python.subgraph import _get_candidate_mappings
        query = Graph()
        n11 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n12 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n13 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        query.add_node(n11)
        query.add_node(n12)
        query.add_node(n13)
        query.add_edge(Edge(n11, n12, 0))
        query.add_edge(Edge(n11, n13, 1))

        graph = Graph()
        n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n3 = Node(label=3, entity=DEFAULT_ENTITY,
                  value=SYMBOLIC_VALUE)  # 3, not 2
        n4 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n5 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n7 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        graph.add_node(n1)
        graph.add_node(n2)
        graph.add_node(n3)
        graph.add_node(n4)
        graph.add_node(n5)
        graph.add_node(n6)
        graph.add_node(n7)
        graph.add_edge(Edge(n1, n2, 0))
        graph.add_edge(Edge(n1, n3, 1))
        graph.add_edge(Edge(n4, n5, 0))
        graph.add_edge(Edge(n4, n6, 1))
        graph.add_edge(Edge(n4, n7, 1))

        m = _get_candidate_mappings(query, graph)
        self.assertIsNotNone(m)
    def test_3(self):
        g1 = Graph()
        n1 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
        n2 = Node(label=0, entity=DEFAULT_ENTITY, value=20)
        n3 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n4 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n5 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)

        g1.add_node(n1)
        g1.add_node(n2)
        g1.add_node(n3)
        g1.add_node(n4)
        g1.add_node(n5)
        g1.add_node(n6)

        g1.add_edge(Edge(n1, n3, 0))
        g1.add_edge(Edge(n3, n5, 1))
        g1.add_edge(Edge(n2, n4, 0))
        g1.add_edge(Edge(n4, n6, 1))

        g2 = Graph()
        n21 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n22 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n23 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        g2.add_node(n21)
        g2.add_node(n22)
        g2.add_node(n23)
        g2.add_edge(Edge(n21, n22, 0))
        g2.add_edge(Edge(n22, n23, 1))

        mappings_21 = list(g2.get_subgraph_mappings(g1))
        self.assertEqual(2, len(mappings_21))

        g3 = Graph()
        n31 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
        n32 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n33 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        g3.add_node(n31)
        g3.add_node(n32)
        g3.add_node(n33)
        g3.add_edge(Edge(n31, n32, 0))
        g3.add_edge(Edge(n32, n33, 1))

        mappings_31 = list(g3.get_subgraph_mappings(g1))
        self.assertEqual(1, len(mappings_31))
Example #16
    def _solve_for_skeleton_recursive(
            self,
            problem: SynthesisProblem,
            skeleton: Skeleton,
            query_plans: QueryPlans,
            context: SolverContext,
            _depth: int = 0) -> Iterator[Tuple[Any, Graph]]:

        domain = self._domain
        component_name, arg_ints = skeleton[_depth]
        inputs, g_inputs = context.get_arguments(depth=_depth)
        inp_entities = [
            next(iter(g_inp.iter_entities())) for g_inp in g_inputs
        ]
        inp_graph = Graph()
        for g_inp in g_inputs:
            inp_graph.merge(g_inp)

        #  Get the strengthening constraint for this depth.
        #  Specifically, for every query, get the intersection of the strengthenings of all the query plans for that
        #  query at this particular depth. Then take the union of all of these.
        #  In other words, this strengthening constraint is a graph containing the nodes, edges, tags and tagged edges
        #  that must be satisfied by the graph containing the inputs, that is `inp_graph` in this context.
        #  This constraint can then be used by the `enumerate` procedure to speed up the search.
        strengthening_constraint: Graph = context.waypoints[
            _depth].get_strengthening_constraint(inp_graph)
        enumeration_item: EnumerationItem
        for enumeration_item in domain.enumerate(
                component_name=component_name,
                inputs=inputs,
                g_inputs=g_inputs,
                constants=problem.constants,
                strengthening_constraint=strengthening_constraint):

            output = enumeration_item.output
            c_graph = enumeration_item.graph
            o_graph = enumeration_item.o_graph

            # for g in g_inputs:
            #     assert set(g.iter_nodes()).issubset(set(c_graph.iter_nodes()))

            if (problem.timeout is not None
                    and time.time() - self._time_start > problem.timeout):
                raise TimeoutError("Exceeded time limit.")

            out_entity = next(iter(o_graph.iter_entities()))
            c_graph.add_node(PlaceholderNode(entity=out_entity))
            c_graph = Transformation.build_from_graph(
                c_graph, input_entities=inp_entities, output_entity=out_entity)

            #  Check if the returned graph is consistent with the query plans.
            if not context.check_validity(c_graph, depth=_depth):
                continue

            #  Prepare for the next round.
            context.step(output=output,
                         graph=c_graph,
                         output_graph=o_graph,
                         enumeration_item=enumeration_item,
                         depth=_depth)

            if _depth == skeleton.length - 1:
                #  This was the last component; prepare the program and return it along with the final output and graph.
                yield output, o_graph

            else:
                #  Move on to the next component.
                yield from self._solve_for_skeleton_recursive(problem,
                                                              skeleton,
                                                              query_plans,
                                                              context,
                                                              _depth=_depth + 1)
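
#  Skeleton of the recursion above: enumerate candidates at each depth and
#  recurse until the last component, yielding complete solutions.
#  `candidates_at` is a hypothetical stand-in for domain.enumerate plus the
#  validity check.
def solve(depth, length, candidates_at, prefix=()):
    for cand in candidates_at(depth, prefix):
        if depth == length - 1:
            yield prefix + (cand,)
        else:
            yield from solve(depth + 1, length, candidates_at, prefix + (cand,))

assert len(list(solve(0, 2, lambda d, p: ["x", "y"]))) == 4
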
    def test_5(self):
        #  Stress-tests the intelligence of back-tracking
        query = Graph()
        n11 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n12 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n13 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n14 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n15 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n16 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        query.add_node(n11)
        query.add_node(n12)
        query.add_node(n13)
        query.add_node(n14)
        query.add_node(n15)
        query.add_node(n16)
        query.add_edge(Edge(n13, n15, 0))
        query.add_edge(Edge(n13, n15, 1))
        query.add_edge(Edge(n14, n16, 0))
        query.add_edge(Edge(n14, n16, 1))

        graph = Graph()
        n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n2 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n3 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n4 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n5 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n7 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n8 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n9 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n10 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        graph.add_node(n1)
        graph.add_node(n2)
        graph.add_node(n3)
        graph.add_node(n4)
        graph.add_node(n5)
        graph.add_node(n6)
        graph.add_node(n7)
        graph.add_node(n8)
        graph.add_node(n9)
        graph.add_node(n10)
        graph.add_edge(Edge(n3, n5, 0))
        graph.add_edge(Edge(n3, n6, 1))
        graph.add_edge(Edge(n4, n5, 1))
        graph.add_edge(Edge(n4, n6, 0))

        mappings = list(
            query.get_subgraph_mappings(
                graph, _worklist_order=[n11, n12, n13, n14, n15, n16]))
        self.assertEqual(0, len(mappings))
    def test_4(self):
        query = Graph()
        n11 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n12 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n13 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        query.add_node(n11)
        query.add_node(n12)
        query.add_node(n13)
        query.add_edge(Edge(n11, n12, 0))
        query.add_edge(Edge(n11, n13, 1))

        graph = Graph()
        n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n3 = Node(label=3, entity=DEFAULT_ENTITY,
                  value=SYMBOLIC_VALUE)  # 3, not 2
        n4 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n5 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n7 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        graph.add_node(n1)
        graph.add_node(n2)
        graph.add_node(n3)
        graph.add_node(n4)
        graph.add_node(n5)
        graph.add_node(n6)
        graph.add_node(n7)
        graph.add_edge(Edge(n1, n2, 0))
        graph.add_edge(Edge(n1, n3, 1))
        graph.add_edge(Edge(n4, n5, 0))
        graph.add_edge(Edge(n4, n6, 1))
        graph.add_edge(Edge(n4, n7, 1))

        mappings = list(query.get_subgraph_mappings(graph))
        self.assertEqual(2, len(mappings))
    def _evolve_meta_plan(self,
                          plan: MetaQueryPlan,
                          depth: int,
                          nlabel_to_unit_plan: Dict[int, Set[UnitMetaPlan]]):

        #  Replace an input node of plan with the output of a unit meta-plan (contained in nlabel_to_unit_plan).
        #  We thus extend an existing plan with the output of exactly one component, increasing the program
        #  depth by exactly one.

        plan_transformation: Transformation = plan.canonical_transformations[depth - 1]
        all_input_nodes: Set[Node] = set(plan_transformation.get_input_nodes())
        for inp_node in all_input_nodes:
            #  Even if it is a placeholder node, it can only be extended via the empty transform. This makes sense
            #  as no matter what the extension is, it will never "influence" the final output, as there is no path
            #  between a placeholder node and the output node. This helps reduce the size of the collection of
            #  meta query-plans by a large margin.

            remaining_inputs = all_input_nodes - {inp_node}
            for extender in nlabel_to_unit_plan[int(inp_node.label)]:
                #  We can map the other inputs to the inputs of the extender plan. They can also be distinct
                #  inputs of their own. The total number of inputs should, however, be less than max_inputs.
                #  For the remaining inputs, if they are a placeholder node, they can be mapped to *any* of the
                #  input nodes of the extender plan, regardless of the label.
                nlabel_to_inp_node: Dict[int, Set[Node]] = collections.defaultdict(set)
                extender_inputs: Set[Node] = set(extender.transformation.get_input_nodes())
                #  Guaranteed to be a single output node by construction.
                extender_output: Node = next(extender.transformation.get_output_nodes())

                for inp in extender_inputs:
                    nlabel_to_inp_node[inp.label].add(inp)

                nlabel_to_inp_node[extender_output.label].add(extender_output)

                mapping_possibilities: Dict[Node, Set[Node]] = {inp_node: {extender_output}}
                for inp in remaining_inputs:
                    if inp.label == PLACEHOLDER_LABEL:
                        mapping_possibilities[inp] = extender_inputs | {extender_output, inp}
                    else:
                        mapping_possibilities[inp] = nlabel_to_inp_node[inp.label] | {inp}

                node_list = list(all_input_nodes)
                for border_node_mapping in itertools.product(*[mapping_possibilities[n] for n in node_list]):
                    border_node_mapping: Dict[Node, Node] = dict(zip(node_list, border_node_mapping))

                    border_mapping = GraphMapping(m_ent={k.entity: v.entity for k, v in border_node_mapping.items()},
                                                  m_node=border_node_mapping.copy())

                    #  Create a deepcopy of the extender for safety
                    copied_extender, copy_mapping = extender.deepcopy()
                    copied_extender_inputs: Set[Node] = set(copied_extender.transformation.get_input_nodes())
                    copied_extender_output: Node = next(copied_extender.transformation.get_output_nodes())
                    border_mapping = border_mapping.apply_mapping(copy_mapping, only_values=True)
                    assert border_mapping.m_node != border_node_mapping

                    #  The new inputs are the inputs of extender, plus the nodes of the current plan
                    #  which were not bound to any of the inputs of extender.
                    #  We also decide the order of the nodes/entities right now.
                    new_input_nodes: List[Node] = []
                    for inp in plan_transformation.get_input_nodes():
                        mapped = border_mapping.m_node[inp]
                        if mapped is inp:
                            new_input_nodes.append(inp)
                        elif mapped is copied_extender_output:
                            new_input_nodes.extend(i for i in copied_extender.transformation.get_input_nodes()
                                                   if i not in new_input_nodes)
                        elif mapped not in new_input_nodes:
                            assert mapped in copied_extender_inputs
                            new_input_nodes.append(mapped)

                    new_input_entities = [n.entity for n in new_input_nodes]
                    #  Every entity is associated with one node so the following should hold true.
                    assert len(new_input_entities) == len(set(new_input_entities))

                    if len(new_input_entities) > self._config.max_inputs:
                        continue

                    new_output_entity = plan_transformation.get_output_entity()

                    #  Obtain the transformation by establishing common edges between the node pairs in
                    #  border_node_mapping, taking the transitive closure w.r.t equality, and finally the
                    #  induced subgraph by removing the input nodes of the current plan.
                    joint_graph = Graph.from_graph(copied_extender.transformation)
                    joint_graph.merge(plan_transformation)

                    final_border_mapping = GraphMapping()
                    for k, v in border_mapping.m_node.items():
                        if k is not v:
                            final_border_mapping.m_node[k] = v
                            final_border_mapping.m_ent[k.entity] = v.entity
                            for edge in plan_transformation.iter_edges(src=k):
                                joint_graph.add_edge(Edge(v, edge.dst, edge.label))
                            for edge in plan_transformation.iter_edges(dst=k):
                                joint_graph.add_edge(Edge(edge.src, v, edge.label))

                    join_nodes: Set[Node] = set(all_input_nodes)
                    join_nodes.difference_update(new_input_nodes)
                    join_nodes.add(copied_extender_output)
                    self._domain.perform_transitive_closure(joint_graph, join_nodes=join_nodes)

                    keep_nodes = set(joint_graph.iter_nodes())
                    keep_nodes.difference_update(join_nodes)
                    new_transformation_subgraph = joint_graph.induced_subgraph(keep_nodes=keep_nodes)
                    new_transformation = Transformation.build_from_graph(new_transformation_subgraph,
                                                                         input_entities=new_input_entities,
                                                                         output_entity=new_output_entity)

                    #  Record the transformation and how it was obtained.
                    if new_transformation not in self._meta_plans:
                        #  The transformation was never seen before.
                        blueprint = collections.defaultdict(lambda: collections.defaultdict(list))
                        meta_plan = MetaQueryPlan(transformation=new_transformation.deepcopy()[0],
                                                  blueprint=blueprint)
                        self._meta_plans[new_transformation] = meta_plan
                    else:
                        meta_plan = self._meta_plans[new_transformation]

                    if depth not in meta_plan.canonical_transformations:
                        copy, mapping = new_transformation.deepcopy()
                        meta_plan.canonical_transformations[depth] = copy
                        # mapping = mapping.slice(nodes=set(copied_extender.transformation.iter_nodes()))
                        mapping = mapping.reverse()

                    else:
                        canonical = meta_plan.canonical_transformations[depth]
                        mapping = next(new_transformation.get_subgraph_mappings(canonical))
                        # mapping = mapping.slice(nodes=set(copied_extender.transformation.iter_nodes()))
                        mapping = mapping.reverse()

                    bp_item = MetaQueryPlan.BlueprintItem(depth=depth,
                                                          unit=copied_extender,
                                                          canonical_mapping=mapping,
                                                          sub_plan=plan,
                                                          border_mapping=final_border_mapping)

                    for c in copied_extender.component_entries:
                        meta_plan.blueprint[depth][c].append(bp_item)
    def test_3(self):
        from gauss.graphs.python.subgraph import _get_candidate_mappings
        g1 = Graph()
        n1 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
        n2 = Node(label=0, entity=DEFAULT_ENTITY, value=20)
        n3 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n4 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n5 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)

        g1.add_node(n1)
        g1.add_node(n2)
        g1.add_node(n3)
        g1.add_node(n4)
        g1.add_node(n5)
        g1.add_node(n6)

        g1.add_edge(Edge(n1, n3, 0))
        g1.add_edge(Edge(n3, n5, 1))
        g1.add_edge(Edge(n2, n4, 0))
        g1.add_edge(Edge(n4, n6, 1))

        g2 = Graph()
        n21 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n22 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n23 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        g2.add_node(n21)
        g2.add_node(n22)
        g2.add_node(n23)
        g2.add_edge(Edge(n21, n22, 0))
        g2.add_edge(Edge(n22, n23, 1))

        m21 = _get_candidate_mappings(g2, g1)
        self.assertIsNotNone(m21)
        self.assertSetEqual({n1, n2}, m21.m_node[n21])
        self.assertSetEqual({n3, n4}, m21.m_node[n22])
        self.assertSetEqual({n5, n6}, m21.m_node[n23])

        g3 = Graph()
        n31 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
        n32 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n33 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        g3.add_node(n31)
        g3.add_node(n32)
        g3.add_node(n33)
        g3.add_edge(Edge(n31, n32, 0))
        g3.add_edge(Edge(n32, n33, 1))

        m31 = _get_candidate_mappings(g3, g1)
        self.assertIsNotNone(m31)
        self.assertSetEqual({n1}, m31.m_node[n31])
        self.assertSetEqual({n3}, m31.m_node[n32])
        self.assertSetEqual({n5}, m31.m_node[n33])

        g4 = Graph()
        n41 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n42 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n43 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        g4.add_node(n41)
        g4.add_node(n42)
        g4.add_node(n43)
        g4.add_edge(Edge(n41, n42, 0))
        g4.add_edge(Edge(n42, n43, 1))

        m41 = _get_candidate_mappings(g4, g1, GraphMapping(m_node={n41: n1}))
        self.assertIsNotNone(m41)
        self.assertSetEqual({n1}, m41.m_node[n41])
        self.assertSetEqual({n3}, m41.m_node[n42])
        self.assertSetEqual({n5}, m41.m_node[n43])
Example #21
    def test_greatest_common_universal_subgraph_1(self):
        g1 = Graph()
        n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n3 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n4 = Node(label=3, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)

        g1.add_nodes_and_edges(nodes=[n1, n2, n3, n4])
        g1.add_tags(["TAG_1", "TAG_2"])
        g1.add_tagged_edges(
            [TaggedEdge(n2, n2, "TAG_L1"),
             TaggedEdge(n3, n3, "TAG_L2")])

        #  Linear chain n1 -> n2 -> n3 -> n4
        g1.add_edge(Edge(src=n1, dst=n2, label=10))
        g1.add_edge(Edge(src=n2, dst=n3, label=11))
        g1.add_edge(Edge(src=n3, dst=n4, label=12))

        g2 = Graph()
        n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n3 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n4 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n5 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n6 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n7 = Node(label=2, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n8 = Node(label=3, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)

        g2.add_nodes_and_edges(nodes=[n1, n2, n3, n4, n5, n6, n7, n8])
        g2.add_tags(["TAG_2", "TAG_3"])
        g2.add_tagged_edges(
            [TaggedEdge(n2, n2, "TAG_L1"),
             TaggedEdge(n3, n3, "TAG_L2")])

        #  Only one of label=2 has an edge to a label=3
        g2.add_edge(Edge(src=n1, dst=n2, label=10))
        g2.add_edge(Edge(src=n2, dst=n3, label=11))
        g2.add_edge(Edge(src=n2, dst=n4, label=11))
        g2.add_edge(Edge(src=n2, dst=n5, label=11))
        g2.add_edge(Edge(src=n2, dst=n6, label=11))
        g2.add_edge(Edge(src=n2, dst=n7, label=11))
        g2.add_edge(Edge(src=n7, dst=n8, label=12))

        query = Graph()
        n1 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)

        query.add_nodes_and_edges(nodes=[n1, n2])
        query.add_edge(Edge(n1, n2, 10))

        supergraph, mapping = query.get_greatest_common_universal_supergraph(
            [g1])

        #  We expect the supergraph to be equivalent to g1
        self.assertEqual(3, supergraph.get_num_edges())
        self.assertEqual(4, supergraph.get_num_nodes())
        self.assertSetEqual({0, 1, 2, 3},
                            {n.label
                             for n in supergraph.iter_nodes()})
        self.assertSetEqual({10, 11, 12},
                            {e.label
                             for e in supergraph.iter_edges()})
        self.assertSetEqual({"TAG_1", "TAG_2"}, set(supergraph.iter_tags()))
        self.assertEqual({"TAG_L1", "TAG_L2"},
                         {e.tag
                          for e in supergraph.iter_tagged_edges()})
        for node in mapping.m_node:
            self.assertIn(node, query.get_all_nodes())

        supergraph, mapping = query.get_greatest_common_universal_supergraph(
            [g1, g2])

        #  We expect the supergraph to be the linear chain 0 to 1 and 1 to 2
        self.assertEqual(2, supergraph.get_num_edges())
        self.assertEqual(3, supergraph.get_num_nodes())
        self.assertSetEqual({0, 1, 2},
                            {n.label
                             for n in supergraph.iter_nodes()})
        self.assertSetEqual({10, 11},
                            {e.label
                             for e in supergraph.iter_edges()})
        self.assertSetEqual({"TAG_2"}, set(supergraph.iter_tags()))
        self.assertEqual({"TAG_L1"},
                         {e.tag
                          for e in supergraph.iter_tagged_edges()})
        for node in mapping.m_node:
            self.assertIn(node, query.get_all_nodes())
    def build(cls, domain: SynthesisDomain, config: EngineConfig, witness_set: WitnessSet) -> 'QueryPlanner':
        q_planner = QueryPlanner(domain=domain, config=config)
        logger_color = logger.opt(colors=True)

        #  ----------------------------------------------------------------------------------------------------------  #
        #  Stage 1 : Build unit plans
        #  ----------------------------------------------------------------------------------------------------------  #
        logger.debug("Extracting meta query-plans of length 1 from individual components...")
        for component_name in domain.get_available_components():
            for witness_entry in witness_set.entries[component_name]:
                q_planner._extract_unit_plans(component_name, witness_entry)

        #  Log some information for post-mortem analysis if necessary
        #  Total plans found.
        logger_color.debug(f"Found <green>{len(q_planner._unit_plans)}</green> unit plans in total.")

        #  Plans found per component.
        components_to_units: Dict[str, List[UnitMetaPlan]] = collections.defaultdict(list)
        for unit in q_planner._unit_plans.values():
            for c in unit.component_entries:
                components_to_units[c].append(unit)

        with logutils.temporary_add(f"{config.path}/logs/query_planner/unit_plans.log",
                                    level="TRACE",
                                    only_sink=True) as logger_:
            logger_ = logger_.opt(raw=True)
            for component_name, units in components_to_units.items():
                logger_color.debug(f"Found <green>{len(units)}</green> unit plans from "
                                  f"<blue>{component_name}</blue>.")
                logger_.opt(colors=True).debug(f"Found <green>{len(units)}</green> unit plans from "
                                              f"<blue>{component_name}</blue>.")
                for unit in units:
                    logger_.trace(f"Component: {component_name}\n")
                    logger_.trace("-----------------\n")
                    logger_.trace("Transformation\n")
                    logger_.trace("-----------------\n")
                    logger_.trace(unit.transformation.to_str(domain))
                    logger_.trace("\n-----------------\n")
                    logger_.trace("Argument Mappings\n")
                    logger_.trace("-----------------\n")
                    logger_.trace(unit.component_entries[component_name].argument_mappings)
                    logger_.trace("\n========xxx========\n\n")

        #  ----------------------------------------------------------------------------------------------------------  #
        #  Stage 2 : Strengthen Unit Plans
        #  ----------------------------------------------------------------------------------------------------------  #
        logger_color.debug("Strengthening Unit Plans...")
        for unit_plan in debug_iter(list(q_planner._unit_plans.values()), desc='Strengthening Unit Plans'):
            query: Transformation = unit_plan.transformation
            for component_name in unit_plan.component_entries.keys():
                #  The witness set examples are guaranteed to have a placeholder node for each entity,
                #  so the use of placeholder nodes in transformations should not cause issues.
                if unit_plan.empty:
                    strengthened, mapping = query.deepcopy()
                    strengthened = Graph.from_graph(strengthened)

                else:
                    examples: List[Transformation] = witness_set.get_transformations(component_name)
                    strengthened, mapping = query.get_greatest_common_universal_supergraph(examples)

                inp_entities = set(query.get_input_entities())
                m_reverse = mapping.reverse()
                keep_nodes = [n for n in strengthened.iter_nodes()
                              if m_reverse.m_ent.get(n.entity, None) in inp_entities]

                strengthened = strengthened.induced_subgraph(keep_nodes=keep_nodes)

                unit_plan.strengthenings[component_name] = (strengthened, mapping)

        logger_color.debug(f"Strengthened <green>{len(q_planner._unit_plans)}</green> unit plans.")

        #  ----------------------------------------------------------------------------------------------------------  #
        #  Stage 3 : Combining unit plans to obtain query plans up to max depth.
        #  Note that these are not explicitly constructed. Rather, a recursive formulation is established
        #  to construct the query plans quickly at test time.
        #  ----------------------------------------------------------------------------------------------------------  #

        #  Initialize the meta-plans from these unit plans
        logger.debug("Initializing meta-plans from unit-plans...")
        for transformation, unit_plan in q_planner._unit_plans.items():
            q_planner._meta_plans[transformation] = MetaQueryPlan.initialize_from_unit_plan(unit_plan.deepcopy()[0])

        logger_color.debug(f"Evolving meta query-plans upto a maximum length of "
                          f"<blue>{config.max_length}</blue>")

        #  Setup a mapping from the label of the output node of transformations
        #  to the meta plan for that transformation. This helps in quickly finding
        #  appropriate unit plans to extend a plan with.
        nlabel_to_unit_plan: Dict[int, Set[UnitMetaPlan]] = collections.defaultdict(set)
        for transformation, unit_plan in q_planner._unit_plans.items():
            #  Guaranteed to be a single node with that entity.
            output_node = next(transformation.get_output_nodes())
            nlabel_to_unit_plan[output_node.label].add(unit_plan)

        #  At every depth, the worklist will be the set of query plans with an entry for depth-1 in the blueprint.
        worklist: Set[MetaQueryPlan] = set(q_planner._meta_plans.values())
        for depth in range(2, config.max_length + 1):
            for unit in worklist:
                q_planner._evolve_meta_plan(unit, depth, nlabel_to_unit_plan)

            worklist = {plan for plan in q_planner._meta_plans.values() if depth in plan.blueprint}
            logger_color.debug(f"Found <green>{len(worklist)}</green> transformations in total "
                              f"at depth <blue>{depth}</blue>.")
            if len(worklist) == 0:
                break

        logger_color.debug(f"Found <green>{len(q_planner._meta_plans)}</green> transformations and meta "
                          f"query plans in total with <blue>max_depth={config.max_length}</blue>.")

        #  TODO : Log the transformations to a file.

        #  ----------------------------------------------------------------------------------------------------------  #
        #  Stage 4 : Setup auxiliary data-structures
        #  ----------------------------------------------------------------------------------------------------------  #

        #  Flattened blueprint items enable access to all the items regardless of the component name.
        for plan in q_planner._meta_plans.values():
            plan.blueprint_items = collections.defaultdict(set)
            for depth, bp_dict in plan.blueprint.items():
                for items_list in bp_dict.values():
                    plan.blueprint_items[depth].update(items_list)

        #  Sequence tries help in quickly computing candidate sequences given a synthesis problem.
        logger_color.debug("Constructing Sequence Tries...")
        q_planner._compute_sequence_tries()
        logger_color.debug(f"Sequence Tries constructed for every depth.")

        return q_planner
    def init(self):
        domain = PandasLiteSynthesisDomain()
        replay = {k: iter(v) for k, v in self.replay_map.items()}
        graph = Graph()

        g_inputs = self._g_inputs = [
            self._convert_inp_to_graph(inp) for inp in self.inputs
        ]
        int_to_val = {-idx: inp for idx, inp in enumerate(self.inputs, 1)}
        int_to_graph = {-idx: g_inp for idx, g_inp in enumerate(g_inputs, 1)}

        #  Run the generators to extract the programs and graphs for each component call.
        #  Merge the individual graphs into the master graph.
        call_strs: List[str] = []
        for idx, (component_name, arg_ints) in enumerate(self.skeleton, 1):
            c_inputs = [int_to_val[i] for i in arg_ints]
            g_c_inputs = [int_to_graph[i] for i in arg_ints]
            output, program, c_graph, output_graph = next(
                domain.enumerate(component_name,
                                 c_inputs,
                                 g_c_inputs,
                                 replay=replay))
            int_to_val[idx] = output
            int_to_graph[idx] = output_graph
            call_strs.append(program)
            graph.merge(c_graph)

        #  Check that the final output is equivalent to the original output specified in the benchmark.
        assert domain.check_equivalent(self.output, int_to_val[self.skeleton.length]), \
            f"Generated output inconsistent with specified output in Pandas benchmark {self.b_id}"

        #  Retrofit the value of the output entity to the original output
        cur_out_entity = next(ent for ent in graph.iter_entities()
                              if ent.value is int_to_val[self.skeleton.length])
        cur_out_entity.value = self.output

        #  Perform transitive closure w.r.t. the nodes corresponding to the intermediate outputs
        #  and take the induced subgraph containing all nodes except those join nodes.
        if self.skeleton.length > 1:
            join_nodes = set.union(*(set(int_to_graph[i].iter_nodes())
                                     for i in range(1, self.skeleton.length)))
            domain.perform_transitive_closure(graph, join_nodes=join_nodes)
            intent_graph = graph.induced_subgraph(
                keep_nodes=set(graph.iter_nodes()) - join_nodes)
        else:
            intent_graph = graph

        self._graph = intent_graph

        #  Also construct the string representation of the ground-truth program.
        program_list: List[str] = []
        for depth, (call_str,
                    (component_name,
                     arg_ints)) in enumerate(zip(call_strs, self.skeleton), 1):
            arg_strs = [f"inp{-i}" if i < 0 else f"v{i}" for i in arg_ints]
            call_str = call_str.format(**{
                f"inp{idx}": arg_str
                for idx, arg_str in enumerate(arg_strs, 1)
            })
            if depth == self.skeleton.length:
                program_list.append(call_str)
            else:
                program_list.append(f"v{depth} = {call_str}")

        self.program = "\n".join(program_list)
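
#  Sketch of the call-string formatting above: negative argument ints name
#  external inputs, positive ints the intermediate values. The call string is
#  made up.
call_str = "merge({inp1}, {inp2})"
arg_ints = [-1, 1]
arg_strs = [f"inp{-i}" if i < 0 else f"v{i}" for i in arg_ints]
assert call_str.format(**{f"inp{idx}": s
                          for idx, s in enumerate(arg_strs, 1)}) == "merge(inp1, v1)"
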
Example #24
def extract_queries(
        problem: SynthesisProblem) -> Dict[Transformation, List[Query]]:
    #  Stage 1 : Get all paths from one of the input nodes to an output node without any input/output node in between.
    graph = problem.graph
    inputs = problem.inputs
    output = problem.output

    entities = list(graph.iter_entities())
    input_entities = [
        next(ent for ent in entities if ent.value is inp) for inp in inputs
    ]
    output_entity = next(ent for ent in entities if ent.value is output)
    arg_numbering = {ent: idx for idx, ent in enumerate(input_entities)}

    placeholder_dict = {}
    #  A placeholder node can represent any node belonging to an entity.
    #  This helps coalesce equivalent query plans.
    for ent in itertools.chain(input_entities, [output_entity]):
        placeholder_dict[ent] = PlaceholderNode(entity=ent)

    path_dict: Dict[Entity,
                    List[Path]] = extract_paths(graph, input_entities,
                                                output_entity)

    canonical_transformations: Dict[Transformation, Transformation] = {}
    #  Only keep one transformation per set of input nodes, as satisfying that query means satisfying all the others.
    seen: Set[Tuple[Transformation, FrozenSet[Node]]] = set()

    queries: Dict[Transformation, List[Query]] = collections.defaultdict(list)
    set_input_entities = set(input_entities)

    #  Find queries by taking exactly one path, and placeholder nodes for the
    #  input entities not present in the path.
    for path_ent, paths in path_dict.items():
        remaining_entities = [
            ent for ent in set_input_entities if ent is not path_ent
        ]
        for path in paths:
            path_nodes, path_edges = path
            nodes = list(path_nodes) + [
                placeholder_dict[ent] for ent in remaining_entities
            ]
            edges = path_edges

            #  Get the corresponding subgraph.
            subgraph = Graph.from_nodes_and_edges(nodes=set(nodes),
                                                  edges=set(edges))
            subgraph_transformation = Transformation.build_from_graph(
                subgraph,
                input_entities=input_entities,
                output_entity=output_entity)

            #  Compute the symbolic counter-part to group subgraphs together by the underlying transformation.
            symbolic_copy, mapping = create_symbolic_copy(subgraph)
            mapped_input_entities = [mapping.m_ent[i] for i in input_entities]
            mapped_output_entity = mapping.m_ent[output_entity]
            transformation = Transformation.build_from_graph(
                symbolic_copy,
                input_entities=mapped_input_entities,
                output_entity=mapped_output_entity)

            #  Check if the transformation was seen before
            if transformation not in canonical_transformations:
                canonical_transformations[transformation] = transformation
                seen.add((transformation,
                          frozenset(n for n in subgraph.iter_nodes()
                                    if n.entity in set_input_entities)))

                mapping = mapping.reverse()
                arg_number_mapping = ArgumentNumberMapping({
                    idx: arg_numbering[mapping.m_ent[ent]]
                    for idx, ent in enumerate(mapped_input_entities)
                })

                # We need a mapping from transformation to the subgraph.
                queries[transformation].append(
                    Query(transformation=transformation,
                          subgraph=subgraph_transformation,
                          mapping=mapping,
                          arg_number_mapping=arg_number_mapping))

            else:
                canonical = canonical_transformations[transformation]
                key = (transformation,
                       frozenset(n for n in subgraph.iter_nodes()
                                 if n.entity in set_input_entities))
                #  Check if the transformation was seen before with the same input nodes. If yes, continue. This is
                #  because if a graph satisfies the already seen transformation for these input nodes, it will satisfy
                #  this one as well. So no point in checking it.
                if key in seen:
                    continue

                seen.add(key)
                # We need a mapping from the canonical transformation to the subgraph.
                mapping = next(canonical.get_subgraph_mappings(
                    transformation)).apply_mapping(mapping.reverse())
                arg_number_mapping = ArgumentNumberMapping({
                    idx: arg_numbering[mapping.m_ent[ent]]
                    for idx, ent in enumerate(canonical.get_input_entities())
                })

                queries[canonical].append(
                    Query(transformation=canonical,
                          subgraph=subgraph_transformation,
                          mapping=mapping,
                          arg_number_mapping=arg_number_mapping))

    return queries
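
#  Minimal sketch of the dedup above: a query is skipped when the same
#  canonical transformation has already been recorded with the same frozen set
#  of input nodes. Values are illustrative.
seen = set()
key = ("T", frozenset({"n1", "n2"}))
if key not in seen:
    seen.add(key)
assert ("T", frozenset({"n2", "n1"})) in seen
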
    def test_2(self):
        g1 = Graph()
        n1 = Node(label=0, entity=DEFAULT_ENTITY, value=10)
        n2 = Node(label=1, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        g1.add_node(n1)
        g1.add_node(n2)
        g1.add_edge(Edge(n1, n2, 0))

        g2 = Graph()
        n3 = Node(label=0, entity=DEFAULT_ENTITY, value=SYMBOLIC_VALUE)
        g2.add_node(n3)

        g3 = Graph()
        n4 = Node(label=0, entity=DEFAULT_ENTITY, value=20)
        g3.add_node(n4)

        mappings_21 = list(g2.get_subgraph_mappings(g1))
        mappings_31 = list(g3.get_subgraph_mappings(g1))

        self.assertEqual(1, len(mappings_21))
        self.assertEqual(mappings_21[0].m_node[n3], n1)
        self.assertEqual(mappings_21[0].m_ent[n3.entity], n1.entity)

        self.assertEqual(0, len(mappings_31))