예제 #1
0
def extract_paths(graph: Graph, input_entities: List[Entity],
                  output_entity: Entity):
    """Enumerate the paths leading from input-entity nodes to output nodes.

    For every node belonging to one of ``input_entities``, a breadth-first
    search follows outgoing edges until it reaches a node whose entity is
    ``output_entity``. Nodes already on the current path and nodes that belong
    to any input entity are never stepped onto, and a path is not extended
    past an output node, so no path has an input or output node in its
    interior.

    Args:
        graph: Graph queried through ``iter_nodes(entity=...)`` and
            ``iter_edges(src=...)``.
        input_entities: Entities whose nodes are used as starting points.
        output_entity: Entity that terminates a path (compared by identity).

    Returns:
        A dict mapping each input entity to a list of ``(nodes, edges)``
        pairs: ``nodes`` is the set of nodes on the path and ``edges`` is the
        list of edges in traversal order. Paths are filed under the entity of
        their starting node.
    """
    result: Dict[Entity, List[Path]] = {}
    for ent in input_entities:
        result[ent] = []

    for ent in input_entities:
        for start in graph.iter_nodes(entity=ent):
            #  Each queue item holds the nodes on the partial path, the node to expand next,
            #  and the edges taken so far.
            frontier = collections.deque()
            frontier.append(({start}, start, []))
            found: List[Path] = []

            while frontier:
                seen, current, trail = frontier.popleft()
                for edge in graph.iter_edges(src=current):
                    target = edge.dst
                    #  Never revisit a node on this path.
                    if target in seen:
                        continue
                    #  Never pass through another input node.
                    if target.entity in input_entities:
                        continue

                    extended_seen = seen | {target}
                    extended_trail = trail + [edge]
                    if target.entity is output_entity:
                        #  Reached an output node: the path is complete and is not explored further.
                        found.append((extended_seen, extended_trail))
                    else:
                        frontier.append(
                            (extended_seen, target, extended_trail))

            result[start.entity].extend(found)

    return result
예제 #2
0
def create_symbolic_copy(graph: Graph) -> Tuple[Graph, GraphMapping]:
    """Build a structurally identical graph whose values are all symbolic.

    Every entity of ``graph`` is paired with a fresh ``Entity`` and every node
    with a fresh ``Node`` carrying the same label, both holding
    ``SYMBOLIC_VALUE``. Edges are recreated between the corresponding new
    nodes with their original labels.

    Args:
        graph: The graph to copy.

    Returns:
        A ``(new_graph, mapping)`` pair, where ``mapping.m_ent`` and
        ``mapping.m_node`` map the original entities and nodes to their
        counterparts in ``new_graph``.
    """
    mapping = GraphMapping()

    #  Entities first: the node copies below look up their new entity here.
    for orig_entity in graph.iter_entities():
        mapping.m_ent[orig_entity] = Entity(value=SYMBOLIC_VALUE)

    for orig_node in graph.iter_nodes():
        symbolic_node = Node(label=orig_node.label,
                             entity=mapping.m_ent[orig_node.entity],
                             value=SYMBOLIC_VALUE)
        mapping.m_node[orig_node] = symbolic_node

    symbolic_nodes = set(mapping.m_node.values())

    symbolic_edges = set()
    for orig_edge in graph.iter_edges():
        symbolic_edges.add(
            Edge(src=mapping.m_node[orig_edge.src],
                 dst=mapping.m_node[orig_edge.dst],
                 label=orig_edge.label))

    symbolic_graph = Graph.from_nodes_and_edges(nodes=symbolic_nodes,
                                                edges=symbolic_edges)
    return symbolic_graph, mapping
예제 #3
0
    def prepare_solution(self, output: Any, output_graph: Graph) -> Solution:
        """Assemble the merged graph and naming tables, then delegate to the domain.

        Inputs are named after ``self.problem.input_names`` when available and
        ``inp1``, ``inp2``, ... otherwise, keyed by the negative integers
        ``-1``, ``-2``, ...; the final skeleton position is named after
        ``self.problem.output_name``. All graphs in ``self.graphs`` are merged
        into one graph, and for skeletons longer than one step the nodes of
        the intermediate outputs are removed after a transitive closure.

        Args:
            output: Forwarded unchanged to ``self.domain.prepare_solution``.
            output_graph: Forwarded unchanged to
                ``self.domain.prepare_solution``.

        Returns:
            Whatever ``self.domain.prepare_solution`` returns.
        """
        names = self.problem.input_names
        if names is None:
            #  No names supplied: fall back to positional placeholder names.
            names = [
                f"inp{pos}"
                for pos in range(1,
                                 len(self.problem.inputs) + 1)
            ]

        int_to_names: Dict[int, str] = {}
        for pos, name in enumerate(names, 1):
            int_to_names[-pos] = name
        int_to_names[self.skeleton.length] = self.problem.output_name

        merged = Graph()
        for component_graph in self.graphs:
            merged.merge(component_graph)

        #  Perform transitive closure w.r.t. the nodes corresponding to the intermediate outputs,
        #  then keep only the induced subgraph over all the other nodes.
        n_steps = self.skeleton.length
        if n_steps > 1:
            join_nodes = set()
            for step in range(1, n_steps):
                join_nodes.update(self.int_to_graph[step].iter_nodes())

            self.domain.perform_transitive_closure(merged,
                                                   join_nodes=join_nodes)
            remaining = set(merged.iter_nodes()) - join_nodes
            merged = merged.induced_subgraph(keep_nodes=remaining)

        component_args = [arg_ints for (_, arg_ints) in self.skeleton]
        return self.domain.prepare_solution(self.problem.inputs,
                                            output,
                                            merged,
                                            self.problem.graph_inputs,
                                            output_graph,
                                            self.enumeration_items,
                                            arguments=component_args,
                                            int_to_names=int_to_names,
                                            int_to_obj=self.int_to_val)
    def init(self):
        """Replay the skeleton to build the intent graph and ground-truth program.

        Each component call in ``self.skeleton`` is run through the domain's
        enumerator using ``self.replay_map``, and the per-call graphs are
        merged into one graph. The final generated output is checked against
        ``self.output``, whose value is then written onto the matching entity.
        For skeletons longer than one step, the nodes of the intermediate
        outputs are removed after a transitive closure.

        Sets ``self._g_inputs``, ``self._graph`` and ``self.program``.

        Raises:
            AssertionError: If the generated output is not equivalent to
                ``self.output`` (only when assertions are enabled).
        """
        domain = PandasLiteSynthesisDomain()
        replay = {key: iter(choices) for key, choices in self.replay_map.items()}
        graph = Graph()

        g_inputs = [self._convert_inp_to_graph(inp) for inp in self.inputs]
        self._g_inputs = g_inputs

        #  Inputs are keyed by -1, -2, ...; the output of the k-th component call is keyed by k.
        int_to_val = {}
        for pos, inp in enumerate(self.inputs, 1):
            int_to_val[-pos] = inp
        int_to_graph = {}
        for pos, g_inp in enumerate(g_inputs, 1):
            int_to_graph[-pos] = g_inp

        #  Run the generators to extract the programs and graphs for each component call.
        #  Merge the individual graphs into the master graph.
        call_strs: List[str] = []
        for step, (component_name, arg_ints) in enumerate(self.skeleton, 1):
            candidates = domain.enumerate(component_name,
                                          [int_to_val[i] for i in arg_ints],
                                          [int_to_graph[i] for i in arg_ints],
                                          replay=replay)
            #  Only the first enumerated result is used.
            output, program, c_graph, output_graph = next(candidates)
            int_to_val[step] = output
            int_to_graph[step] = output_graph
            call_strs.append(program)
            graph.merge(c_graph)

        n_steps = self.skeleton.length

        #  Check that the final output is equivalent to the original output specified in the benchmark.
        assert domain.check_equivalent(self.output, int_to_val[n_steps]), \
            f"Generated output inconsistent with specified output in Pandas benchmark {self.b_id}"

        #  Retrofit the value of the output entity to the original output.
        #  The entity is located by identity with the generated final value.
        out_entity = next(candidate for candidate in graph.iter_entities()
                          if candidate.value is int_to_val[n_steps])
        out_entity.value = self.output

        #  Perform transitive closure w.r.t. the nodes corresponding to the intermediate outputs,
        #  then keep only the induced subgraph over all the other nodes.
        if n_steps > 1:
            join_nodes = set()
            for step in range(1, n_steps):
                join_nodes.update(int_to_graph[step].iter_nodes())

            domain.perform_transitive_closure(graph, join_nodes=join_nodes)
            remaining = set(graph.iter_nodes()) - join_nodes
            graph = graph.induced_subgraph(keep_nodes=remaining)

        self._graph = graph

        #  Also construct the string representation of the ground-truth program.
        lines: List[str] = []
        for step, (template, (_, arg_ints)) in enumerate(
                zip(call_strs, self.skeleton), 1):
            #  Negative references name inputs, the rest name earlier results.
            substitutions = {}
            for pos, ref in enumerate(arg_ints, 1):
                substitutions[f"inp{pos}"] = (f"inp{-ref}"
                                              if ref < 0 else f"v{ref}")
            rendered = template.format(**substitutions)
            #  The last call is emitted as a bare expression; earlier calls are bound to v<step>.
            lines.append(rendered if step == n_steps else f"v{step} = {rendered}")

        self.program = "\n".join(lines)