Example #1
0
    def prepare_solution(self, output: Any, output_graph: Graph) -> Solution:
        """Assemble the final solution by delegating to the domain.

        Builds the integer-to-name table for the inputs and the final output,
        merges every graph in ``self.graphs`` into one graph, strips the nodes
        belonging to intermediate outputs (after a transitive closure over
        them), and forwards everything to ``self.domain.prepare_solution``.

        Args:
            output: Forwarded unchanged to ``self.domain.prepare_solution``.
            output_graph: Forwarded unchanged to ``self.domain.prepare_solution``.

        Returns:
            Whatever ``self.domain.prepare_solution`` returns.
        """
        #  Inputs are keyed by negative integers (-1 for the first input, -2 for the
        #  second, ...); fall back to generated "inpN" names when none were supplied.
        labels = self.problem.input_names
        if labels is None:
            labels = [
                f"inp{pos}" for pos in range(1, len(self.problem.inputs) + 1)
            ]
        int_to_names: Dict[int, str] = {
            -pos: label for pos, label in enumerate(labels, 1)
        }

        #  The last skeleton position is keyed by the skeleton length itself.
        final_idx = self.skeleton.length
        int_to_names[final_idx] = self.problem.output_name

        merged = Graph()
        for part in self.graphs:
            merged.merge(part)

        #  With more than one step there are intermediate outputs: close the graph
        #  transitively over their nodes, then keep only the remaining nodes.
        if final_idx > 1:
            intermediate_nodes = set()
            for step in range(1, final_idx):
                intermediate_nodes.update(self.int_to_graph[step].iter_nodes())

            self.domain.perform_transitive_closure(
                merged, join_nodes=intermediate_nodes)
            surviving = set(merged.iter_nodes()).difference(intermediate_nodes)
            merged = merged.induced_subgraph(keep_nodes=surviving)

        return self.domain.prepare_solution(
            self.problem.inputs,
            output,
            merged,
            self.problem.graph_inputs,
            output_graph,
            self.enumeration_items,
            arguments=[step_args for _, step_args in self.skeleton],
            int_to_names=int_to_names,
            int_to_obj=self.int_to_val)
    def init(self):
        """Replay the skeleton to build the benchmark's graph and program text.

        Every ``(component_name, arg_ints)`` entry of ``self.skeleton`` is run
        through ``PandasLiteSynthesisDomain.enumerate`` (first result only, with
        iterators over the values of ``self.replay_map`` passed as ``replay``).
        The per-call graphs are merged into one graph, the final output is
        checked against ``self.output``, and the nodes of intermediate outputs
        are removed after a transitive closure over them.

        Side effects:
            Sets ``self._g_inputs`` (graphs of the inputs), ``self._graph``
            (the resulting graph) and ``self.program`` (one line per call).

        Raises:
            AssertionError: If the replayed final output is not equivalent to
                ``self.output``. Raised explicitly so the check is not stripped
                when Python runs with ``-O``.
            StopIteration: Propagated from ``next`` if ``domain.enumerate``
                yields nothing, or if no graph entity holds the final output.
        """
        domain = PandasLiteSynthesisDomain()
        replay = {k: iter(v) for k, v in self.replay_map.items()}
        graph = Graph()

        g_inputs = self._g_inputs = [
            self._convert_inp_to_graph(inp) for inp in self.inputs
        ]
        #  Inputs are keyed by negative integers: -1 is the first input, -2 the second, ...
        int_to_val = {-idx: inp for idx, inp in enumerate(self.inputs, 1)}
        int_to_graph = {-idx: g_inp for idx, g_inp in enumerate(g_inputs, 1)}

        #  Run the generators to extract the programs and graphs for each component call.
        #  Merge the individual graphs into the master graph.
        call_strs: List[str] = []
        for idx, (component_name, arg_ints) in enumerate(self.skeleton, 1):
            c_inputs = [int_to_val[i] for i in arg_ints]
            g_c_inputs = [int_to_graph[i] for i in arg_ints]
            output, program, c_graph, output_graph = next(
                domain.enumerate(component_name,
                                 c_inputs,
                                 g_c_inputs,
                                 replay=replay))
            #  Positive keys hold the results of the skeleton steps (1-based).
            int_to_val[idx] = output
            int_to_graph[idx] = output_graph
            call_strs.append(program)
            graph.merge(c_graph)

        final_idx = self.skeleton.length
        final_output = int_to_val[final_idx]

        #  Check that the final output is equivalent to the original output specified in the benchmark.
        #  An explicit raise (rather than ``assert``) keeps this check alive under ``python -O``;
        #  otherwise the retrofit below would silently overwrite a wrong output.
        if not domain.check_equivalent(self.output, final_output):
            raise AssertionError(
                f"Generated output inconsistent with specified output in Pandas benchmark {self.b_id}"
            )

        #  Retrofit the value of the output entity to the original output.
        #  The entity is located by object identity, not equality.
        cur_out_entity = next(ent for ent in graph.iter_entities()
                              if ent.value is final_output)
        cur_out_entity.value = self.output

        #  Perform transitive closure w.r.t the nodes corresponding to the intermediate outputs
        #  and take the induced subgraph containing all nodes except those
        if final_idx > 1:
            join_nodes = set.union(*(set(int_to_graph[i].iter_nodes())
                                     for i in range(1, final_idx)))
            domain.perform_transitive_closure(graph, join_nodes=join_nodes)
            intent_graph = graph.induced_subgraph(
                keep_nodes=set(graph.iter_nodes()) - join_nodes)
        else:
            intent_graph = graph

        self._graph = intent_graph

        #  Also construct the string representation of the ground-truth program.
        program_list: List[str] = []
        for depth, (call_str, (_, arg_ints)) in enumerate(
                zip(call_strs, self.skeleton), 1):
            #  Negative ints refer to inputs ("inpN"), positive ints to earlier results ("vN").
            arg_strs = [f"inp{-i}" if i < 0 else f"v{i}" for i in arg_ints]
            #  Each call string has "{inp1}", "{inp2}", ... placeholders for its own arguments.
            call_str = call_str.format(**{
                f"inp{idx}": arg_str
                for idx, arg_str in enumerate(arg_strs, 1)
            })
            #  The last call is emitted as a bare expression; earlier ones are bound to "vN".
            if depth == final_idx:
                program_list.append(call_str)
            else:
                program_list.append(f"v{depth} = {call_str}")

        self.program = "\n".join(program_list)