def prepare_solution(self, output: Any, output_graph: Graph) -> Solution:
    """Assemble the final solution for ``output`` by delegating to the domain.

    Steps performed here before delegating to ``self.domain.prepare_solution``:

    1. Build a map from integer ids to display names: inputs get the
       negative ids ``-1, -2, ...`` (named from ``self.problem.input_names``
       when available, otherwise ``inp1``, ``inp2``, ...), and the id
       ``self.skeleton.length`` is mapped to ``self.problem.output_name``.
    2. Merge every graph in ``self.graphs`` into one fresh graph.
    3. When the skeleton has more than one call, run a transitive closure
       over the nodes of the intermediate outputs (ids
       ``1 .. skeleton.length - 1``) and keep only the induced subgraph
       without those nodes.

    Args:
        output: The output value, passed through to the domain.
        output_graph: The graph for ``output``, passed through to the domain.

    Returns:
        Whatever ``self.domain.prepare_solution`` returns.
    """
    problem = self.problem
    num_calls = self.skeleton.length

    # Fall back to synthetic "inpK" names when the problem does not name its inputs.
    labels = problem.input_names
    if labels is None:
        labels = [f"inp{pos}" for pos in range(1, len(problem.inputs) + 1)]
    int_to_names: Dict[int, str] = {
        -pos: label for pos, label in enumerate(labels, 1)
    }
    int_to_names[num_calls] = problem.output_name

    merged = Graph()
    for partial in self.graphs:
        merged.merge(partial)

    # Perform transitive closure w.r.t. the nodes corresponding to the
    # intermediate outputs, then drop those nodes via an induced subgraph.
    if num_calls > 1:
        join_nodes = set()
        for step in range(1, num_calls):
            join_nodes.update(self.int_to_graph[step].iter_nodes())
        self.domain.perform_transitive_closure(merged, join_nodes=join_nodes)
        remaining = set(merged.iter_nodes()) - join_nodes
        merged = merged.induced_subgraph(keep_nodes=remaining)

    call_arguments = [args for _, args in self.skeleton]
    return self.domain.prepare_solution(
        problem.inputs,
        output,
        merged,
        problem.graph_inputs,
        output_graph,
        self.enumeration_items,
        arguments=call_arguments,
        int_to_names=int_to_names,
        int_to_obj=self.int_to_val,
    )
def init(self):
    """Replay the skeleton to build the benchmark's graph and ground-truth program.

    For every ``(component_name, arg_ints)`` call in ``self.skeleton`` the
    first result yielded by ``PandasLiteSynthesisDomain.enumerate`` (driven by
    ``self.replay_map``) is taken, its graph is merged into a master graph,
    and its output is recorded under the call's 1-based index. Inputs are
    addressed by the negative ids ``-1, -2, ...``.

    Side effects:
        * ``self._g_inputs`` - graphs produced by ``self._convert_inp_to_graph``
          for each input.
        * ``self._graph`` - the master graph; when the skeleton has more than
          one call, the nodes of intermediate outputs are removed after a
          transitive closure over them.
        * ``self.program`` - newline-joined program text, one line per call.

    Raises:
        AssertionError: If the replayed final output is not equivalent to
            ``self.output`` according to ``domain.check_equivalent``.
    """
    domain = PandasLiteSynthesisDomain()
    replay = {k: iter(v) for k, v in self.replay_map.items()}
    graph = Graph()
    num_calls = self.skeleton.length

    g_inputs = self._g_inputs = [
        self._convert_inp_to_graph(inp) for inp in self.inputs
    ]
    int_to_val = {-idx: inp for idx, inp in enumerate(self.inputs, 1)}
    int_to_graph = {-idx: g_inp for idx, g_inp in enumerate(g_inputs, 1)}

    # Run the generators to extract the programs and graphs for each component call.
    # Merge the individual graphs into the master graph.
    call_strs: List[str] = []
    for idx, (component_name, arg_ints) in enumerate(self.skeleton, 1):
        c_inputs = [int_to_val[i] for i in arg_ints]
        g_c_inputs = [int_to_graph[i] for i in arg_ints]
        output, program, c_graph, output_graph = next(
            domain.enumerate(component_name, c_inputs, g_c_inputs, replay=replay)
        )

        int_to_val[idx] = output
        int_to_graph[idx] = output_graph
        call_strs.append(program)
        graph.merge(c_graph)

    # Check that the final output is equivalent to the original output specified
    # in the benchmark. This is raised explicitly instead of via `assert` so the
    # check still runs under `python -O`; otherwise the retrofit below would
    # silently overwrite a mismatching output.
    final_output = int_to_val[num_calls]
    if not domain.check_equivalent(self.output, final_output):
        raise AssertionError(
            f"Generated output inconsistent with specified output in Pandas benchmark {self.b_id}"
        )

    # Retrofit the value of the output entity to the original output.
    # The entity is located by identity (`is`), not equality.
    cur_out_entity = next(
        ent for ent in graph.iter_entities() if ent.value is final_output
    )
    cur_out_entity.value = self.output

    # Perform transitive closure w.r.t. the nodes corresponding to the intermediate
    # outputs and take the induced subgraph containing all nodes except those.
    if num_calls > 1:
        join_nodes = set.union(
            *(set(int_to_graph[i].iter_nodes()) for i in range(1, num_calls))
        )
        domain.perform_transitive_closure(graph, join_nodes=join_nodes)
        intent_graph = graph.induced_subgraph(
            keep_nodes=set(graph.iter_nodes()) - join_nodes
        )
    else:
        intent_graph = graph

    self._graph = intent_graph

    # Also construct the string representation of the ground-truth program.
    # Each call string is formatted with named fields inp1, inp2, ... which are
    # filled with "inpK" for original inputs (negative ids) or "vK" for the
    # result of the K-th call.
    program_list: List[str] = []
    for depth, (call_str, (_, arg_ints)) in enumerate(
        zip(call_strs, self.skeleton), 1
    ):
        arg_strs = [f"inp{-i}" if i < 0 else f"v{i}" for i in arg_ints]
        rendered = call_str.format(**{
            f"inp{idx}": arg_str for idx, arg_str in enumerate(arg_strs, 1)
        })

        # The last call is the program's result expression; earlier calls are
        # bound to intermediate variables v1, v2, ...
        if depth == num_calls:
            program_list.append(rendered)
        else:
            program_list.append(f"v{depth} = {rendered}")

    self.program = "\n".join(program_list)