def create_symbolic_copy(graph: Graph) -> Tuple[Graph, GraphMapping]:
    """
    Build a structurally identical copy of ``graph`` whose values are symbolic.

    Every entity and node of ``graph`` gets a fresh counterpart carrying
    ``SYMBOLIC_VALUE`` instead of its value; node labels and edge labels are
    carried over unchanged, and each copied node points at the copy of its
    original entity.

    Args:
        graph: The graph to copy. It is only read through ``iter_entities``,
            ``iter_nodes`` and ``iter_edges``.

    Returns:
        A ``(new_graph, mapping)`` pair, where ``mapping.m_ent`` and
        ``mapping.m_node`` map each original entity / node to its copy.
    """
    mapping = GraphMapping()

    # Entities go first: the node copies below look up their entity's copy.
    for original_entity in graph.iter_entities():
        mapping.m_ent[original_entity] = Entity(value=SYMBOLIC_VALUE)

    for original_node in graph.iter_nodes():
        mapping.m_node[original_node] = Node(
            label=original_node.label,
            entity=mapping.m_ent[original_node.entity],
            value=SYMBOLIC_VALUE,
        )

    symbolic_nodes = set(mapping.m_node.values())

    # Re-create every edge between the copies of its endpoints.
    symbolic_edges = set()
    for original_edge in graph.iter_edges():
        symbolic_edges.add(
            Edge(
                src=mapping.m_node[original_edge.src],
                dst=mapping.m_node[original_edge.dst],
                label=original_edge.label,
            )
        )

    symbolic_graph = Graph.from_nodes_and_edges(
        nodes=symbolic_nodes,
        edges=symbolic_edges,
    )
    return symbolic_graph, mapping
def init(self):
    """
    Build the intent graph and the ground-truth program string.

    Each ``(component_name, arg_ints)`` call in ``self.skeleton`` is run
    through a ``PandasLiteSynthesisDomain``; the graph of every call is merged
    into one master graph. Values and graphs are addressed by integer:
    negative integers (``-1, -2, ...``) refer to ``self.inputs`` in order,
    positive integers (``1, 2, ...``) refer to the output of the
    correspondingly numbered skeleton call.

    Reads ``self.replay_map``, ``self.inputs``, ``self.skeleton``,
    ``self.output`` and ``self.b_id``.

    Sets:
        self._g_inputs: One graph per input, from ``_convert_inp_to_graph``.
        self._graph: The master graph; when the skeleton has more than one
            call, the subgraph left after transitive closure over, and
            removal of, the nodes of the intermediate outputs.
        self.program: The program text, one line per call, where every call
            except the last is written as an assignment to ``v<depth>``.

    Raises:
        AssertionError: If the generated final output is not equivalent to
            ``self.output``, or if no entity of the merged graph holds the
            generated final output.
    """
    domain = PandasLiteSynthesisDomain()
    # Fresh iterators over the recorded values, handed to every enumerate call.
    replay = {k: iter(v) for k, v in self.replay_map.items()}
    graph = Graph()

    g_inputs = self._g_inputs = [
        self._convert_inp_to_graph(inp) for inp in self.inputs
    ]
    int_to_val = {-idx: inp for idx, inp in enumerate(self.inputs, 1)}
    int_to_graph = {-idx: g_inp for idx, g_inp in enumerate(g_inputs, 1)}

    # Run the generators to extract the programs and graphs for each component call.
    # Merge the individual graphs into the master graph.
    call_strs: List[str] = []
    for idx, (component_name, arg_ints) in enumerate(self.skeleton, 1):
        c_inputs = [int_to_val[i] for i in arg_ints]
        g_c_inputs = [int_to_graph[i] for i in arg_ints]
        # Only the first result of the generator is used.
        output, program, c_graph, output_graph = next(
            domain.enumerate(component_name, c_inputs, g_c_inputs, replay=replay))

        int_to_val[idx] = output
        int_to_graph[idx] = output_graph
        call_strs.append(program)
        graph.merge(c_graph)

    num_calls = self.skeleton.length
    final_output = int_to_val[num_calls]

    # Check that the final output is equivalent to the original output specified in the benchmark.
    # Raised explicitly (instead of via `assert`) so the check survives `python -O`.
    if not domain.check_equivalent(self.output, final_output):
        raise AssertionError(
            f"Generated output inconsistent with specified output in Pandas benchmark {self.b_id}"
        )

    # Retrofit the value of the output entity to the original output.
    # The entity is located by identity with the generated output object.
    cur_out_entity = next(
        (ent for ent in graph.iter_entities() if ent.value is final_output),
        None,
    )
    if cur_out_entity is None:
        # A bare StopIteration from next() could silently end an enclosing iteration.
        raise AssertionError(
            f"No entity in the merged graph holds the generated output of Pandas benchmark {self.b_id}"
        )
    cur_out_entity.value = self.output

    # Perform transitive closure w.r.t. the nodes corresponding to the intermediate outputs
    # and take the induced subgraph containing all nodes except those.
    if num_calls > 1:
        join_nodes = set.union(*(set(int_to_graph[i].iter_nodes())
                                 for i in range(1, num_calls)))
        domain.perform_transitive_closure(graph, join_nodes=join_nodes)
        intent_graph = graph.induced_subgraph(
            keep_nodes=set(graph.iter_nodes()) - join_nodes)
    else:
        intent_graph = graph

    self._graph = intent_graph

    # Also construct the string representation of the ground-truth program.
    program_list: List[str] = []
    for depth, (call_str, (_, arg_ints)) in enumerate(zip(call_strs, self.skeleton), 1):
        # Negative ints name benchmark inputs, positive ints name earlier results.
        arg_strs = [f"inp{-i}" if i < 0 else f"v{i}" for i in arg_ints]
        # Each call string uses {inp1}, {inp2}, ... placeholders for its own arguments.
        call_str = call_str.format(**{
            f"inp{pos}": arg_str for pos, arg_str in enumerate(arg_strs, 1)
        })

        if depth == num_calls:
            # The last call is the program's result and is not assigned.
            program_list.append(call_str)
        else:
            program_list.append(f"v{depth} = {call_str}")

    self.program = "\n".join(program_list)