def extract_paths(
        graph: Graph,
        input_entities: List[Entity],
        output_entity: Entity,
) -> "Dict[Entity, List[Path]]":
    """Collect, per input entity, the paths leading from its nodes to output nodes.

    Starting from every node that ``graph.iter_nodes(entity=ent)`` yields for
    each ``ent`` in ``input_entities``, the graph is explored breadth-first
    along outgoing edges. A path is recorded as soon as it reaches a node whose
    entity *is* ``output_entity`` and is not extended past that node. Nodes
    already on the current path, and nodes belonging to any input entity, are
    never stepped onto, so a recorded path has no input or output node in
    between its two end points.

    Args:
        graph: Graph providing ``iter_nodes(entity=...)`` and
            ``iter_edges(src=...)``.
        input_entities: Entities whose nodes serve as path starting points.
        output_entity: Entity marking path end points (compared by identity).

    Returns:
        A dict with one key per input entity. Each value is a list of paths,
        where a path is a ``(nodes, edges)`` tuple: the set of nodes on the
        path (both end points included) and the list of edges in traversal
        order.
    """
    path_dict: Dict[Entity, List[Path]] = {ent: [] for ent in input_entities}

    # The membership test below runs once per explored edge; build the set once
    # instead of scanning the list every time.
    input_entity_set = set(input_entities)

    start_nodes = itertools.chain.from_iterable(
        graph.iter_nodes(entity=ent) for ent in input_entities)

    for node in start_nodes:
        # Find all the paths from node to an output node, without any other
        # input or output nodes in between.
        # An entry is the set of visited nodes, the current node to explore,
        # and the current list of edges.
        entry: Tuple[Set[Node], Node, List[Edge]] = ({node}, node, [])
        worklist = collections.deque([entry])
        paths: List[Path] = []

        while worklist:
            visited, cur_node, edges = worklist.popleft()
            for edge in graph.iter_edges(src=cur_node):
                dst = edge.dst
                if dst in visited or dst.entity in input_entity_set:
                    continue

                # New set/list objects per branch so that sibling paths never
                # share mutable state.
                if dst.entity is output_entity:
                    paths.append((visited | {dst}, edges + [edge]))
                else:
                    worklist.append((visited | {dst}, dst, edges + [edge]))

        path_dict[node.entity].extend(paths)

    return path_dict
def create_symbolic_copy(graph: Graph) -> Tuple[Graph, GraphMapping]:
    """Build a structurally identical graph whose values are all symbolic.

    Every entity and node of ``graph`` gets a fresh counterpart carrying
    ``SYMBOLIC_VALUE``; node labels and edge labels are kept as they are.

    Returns:
        A ``(new_graph, mapping)`` pair. ``mapping.m_ent`` maps each original
        entity to its new entity and ``mapping.m_node`` maps each original
        node to its new node.
    """
    mapping = GraphMapping()

    # Entities go first: the node copies below look up their new entity here.
    for orig_entity in graph.iter_entities():
        mapping.m_ent[orig_entity] = Entity(value=SYMBOLIC_VALUE)

    for orig_node in graph.iter_nodes():
        mapping.m_node[orig_node] = Node(
            label=orig_node.label,
            entity=mapping.m_ent[orig_node.entity],
            value=SYMBOLIC_VALUE,
        )

    symbolic_nodes = set(mapping.m_node.values())

    # Re-create each edge between the copies of its original end points.
    symbolic_edges = set()
    for orig_edge in graph.iter_edges():
        symbolic_edges.add(
            Edge(
                src=mapping.m_node[orig_edge.src],
                dst=mapping.m_node[orig_edge.dst],
                label=orig_edge.label,
            )
        )

    symbolic_graph = Graph.from_nodes_and_edges(nodes=symbolic_nodes, edges=symbolic_edges)
    return symbolic_graph, mapping
def prepare_solution(self, output: Any, output_graph: Graph) -> Solution:
    """Assemble the arguments for ``self.domain.prepare_solution`` and call it.

    Integer handles follow the skeleton convention used throughout this
    method: inputs are keyed ``-1, -2, ...`` and the final output is keyed by
    ``self.skeleton.length``.

    Args:
        output: Passed through to the domain as the produced output value.
        output_graph: Passed through to the domain as the graph of ``output``.

    Returns:
        Whatever ``self.domain.prepare_solution`` returns.
    """
    # Display names for the inputs: user-supplied when available, otherwise
    # the generated inp1, inp2, ...
    input_names = self.problem.input_names
    if input_names is None:
        int_to_names: Dict[int, str] = {
            -idx: f"inp{idx}" for idx in range(1, len(self.problem.inputs) + 1)
        }
    else:
        int_to_names = {-idx: name for idx, name in enumerate(input_names, 1)}

    num_calls = self.skeleton.length
    int_to_names[num_calls] = self.problem.output_name

    # Combine the graphs collected so far into a single graph.
    merged = Graph()
    for partial_graph in self.graphs:
        merged.merge(partial_graph)

    # Perform transitive closure w.r.t. the nodes of the intermediate outputs,
    # then keep only the subgraph induced by all remaining nodes.
    if num_calls > 1:
        join_nodes = set()
        for step in range(1, num_calls):
            join_nodes.update(self.int_to_graph[step].iter_nodes())

        self.domain.perform_transitive_closure(merged, join_nodes=join_nodes)
        surviving_nodes = set(merged.iter_nodes()) - join_nodes
        merged = merged.induced_subgraph(keep_nodes=surviving_nodes)

    # Only the argument handles of each skeleton step are forwarded.
    arguments = [arg_ints for _, arg_ints in self.skeleton]

    return self.domain.prepare_solution(
        self.problem.inputs,
        output,
        merged,
        self.problem.graph_inputs,
        output_graph,
        self.enumeration_items,
        arguments=arguments,
        int_to_names=int_to_names,
        int_to_obj=self.int_to_val,
    )
def init(self):
    """Build the ground-truth graph and program string for this benchmark.

    Each ``(component_name, arg_ints)`` step of ``self.skeleton`` is run
    through a fresh ``PandasLiteSynthesisDomain``, taking the first item its
    ``enumerate`` call yields, and the per-step graphs are merged. Values and
    graphs are addressed by integer handles: inputs are ``-1, -2, ...`` and
    the output of step ``k`` (1-based) is ``k``.

    Results stored on the instance:

    * ``self._g_inputs`` - graphs built from ``self.inputs``.
    * ``self._graph`` - the merged graph; when the skeleton has more than one
      step, the nodes of intermediate outputs are removed after transitive
      closure.
    * ``self.program`` - the program text, one line per step.

    Raises:
        AssertionError: If the value produced by the last step is not
            equivalent to ``self.output``.
        StopIteration: If a ``domain.enumerate`` call yields nothing, or if no
            entity of the merged graph holds the final output value.
    """
    domain = PandasLiteSynthesisDomain()
    # One shared iterator per replay key, handed to every enumerate call.
    replay = {k: iter(v) for k, v in self.replay_map.items()}
    graph = Graph()

    g_inputs = self._g_inputs = [self._convert_inp_to_graph(inp) for inp in self.inputs]
    int_to_val = {-idx: inp for idx, inp in enumerate(self.inputs, 1)}
    int_to_graph = {-idx: g_inp for idx, g_inp in enumerate(g_inputs, 1)}

    # Run the generators to extract the programs and graphs for each component call.
    # Merge the individual graphs into the master graph.
    call_strs: List[str] = []
    for idx, (component_name, arg_ints) in enumerate(self.skeleton, 1):
        c_inputs = [int_to_val[i] for i in arg_ints]
        g_c_inputs = [int_to_graph[i] for i in arg_ints]
        output, program, c_graph, output_graph = next(
            domain.enumerate(component_name, c_inputs, g_c_inputs, replay=replay))

        int_to_val[idx] = output
        int_to_graph[idx] = output_graph
        call_strs.append(program)
        graph.merge(c_graph)

    final_output = int_to_val[self.skeleton.length]

    # Check that the final output is equivalent to the original output specified
    # in the benchmark. Raised explicitly (rather than via ``assert``) so the
    # check is not removed when Python runs with -O.
    if not domain.check_equivalent(self.output, final_output):
        raise AssertionError(
            f"Generated output inconsistent with specified output in Pandas benchmark {self.b_id}")

    # Retrofit the value of the output entity to the original output.
    cur_out_entity = next(ent for ent in graph.iter_entities() if ent.value is final_output)
    cur_out_entity.value = self.output

    # Perform transitive closure w.r.t. the nodes corresponding to the intermediate
    # outputs and take the induced subgraph containing all nodes except those.
    if self.skeleton.length > 1:
        join_nodes = set.union(*(set(int_to_graph[i].iter_nodes())
                                 for i in range(1, self.skeleton.length)))
        domain.perform_transitive_closure(graph, join_nodes=join_nodes)
        intent_graph = graph.induced_subgraph(
            keep_nodes=set(graph.iter_nodes()) - join_nodes)
    else:
        intent_graph = graph

    self._graph = intent_graph

    # Also construct the string representation of the ground-truth program.
    program_list: List[str] = []
    for depth, (call_str, (_, arg_ints)) in enumerate(zip(call_strs, self.skeleton), 1):
        # Negative handles name inputs (inp1, inp2, ...); positive handles name
        # the variable holding an earlier step's result (v1, v2, ...).
        arg_strs = [f"inp{-i}" if i < 0 else f"v{i}" for i in arg_ints]
        # The call template's inp1, inp2, ... placeholders are positional
        # within the call; substitute the actual argument names.
        call_str = call_str.format(**{
            f"inp{idx}": arg_str for idx, arg_str in enumerate(arg_strs, 1)
        })

        # The last step is emitted as a bare expression; earlier steps are
        # bound to v<depth> so later steps can refer to them.
        if depth == self.skeleton.length:
            program_list.append(call_str)
        else:
            program_list.append(f"v{depth} = {call_str}")

    self.program = "\n".join(program_list)