def get_strengthening_constraint(self, input_graph: Graph) -> Graph:
    """Return one graph combining the strengthening constraints of every stored constraint.

    Each constraint in ``self.constraints`` contributes its own strengthening
    constraint w.r.t. ``input_graph``; all of them are merged into a single
    fresh ``Graph`` which is returned.
    """
    combined = Graph()
    for constraint in self.constraints.values():
        combined.merge(constraint.get_strengthening_constraint(input_graph))

    return combined
def _get_canonical_query_plans(self, sequence: List[str],
                               transformation: Transformation) -> Dict[Skeleton, Set[QueryPlan]]:
    """Build the canonical query plans for `transformation` w.r.t. the component `sequence`.

    For every blueprint item list of the meta-plan associated with `transformation`,
    a `QueryPlan` is assembled out of the items' unit transformations, and the set of
    skeletons under which that plan applies is computed. Returns a mapping from each
    such skeleton to the set of query plans that work for it.
    """
    meta_plan = self._meta_plans[transformation]
    blueprint_item_lists = self._get_blueprint_item_lists(sequence, meta_plan, _d=len(sequence))
    # The canonical transformation for this sequence length, and a subgraph mapping
    # from it onto the concrete `transformation` (only the first mapping is used).
    canonical_transformation = meta_plan.canonical_transformations[len(sequence)]
    mapping = next(canonical_transformation.get_subgraph_mappings(transformation))
    skeletons_to_plans: Dict[Skeleton, Set[QueryPlan]] = collections.defaultdict(set)
    for blueprint_item_list in blueprint_item_lists:
        # Breakdown the overall transformation in terms of the unit plans contained in
        # the blueprint items. Store the connections between them as a graph mapping.
        connections = GraphMapping()
        connections.update(mapping)
        graph = Graph()
        for item in blueprint_item_list:
            graph.merge(item.unit.transformation)
            connections = connections.apply_mapping(item.canonical_mapping, only_keys=True)
            if item.border_mapping:
                connections.update(item.border_mapping)
                # Re-apply onto itself so values are expressed in the final entity space.
                connections = connections.apply_mapping(connections, only_values=True)

        # Assemble the query plan from the per-item unit transformations, the collected
        # connections, and the per-component strengthenings of each unit.
        query_plan = QueryPlan(transformation,
                               units=[item.unit.transformation for item in blueprint_item_list],
                               all_connections=connections,
                               strengthenings=[item.unit.strengthenings[component_name]
                                               for component_name, item in zip(sequence, blueprint_item_list)])

        # Obtain the skeletons for which this query plan would work.
        # External inputs are negative integers. See gauss.synthesis.skeleton for details.
        ent_to_idx = {ent: -idx for idx, ent in enumerate(transformation.get_input_entities(), 1)}
        possible_arg_ints_lists = []
        for component_name, (idx, item) in zip(sequence, enumerate(blueprint_item_list, 1)):
            # Get the mapped entities to the inputs of this unit's transformation,
            # and look up their idx values.
            arg_ints = [ent_to_idx[connections.m_ent[ent]]
                        for ent in item.unit.transformation.get_input_entities()]
            # Get all the permutations as well (one per allowed argument mapping).
            arg_ints_list = [arg_num_mapping.apply_list(arg_ints)
                             for arg_num_mapping in item.unit.component_entries[component_name].argument_mappings]
            possible_arg_ints_lists.append(arg_ints_list)
            # The output of this unit becomes available to later units under index `idx`.
            ent_to_idx[item.unit.transformation.get_output_entity()] = idx

        # The skeletons are then simply the all the combinations.
        for arg_ints_list in itertools.product(*possible_arg_ints_lists):
            skeleton = Skeleton(list(zip(sequence, arg_ints_list)))
            skeletons_to_plans[skeleton].add(query_plan)

    return skeletons_to_plans
def prepare_solution(self, output: Any, output_graph: Graph) -> Solution:
    """Assemble the final `Solution` for the synthesized program.

    Builds the int -> display-name mapping for inputs and the output, merges the
    per-depth graphs into one intent graph (closing over and then discarding the
    intermediate-output nodes when there is more than one component), and delegates
    the actual construction to the domain.
    """
    # Negative integers denote external inputs; use the problem-supplied names
    # when available, otherwise synthesize "inp1", "inp2", ...
    if self.problem.input_names is not None:
        int_to_names: Dict[int, str] = {-i: name
                                        for i, name in enumerate(self.problem.input_names, 1)}
    else:
        int_to_names = {-i: f"inp{i}" for i in range(1, len(self.problem.inputs) + 1)}

    # The final component's index names the overall output.
    int_to_names[self.skeleton.length] = self.problem.output_name

    merged = Graph()
    for partial_graph in self.graphs:
        merged.merge(partial_graph)

    # Perform transitive closure w.r.t the nodes corresponding to the intermediate
    # outputs and take the induced subgraph containing all nodes except those.
    if self.skeleton.length > 1:
        join_nodes: Set = set()
        for depth in range(1, self.skeleton.length):
            join_nodes.update(self.int_to_graph[depth].iter_nodes())
        self.domain.perform_transitive_closure(merged, join_nodes=join_nodes)
        merged = merged.induced_subgraph(keep_nodes=set(merged.iter_nodes()) - join_nodes)

    return self.domain.prepare_solution(self.problem.inputs,
                                        output,
                                        merged,
                                        self.problem.graph_inputs,
                                        output_graph,
                                        self.enumeration_items,
                                        arguments=[args for _name, args in self.skeleton],
                                        int_to_names=int_to_names,
                                        int_to_obj=self.int_to_val)
def _solve_for_skeleton_recursive(
        self,
        problem: SynthesisProblem,
        skeleton: Skeleton,
        query_plans: QueryPlans,
        context: SolverContext,
        _depth: int = 0) -> Iterator[Tuple[Any, Graph]]:
    """Depth-first enumeration of candidate programs for `skeleton`.

    At each `_depth`, enumerates the domain's candidates for the component at that
    depth, filters them against the query plans via `context.check_validity`, records
    the survivors with `context.step`, and either yields a `(output, output_graph)`
    pair (last component) or recurses to the next depth.

    Raises:
        TimeoutError: when `problem.timeout` is set and exceeded.
    """
    domain = self._domain
    component_name, arg_ints = skeleton[_depth]
    inputs, g_inputs = context.get_arguments(depth=_depth)
    # Each input graph is expected to expose its entity first via iter_entities().
    inp_entities = [next(iter(g_inp.iter_entities())) for g_inp in g_inputs]
    inp_graph = Graph()
    for g_inp in g_inputs:
        inp_graph.merge(g_inp)

    # Get the strengthening constraint for this depth.
    # Specifically, for every query, get the intersection of the strengthenings of all
    # the query plans for that query at this particular depth. Then take the union of
    # all of these. In other words, this strengthening constraint is a graph containing
    # the nodes, edges, tags and tagged edges that must be satisfied by the graph
    # containing the inputs, that is `inp_graph` in this context.
    # This constraint can then be used by the `enumerate` procedure to speed up the search.
    strengthening_constraint: Graph = context.waypoints[_depth].get_strengthening_constraint(inp_graph)
    enumeration_item: EnumerationItem
    for enumeration_item in domain.enumerate(component_name=component_name,
                                             inputs=inputs,
                                             g_inputs=g_inputs,
                                             constants=problem.constants,
                                             strengthening_constraint=strengthening_constraint):
        output = enumeration_item.output
        c_graph = enumeration_item.graph
        o_graph = enumeration_item.o_graph
        # for g in g_inputs:
        #     assert set(g.iter_nodes()).issubset(set(c_graph.iter_nodes()))

        # Abort the whole search once the time budget is exhausted.
        if problem.timeout is not None and time.time() - self._time_start > problem.timeout:
            raise TimeoutError("Exceeded time limit.")

        # Wrap the candidate graph into a Transformation with an explicit placeholder
        # node for the output entity, so it can be matched against the query plans.
        out_entity = next(iter(o_graph.iter_entities()))
        c_graph.add_node(PlaceholderNode(entity=out_entity))
        c_graph = Transformation.build_from_graph(c_graph,
                                                  input_entities=inp_entities,
                                                  output_entity=out_entity)

        # Check if the returned graph is consistent with the query plans.
        if not context.check_validity(c_graph, depth=_depth):
            continue

        # Prepare for the next round.
        context.step(output=output, graph=c_graph, output_graph=o_graph,
                     enumeration_item=enumeration_item, depth=_depth)

        if _depth == skeleton.length - 1:
            # This was the last component, prepare the program and return it along
            # with the final output and graph.
            yield output, o_graph
        else:
            # Move on to the next component.
            yield from self._solve_for_skeleton_recursive(problem, skeleton, query_plans, context,
                                                          _depth=_depth + 1)
def init(self):
    """Replay the recorded component calls to reconstruct this benchmark's ground truth.

    Re-executes each skeleton step through the domain's `enumerate` (driven by
    `self.replay_map`), merges the per-step graphs into the intent graph stored in
    `self._graph`, verifies the replayed output against the benchmark's specified
    output, and builds the ground-truth program string in `self.program`.
    """
    domain = PandasLiteSynthesisDomain()
    # Fresh iterators over the recorded replay values for each component.
    replay = {k: iter(v) for k, v in self.replay_map.items()}
    graph = Graph()
    g_inputs = self._g_inputs = [self._convert_inp_to_graph(inp) for inp in self.inputs]
    # Negative indices denote external inputs; positive indices the per-step outputs.
    int_to_val = {-idx: inp for idx, inp in enumerate(self.inputs, 1)}
    int_to_graph = {-idx: g_inp for idx, g_inp in enumerate(g_inputs, 1)}

    # Run the generators to extract the programs and graphs for each component call.
    # Merge the individual graphs into the master graph.
    call_strs: List[str] = []
    for idx, (component_name, arg_ints) in enumerate(self.skeleton, 1):
        c_inputs = [int_to_val[i] for i in arg_ints]
        g_c_inputs = [int_to_graph[i] for i in arg_ints]
        output, program, c_graph, output_graph = next(
            domain.enumerate(component_name, c_inputs, g_c_inputs, replay=replay))
        int_to_val[idx] = output
        int_to_graph[idx] = output_graph
        call_strs.append(program)
        graph.merge(c_graph)

    # Check that the final output is equivalent to the original output specified
    # in the benchmark.
    assert domain.check_equivalent(self.output, int_to_val[self.skeleton.length]), \
        f"Generated output inconsistent with specified output in Pandas benchmark {self.b_id}"

    # Retrofit the value of the output entity to the original output.
    # NOTE(review): identity (`is`) lookup assumes the entity holds the exact replayed
    # object, not a copy — confirm against the domain's graph construction.
    cur_out_entity = next(ent for ent in graph.iter_entities()
                          if ent.value is int_to_val[self.skeleton.length])
    cur_out_entity.value = self.output

    # Perform transitive closure w.r.t the nodes corresponding to the intermediate
    # outputs and take the induced subgraph containing all nodes except those.
    if self.skeleton.length > 1:
        join_nodes = set.union(*(set(int_to_graph[i].iter_nodes())
                                 for i in range(1, self.skeleton.length)))
        domain.perform_transitive_closure(graph, join_nodes=join_nodes)
        intent_graph = graph.induced_subgraph(keep_nodes=set(graph.iter_nodes()) - join_nodes)
    else:
        intent_graph = graph

    self._graph = intent_graph

    # Also construct the string representation of the ground-truth program.
    program_list: List[str] = []
    for depth, (call_str, (component_name, arg_ints)) in enumerate(zip(call_strs, self.skeleton), 1):
        # External inputs render as "inp<k>", intermediate results as "v<k>".
        arg_strs = [f"inp{-i}" if i < 0 else f"v{i}" for i in arg_ints]
        # Each recorded call string carries "inp1", "inp2", ... placeholders.
        call_str = call_str.format(**{f"inp{idx}": arg_str
                                      for idx, arg_str in enumerate(arg_strs, 1)})
        if depth == self.skeleton.length:
            program_list.append(call_str)
        else:
            program_list.append(f"v{depth} = {call_str}")

    self.program = "\n".join(program_list)