예제 #1
0
def create_symbolic_copy(graph: Graph) -> Tuple[Graph, GraphMapping]:
    mapping = GraphMapping()
    for entity in graph.iter_entities():
        mapping.m_ent[entity] = Entity(value=SYMBOLIC_VALUE)

    for node in graph.iter_nodes():
        mapping.m_node[node] = Node(label=node.label,
                                    entity=mapping.m_ent[node.entity],
                                    value=SYMBOLIC_VALUE)

    new_graph = Graph.from_nodes_and_edges(nodes=set(mapping.m_node.values()),
                                           edges={
                                               Edge(src=mapping.m_node[e.src],
                                                    dst=mapping.m_node[e.dst],
                                                    label=e.label)
                                               for e in graph.iter_edges()
                                           })

    return new_graph, mapping
    def init(self):
        domain = PandasLiteSynthesisDomain()
        replay = {k: iter(v) for k, v in self.replay_map.items()}
        graph = Graph()

        g_inputs = self._g_inputs = [
            self._convert_inp_to_graph(inp) for inp in self.inputs
        ]
        int_to_val = {-idx: inp for idx, inp in enumerate(self.inputs, 1)}
        int_to_graph = {-idx: g_inp for idx, g_inp in enumerate(g_inputs, 1)}

        #  Run the generators to extract the programs and graphs for each component call.
        #  Merge the individual graphs into the master graph.
        call_strs: List[str] = []
        for idx, (component_name, arg_ints) in enumerate(self.skeleton, 1):
            c_inputs = [int_to_val[i] for i in arg_ints]
            g_c_inputs = [int_to_graph[i] for i in arg_ints]
            output, program, c_graph, output_graph = next(
                domain.enumerate(component_name,
                                 c_inputs,
                                 g_c_inputs,
                                 replay=replay))
            int_to_val[idx] = output
            int_to_graph[idx] = output_graph
            call_strs.append(program)
            graph.merge(c_graph)

        #  Check that the final output is equivalent to the original output specified in the benchmark.
        assert domain.check_equivalent(self.output, int_to_val[self.skeleton.length]), \
            f"Generated output inconsistent with specified output in Pandas benchmark {self.b_id}"

        #  Retrofit the value of the output entity to the original output
        cur_out_entity = next(ent for ent in graph.iter_entities()
                              if ent.value is int_to_val[self.skeleton.length])
        cur_out_entity.value = self.output

        #  Perform transitive closure w.r.t the nodes corresponding to the intermediate outputs
        #  and take the induced subgraph containing all nodes except those
        if self.skeleton.length > 1:
            join_nodes = set.union(*(set(int_to_graph[i].iter_nodes())
                                     for i in range(1, self.skeleton.length)))
            domain.perform_transitive_closure(graph, join_nodes=join_nodes)
            intent_graph = graph.induced_subgraph(
                keep_nodes=set(graph.iter_nodes()) - join_nodes)
        else:
            intent_graph = graph

        self._graph = intent_graph

        #  Also construct the string representation of the ground-truth program.
        program_list: List[str] = []
        for depth, (call_str,
                    (component_name,
                     arg_ints)) in enumerate(zip(call_strs, self.skeleton), 1):
            arg_strs = [f"inp{-i}" if i < 0 else f"v{i}" for i in arg_ints]
            call_str = call_str.format(**{
                f"inp{idx}": arg_str
                for idx, arg_str in enumerate(arg_strs, 1)
            })
            if depth == self.skeleton.length:
                program_list.append(call_str)
            else:
                program_list.append(f"v{depth} = {call_str}")

        self.program = "\n".join(program_list)