def _instrument_code_recursive(
    self,
    code: CodeType,
) -> CodeType:
    """Recursively instrument a code object for dynamic seeding.

    The code object is turned into a CFG, the CFG is instrumented, and the
    resulting code is handed on so that nested code objects are processed too.

    Args:
        code: The code object to instrument

    Returns:
        The instrumented code object
    """
    self._logger.debug(
        "Instrumenting Code Object for dynamic seeding for %s", code.co_name
    )
    bytecode = Bytecode.from_code(code)
    control_flow_graph = CFG.from_bytecode(bytecode)

    # Sanity checks: the CFG needs an entry node whose single successor
    # carries a basic block.
    entry = control_flow_graph.entry_node
    assert entry is not None, "Entry node cannot be None."
    # Per the original author's note, the entry node has exactly one successor.
    first_real_node = control_flow_graph.get_successors(entry).pop()
    assert first_real_node.basic_block is not None, "Basic block cannot be None."

    self._instrument_cfg(control_flow_graph)
    instrumented_code = control_flow_graph.bytecode_cfg().to_code()
    return self._instrument_inner_code_objects(instrumented_code)
def _instrument_code_recursive(
    self,
    code: CodeType,
    parent_code_object_id: Optional[int] = None,
) -> CodeType:
    """Recursively instrument a code object and register it with the tracer.

    The code object is converted to a CFG, its control-dependence graph is
    computed, and both are registered with the tracer. Afterwards the first
    real basic block and the CFG are instrumented, and the resulting code is
    handed on so that nested code objects are processed with this code
    object's ID as their parent.

    Args:
        code: The code object to instrument
        parent_code_object_id: The ID of the parent code object, if there is one

    Returns:
        The instrumented code object
    """
    self._logger.debug("Instrumenting Code Object for %s", code.co_name)
    bytecode = Bytecode.from_code(code)
    control_flow_graph = CFG.from_bytecode(bytecode)
    control_dependence_graph = ControlDependenceGraph.compute(control_flow_graph)

    meta_data = CodeObjectMetaData(
        code_object=code,
        parent_code_object_id=parent_code_object_id,
        cfg=control_flow_graph,
        cdg=control_dependence_graph,
    )
    code_object_id = self._tracer.register_code_object(meta_data)

    entry = control_flow_graph.entry_node
    assert entry is not None, "Entry node cannot be None."
    # Per the original author's note, the entry node has exactly one successor.
    first_real_node = control_flow_graph.get_successors(entry).pop()
    first_block = first_real_node.basic_block
    assert first_block is not None, "Basic block cannot be None."

    self._add_code_object_executed(first_block, code_object_id)
    self._instrument_cfg(control_flow_graph, code_object_id)

    instrumented_code = control_flow_graph.bytecode_cfg().to_code()
    return self._instrument_inner_code_objects(instrumented_code, code_object_id)
def compute_post_dominator_tree(graph: cfg.CFG) -> DominatorTree:
    """Builds the post-dominator tree of a control-flow graph.

    The tree is obtained by computing the dominator tree of the reversed graph.

    :param graph: The control-flow graph
    :return: The post-dominator tree for the control-flow graph
    """
    return DominatorTree.compute(graph.reversed())
def _create_augmented_graph(graph: cfg.CFG) -> cfg.CFG:
    """Creates a copy of the graph extended by an artificial start node.

    The new start node gets an edge to the graph's entry node and one edge to
    each of the graph's exit nodes. The given graph itself is not modified;
    all additions are made on the copy.

    :param graph: The control-flow graph to augment
    :return: The augmented copy of the control-flow graph
    """
    original_entry = graph.entry_node
    assert original_entry, "Cannot work with CFG without entry node"
    original_exits = graph.exit_nodes

    augmented = graph.copy()
    artificial_start = pg.ProgramGraphNode(index=-sys.maxsize, is_artificial=True)
    augmented.add_node(artificial_start)

    # Connect the artificial start to the entry first, then to every exit.
    augmented.add_edge(artificial_start, original_entry)
    for original_exit in original_exits:
        augmented.add_edge(artificial_start, original_exit)
    return augmented
def _calculate_dominators(
    graph: cfg.CFG,
    dominance_map: Dict[pg.ProgramGraphNode, Set[pg.ProgramGraphNode]],
    node: pg.ProgramGraphNode,
) -> Set[pg.ProgramGraphNode]:
    """Calculates the dominator set of a node from its predecessors' sets.

    The result is the intersection of the dominator sets of all predecessors
    of the node, extended by the node itself. A node without predecessors
    yields an empty set.

    :param graph: The graph used to look up the predecessors of the node
    :param dominance_map: The current dominator set of each node; it must hold
        an entry for every predecessor of the node
    :param node: The node whose dominators are calculated
    :return: A new set containing the dominators of the node
    """
    predecessors = graph.get_predecessors(node)
    if not predecessors:
        return set()

    # Gather the predecessors' dominator sets, checking each one is present.
    predecessor_sets = []
    for predecessor in predecessors:
        known_dominators = dominance_map.get(predecessor)
        assert known_dominators is not None, "Cannot be None"
        predecessor_sets.append(known_dominators)

    if predecessor_sets:
        # Copy the first set so that the sets stored in the map stay untouched.
        head, *tail = predecessor_sets
        common = set(head).intersection(*tail)
    else:
        common = set()
    return common | {node}
def test_integration_create_cfg(conditional_jump_example_bytecode):
    """Check the CFG built from the conditional-jump example bytecode.

    Verifies the cyclomatic complexity, that the entry node is artificial,
    that there is exactly one exit node, and the DOT rendering of the graph.
    """
    control_flow_graph = CFG.from_bytecode(conditional_jump_example_bytecode)
    actual_dot = control_flow_graph.to_dot()
    # NOTE(review): literal reproduced verbatim from the source as seen here;
    # confirm its whitespace matches what to_dot() emits.
    expected_dot = """strict digraph { "ProgramGraphNode(0)"; "ProgramGraphNode(1)"; "ProgramGraphNode(2)"; "ProgramGraphNode(3)"; "ProgramGraphNode(9223372036854775807)"; "ProgramGraphNode(-1)"; "ProgramGraphNode(0)" -> "ProgramGraphNode(1)"; "ProgramGraphNode(0)" -> "ProgramGraphNode(2)"; "ProgramGraphNode(1)" -> "ProgramGraphNode(3)"; "ProgramGraphNode(2)" -> "ProgramGraphNode(3)"; "ProgramGraphNode(3)" -> "ProgramGraphNode(9223372036854775807)"; "ProgramGraphNode(-1)" -> "ProgramGraphNode(0)"; } """

    assert control_flow_graph.cyclomatic_complexity == 2
    assert control_flow_graph.entry_node.is_artificial
    assert len(control_flow_graph.exit_nodes) == 1
    assert actual_dot == expected_dot
def larger_control_flow_graph() -> CFG:
    """Construct a hand-written control-flow graph with two nested cycles.

    The graph runs from an entry node (index ``-sys.maxsize``) to an exit node
    (index ``sys.maxsize``). It contains a cycle through node 140 (closed by
    the edge 190 -> 140) nested inside a cycle through node 110 (closed by the
    edge 210 -> 110). Branching edges carry a "true" or "false" label.

    Returns:
        The constructed control-flow graph
    """
    graph = CFG(MagicMock())

    entry_index = -sys.maxsize
    exit_index = sys.maxsize
    node_indices = (
        entry_index,
        1, 2, 3, 5,
        100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210,
        300,
        exit_index,
    )
    # Create all nodes first, then register them in the same order.
    nodes = {index: ProgramGraphNode(index=index) for index in node_indices}
    for graph_node in nodes.values():
        graph.add_node(graph_node)

    # (source index, target index, optional edge label), in insertion order.
    edges = (
        (entry_index, 1, None),
        (1, 2, None),
        (2, 3, None),
        (3, 5, None),
        (5, 100, None),
        (100, 110, None),
        (110, 120, "true"),
        (120, 130, None),
        (130, 140, None),
        (140, 150, "true"),
        (150, 160, None),
        (160, 170, "false"),
        (170, 180, None),
        (180, 190, None),
        (160, 190, "true"),
        (190, 140, None),
        (140, 200, "false"),
        (200, 210, None),
        (210, 110, None),
        (110, 300, "false"),
        (300, exit_index, None),
    )
    for source, target, label in edges:
        # Only pass the label keyword for edges that actually have one.
        attributes = {} if label is None else {"label": label}
        graph.add_edge(nodes[source], nodes[target], **attributes)
    return graph
def small_control_flow_graph() -> CFG:
    """Construct a small hand-written control-flow graph with one branch.

    The graph is 0 -> 6 -> 5, where node 5 branches to nodes 4 and 3, which
    both continue to node 2 and from there to the exit node (index
    ``sys.maxsize``).

    Returns:
        The constructed control-flow graph
    """
    graph = CFG(MagicMock())

    exit_index = sys.maxsize
    # Create all nodes first, then register them in the same order.
    nodes = {
        index: ProgramGraphNode(index=index)
        for index in (0, 2, 3, 4, 5, 6, exit_index)
    }
    for graph_node in nodes.values():
        graph.add_node(graph_node)

    # (source index, target index), in insertion order.
    edges = (
        (0, 6),
        (6, 5),
        (5, 4),
        (5, 3),
        (4, 2),
        (3, 2),
        (2, exit_index),
    )
    for source, target in edges:
        graph.add_edge(nodes[source], nodes[target])
    return graph
def _instrument_for_loop(
    self,
    cfg: CFG,
    dominator_tree: DominatorTree,
    node: ProgramGraphNode,
    code_object_id: int,
) -> int:
    """Instrument the for loop whose header lives in the given node.

    Only the underlying bytecode CFG is changed, by partially unrolling the
    first iteration. Three new basic blocks are placed after the loop header:

    1. The "entered" block runs when the iterator yields at least one element.
       It reports True to the tracer, keeps the yielded value on top of the
       stack, and jumps to the regular loop body.
    2. The "not entered" block runs when the iterator yields nothing. It
       reports False to the tracer and jumps to the loop's exit instruction.
    3. The new internal header holds a copy of the original "FOR_ITER"
       instruction and serves as the loop header for all further iterations.

    The original header is changed so that it falls through to block 1 or
    jumps to block 2 when no element is yielded.

    Python is a structured language, so no jump can enter the loop while
    bypassing its header. Jumps arriving from outside the loop keep targeting
    the original header and need no change. Jumps from inside the loop (e.g.,
    break or continue) must target the new internal header instead; the
    dominator tree is used to find and redirect them.

    Args:
        cfg: The CFG that contains the loop
        dominator_tree: The dominator tree of the given CFG
        node: The node which contains the header of the for loop
        code_object_id: The ID of the containing code object

    Returns:
        The ID of the instrumented predicate
    """
    header_block = node.basic_block
    assert header_block is not None, "Basic block of for loop cannot be None."
    for_instr = header_block[self._JUMP_OP_POS]
    assert for_instr.name == "FOR_ITER"
    lineno = for_instr.lineno
    predicate_id = self._tracer.register_predicate(
        PredicateMetaData(code_object_id, lineno)
    )

    def report_and_jump(loop_entered: bool, target) -> list:
        """Build the tracer call reporting the outcome, then a jump to target."""
        return [
            Instr("LOAD_CONST", self._tracer, lineno=lineno),
            Instr(
                "LOAD_METHOD",
                ExecutionTracer.executed_bool_predicate.__name__,
                lineno=lineno,
            ),
            Instr("LOAD_CONST", loop_entered, lineno=lineno),
            Instr("LOAD_CONST", predicate_id, lineno=lineno),
            Instr("CALL_METHOD", 2, lineno=lineno),
            Instr("POP_TOP", lineno=lineno),
            Instr("JUMP_ABSOLUTE", target, lineno=lineno),
        ]

    # Capture the original state before the header and block layout change.
    for_instr_copy = for_instr.copy()
    for_loop_exit = for_instr.arg
    for_loop_body = header_block.next_block

    # pylint:disable=unbalanced-tuple-unpacking
    entered, not_entered, new_header = self._create_consecutive_blocks(
        cfg.bytecode_cfg(), header_block, 3
    )
    # The original header now jumps to the "not entered" block on exhaustion.
    for_instr.arg = not_entered

    entered.extend(report_and_jump(True, for_loop_body))
    not_entered.extend(report_and_jump(False, for_loop_exit))
    new_header.append(for_instr_copy)

    # Redirect internal jumps to the new loop header
    for successor in dominator_tree.get_transitive_successors(node):
        successor_block = successor.basic_block
        if successor_block is None:
            continue
        jump_instr = successor_block[self._JUMP_OP_POS]
        if jump_instr.arg is node.basic_block:
            jump_instr.arg = new_header
    return predicate_id
def test_integration_while_loop():
    """Check node count, entry index and exit index of the CFG of Foo.receive."""
    receive_bytecode = Bytecode.from_code(Foo.receive.__code__)
    control_flow_graph = CFG.from_bytecode(receive_bytecode)

    assert len(control_flow_graph.nodes) == 3
    assert control_flow_graph.entry_node.index == -1
    exit_node = control_flow_graph.exit_nodes.pop()
    assert exit_node.index == sys.maxsize
def test_integration_copy_cfg(conditional_jump_example_bytecode):
    """Check that a copied CFG renders to the same DOT text as its original."""
    original = CFG.from_bytecode(conditional_jump_example_bytecode)
    duplicate = original.copy()
    assert duplicate.to_dot() == original.to_dot()