Beispiel #1
0
    def _instrument_code_recursive(
        self,
        code: CodeType,
    ) -> CodeType:
        """Instrument a code object and hand the result on for its inner code objects.

        A CFG is built from the bytecode of ``code``, instrumented via
        ``_instrument_cfg`` and converted back to a code object, which is then
        passed to ``_instrument_inner_code_objects``.

        Args:
            code: The code object that should be instrumented

        Returns:
            The instrumented code object
        """
        self._logger.debug(
            "Instrumenting Code Object for dynamic seeding for %s", code.co_name
        )
        bytecode = Bytecode.from_code(code)
        cfg = CFG.from_bytecode(bytecode)

        entry = cfg.entry_node
        assert entry is not None, "Entry node cannot be None."
        # Sanity check: the node following the entry node must carry a basic block.
        entry_successors = cfg.get_successors(entry)
        real_entry_node = entry_successors.pop()  # Only one exists!
        assert real_entry_node.basic_block is not None, "Basic block cannot be None."

        self._instrument_cfg(cfg)
        instrumented_code = cfg.bytecode_cfg().to_code()
        return self._instrument_inner_code_objects(instrumented_code)
Beispiel #2
0
    def _instrument_code_recursive(
        self,
        code: CodeType,
        parent_code_object_id: Optional[int] = None,
    ) -> CodeType:
        """Register and instrument a code object, then hand it on for its inner ones.

        The code object is registered with the tracer together with its CFG and
        control-dependence graph. The block of the real entry node receives a
        "code object executed" instrumentation before the CFG itself is
        instrumented.

        Args:
            code: The code object that should be instrumented
            parent_code_object_id: The ID of the optional parent code object

        Returns:
            The instrumented code object
        """
        self._logger.debug("Instrumenting Code Object for %s", code.co_name)
        bytecode = Bytecode.from_code(code)
        cfg = CFG.from_bytecode(bytecode)
        metadata = CodeObjectMetaData(
            code_object=code,
            parent_code_object_id=parent_code_object_id,
            cfg=cfg,
            cdg=ControlDependenceGraph.compute(cfg),
        )
        code_object_id = self._tracer.register_code_object(metadata)

        entry = cfg.entry_node
        assert entry is not None, "Entry node cannot be None."
        entry_successors = cfg.get_successors(entry)
        real_entry_node = entry_successors.pop()  # Only one exists!
        first_block = real_entry_node.basic_block
        assert first_block is not None, "Basic block cannot be None."
        self._add_code_object_executed(first_block, code_object_id)

        self._instrument_cfg(cfg, code_object_id)
        instrumented_code = cfg.bytecode_cfg().to_code()
        return self._instrument_inner_code_objects(instrumented_code, code_object_id)
    def compute_post_dominator_tree(graph: cfg.CFG) -> DominatorTree:
        """Compute the post-dominator tree of a control-flow graph.

        The result is the dominator tree of the reversed control-flow graph.

        Args:
            graph: The control-flow graph

        Returns:
            The post-dominator tree for the control-flow graph
        """
        return DominatorTree.compute(graph.reversed())
 def _create_augmented_graph(graph: cfg.CFG) -> cfg.CFG:
     """Return a copy of the graph extended by an artificial start node.

     The start node has index ``-sys.maxsize`` and gets an edge to the entry
     node and to every exit node of ``graph``. All additions are made on the
     copy obtained from ``graph.copy()``.

     Args:
         graph: The control-flow graph; it must have an entry node

     Returns:
         The augmented copy of the control-flow graph
     """
     entry = graph.entry_node
     assert entry, "Cannot work with CFG without entry node"
     exits = graph.exit_nodes
     augmented = graph.copy()
     artificial_start = pg.ProgramGraphNode(index=-sys.maxsize, is_artificial=True)
     augmented.add_node(artificial_start)
     # Entry node first, then the exit nodes in their original order.
     for target in (entry, *exits):
         augmented.add_edge(artificial_start, target)
     return augmented
Beispiel #5
0
    def _calculate_dominators(
        graph: cfg.CFG,
        dominance_map: Dict[pg.ProgramGraphNode, Set[pg.ProgramGraphNode]],
        node: pg.ProgramGraphNode,
    ) -> Set[pg.ProgramGraphNode]:
        dominators: Set[pg.ProgramGraphNode] = {node}
        intersection: Set[pg.ProgramGraphNode] = set()
        predecessors = graph.get_predecessors(node)
        if not predecessors:
            return set()

        first_time: bool = True
        for predecessor in predecessors:
            predecessor_dominators = dominance_map.get(predecessor)
            assert predecessor_dominators is not None, "Cannot be None"
            if first_time:
                intersection = intersection.union(predecessor_dominators)
                first_time = False
            else:
                intersection.intersection_update(predecessor_dominators)
        intersection = intersection.union(dominators)
        return intersection
def test_integration_create_cfg(conditional_jump_example_bytecode):
    """Check the CFG that is built from the conditional-jump example bytecode.

    Verifies the cyclomatic complexity, that the entry node is artificial, the
    number of exit nodes, and the exact DOT rendering of the graph.
    """
    control_flow_graph = CFG.from_bytecode(conditional_jump_example_bytecode)
    actual_dot = control_flow_graph.to_dot()
    expected_dot = """strict digraph  {
"ProgramGraphNode(0)";
"ProgramGraphNode(1)";
"ProgramGraphNode(2)";
"ProgramGraphNode(3)";
"ProgramGraphNode(9223372036854775807)";
"ProgramGraphNode(-1)";
"ProgramGraphNode(0)" -> "ProgramGraphNode(1)";
"ProgramGraphNode(0)" -> "ProgramGraphNode(2)";
"ProgramGraphNode(1)" -> "ProgramGraphNode(3)";
"ProgramGraphNode(2)" -> "ProgramGraphNode(3)";
"ProgramGraphNode(3)" -> "ProgramGraphNode(9223372036854775807)";
"ProgramGraphNode(-1)" -> "ProgramGraphNode(0)";
}
"""
    assert control_flow_graph.cyclomatic_complexity == 2
    assert control_flow_graph.entry_node.is_artificial
    assert len(control_flow_graph.exit_nodes) == 1
    assert actual_dot == expected_dot
def larger_control_flow_graph() -> CFG:
    """Build a hand-crafted CFG with 19 nodes and labelled branch edges.

    Nodes and edges are inserted in a fixed order. The graph runs from the
    node with index ``-sys.maxsize`` to the node with index ``sys.maxsize``
    and contains, among others, the edges 190 -> 140 and 210 -> 110 that lead
    back to earlier nodes.

    Returns:
        The constructed control-flow graph
    """
    graph = CFG(MagicMock())
    first, last = -sys.maxsize, sys.maxsize
    indices = (
        first, 1, 2, 3, 5,
        100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210,
        300, last,
    )
    nodes = {index: ProgramGraphNode(index=index) for index in indices}
    for graph_node in nodes.values():
        graph.add_node(graph_node)

    # (source index, target index, edge label or None for an unlabelled edge)
    edges = (
        (first, 1, None),
        (1, 2, None),
        (2, 3, None),
        (3, 5, None),
        (5, 100, None),
        (100, 110, None),
        (110, 120, "true"),
        (120, 130, None),
        (130, 140, None),
        (140, 150, "true"),
        (150, 160, None),
        (160, 170, "false"),
        (170, 180, None),
        (180, 190, None),
        (160, 190, "true"),
        (190, 140, None),
        (140, 200, "false"),
        (200, 210, None),
        (210, 110, None),
        (110, 300, "false"),
        (300, last, None),
    )
    for source, target, label in edges:
        if label is None:
            graph.add_edge(nodes[source], nodes[target])
        else:
            graph.add_edge(nodes[source], nodes[target], label=label)
    return graph
def small_control_flow_graph() -> CFG:
    """Build a hand-crafted CFG with seven nodes and a single diamond.

    The graph runs 0 -> 6 -> 5, branches from 5 to 4 and 3, joins again in 2
    and ends in the node with index ``sys.maxsize``.

    Returns:
        The constructed control-flow graph
    """
    cfg = CFG(MagicMock())
    last = sys.maxsize
    nodes = {
        index: ProgramGraphNode(index=index) for index in (0, 2, 3, 4, 5, 6, last)
    }
    for graph_node in nodes.values():
        cfg.add_node(graph_node)

    edges = ((0, 6), (6, 5), (5, 4), (5, 3), (4, 2), (3, 2), (2, last))
    for source, target in edges:
        cfg.add_edge(nodes[source], nodes[target])
    return cfg
Beispiel #9
0
    def _instrument_for_loop(
        self,
        cfg: CFG,
        dominator_tree: DominatorTree,
        node: ProgramGraphNode,
        code_object_id: int,
    ) -> int:
        """Instrument the for loop whose header is located in the given node.

        Only the underlying bytecode CFG is changed, by partially unrolling the
        first iteration. Three basic blocks are created after the loop header:

        1. The "entered" block runs if the iterator yields at least one
           element. It reports the boolean value True to the tracer, leaves
           the yielded value on top of the stack and jumps to the regular body
           of the loop.
        2. The "not entered" block runs if the iterator yields no element. It
           reports the boolean value False to the tracer and jumps to the exit
           instruction of the loop.
        3. The new internal header consists of a copy of the original
           "FOR_ITER" instruction of the loop.

        The original loop header is changed so that it either falls through to
        the first block or jumps to the second one if no element is yielded.

        Since Python is a structured programming language, no jump can enter
        the loop while bypassing its header (e.g., GOTO). Jumps that reach the
        header from outside the loop keep targeting the original header and
        need no change. Jumps that originate inside the loop (e.g., break or
        continue) have to be redirected to the new internal header; the
        dominator tree is used to find them.

        Args:
            cfg: The CFG that contains the loop
            dominator_tree: The dominator tree of the given CFG.
            node: The node which contains the header of the for loop.
            code_object_id: The id of the containing Code Object.

        Returns:
            The ID of the instrumented predicate
        """
        header_block = node.basic_block
        assert header_block is not None, "Basic block of for loop cannot be None."
        for_instr = header_block[self._JUMP_OP_POS]
        assert for_instr.name == "FOR_ITER"
        lineno = for_instr.lineno
        predicate_id = self._tracer.register_predicate(
            PredicateMetaData(code_object_id, lineno)
        )

        def report_and_jump(loop_entered, jump_target):
            """Create the instructions that report the outcome and then jump."""
            return [
                Instr("LOAD_CONST", self._tracer, lineno=lineno),
                Instr(
                    "LOAD_METHOD",
                    ExecutionTracer.executed_bool_predicate.__name__,
                    lineno=lineno,
                ),
                Instr("LOAD_CONST", loop_entered, lineno=lineno),
                Instr("LOAD_CONST", predicate_id, lineno=lineno),
                Instr("CALL_METHOD", 2, lineno=lineno),
                Instr("POP_TOP", lineno=lineno),
                Instr("JUMP_ABSOLUTE", jump_target, lineno=lineno),
            ]

        # Snapshot the original header state before anything is rewired.
        original_for_instr = for_instr.copy()
        loop_exit = for_instr.arg
        loop_body = header_block.next_block

        # pylint:disable=unbalanced-tuple-unpacking
        entered, not_entered, new_header = self._create_consecutive_blocks(
            cfg.bytecode_cfg(), header_block, 3
        )
        for_instr.arg = not_entered

        entered.extend(report_and_jump(True, loop_body))
        not_entered.extend(report_and_jump(False, loop_exit))
        new_header.append(original_for_instr)

        # Redirect internal jumps to the new loop header
        for dominated in dominator_tree.get_transitive_successors(node):
            block = dominated.basic_block
            if block is None:
                continue
            jump_instr = block[self._JUMP_OP_POS]
            if jump_instr.arg is header_block:
                jump_instr.arg = new_header
        return predicate_id
def test_integration_while_loop():
    """Check node count, entry index and exit index of the CFG of ``Foo.receive``."""
    bytecode = Bytecode.from_code(Foo.receive.__code__)
    while_loop_cfg = CFG.from_bytecode(bytecode)
    assert len(while_loop_cfg.nodes) == 3
    assert while_loop_cfg.entry_node.index == -1
    exit_node = while_loop_cfg.exit_nodes.pop()
    assert exit_node.index == sys.maxsize
def test_integration_copy_cfg(conditional_jump_example_bytecode):
    """Check that a copied CFG renders to the same DOT output as its source."""
    original = CFG.from_bytecode(conditional_jump_example_bytecode)
    duplicate = original.copy()
    assert duplicate.to_dot() == original.to_dot()