Ejemplo n.º 1
0
    def reduce_is_invalid(self, rule, ast, tokens, first, last):
        """
        Return a true value when the proposed reduction must be rejected.

        The superclass check runs first; any true verdict from it is
        returned unchanged.  Otherwise a handful of specific rules are
        vetted, mostly by comparing where a conditional jump lands with
        the token found at index ``last``.

        rule   -- (lhs, rhs_tuple) pair identifying the grammar rule
        ast    -- nodes matched for the right-hand side of the rule
        tokens -- the complete token list
        first  -- index in ``tokens`` where the match begins
        last   -- index in ``tokens`` used as the "end" reference point
        """
        parent_verdict = super(Python27Parser, self).reduce_is_invalid(
            rule, ast, tokens, first, last)
        if parent_verdict:
            return parent_verdict

        if rule == ('and', ('expr', 'jmp_false', 'expr', '\\e_come_from_opt')):
            end_token = tokens[last]
            # A YIELD_VALUE right after the would-be "and" suggests this is
            # really an "if" inside a comprehension.
            # (Testing that last+1 is "POP_TOP" might also be worthwhile.)
            if end_token == 'YIELD_VALUE':
                return True

            # Valid only if jmp_false lands on the end of the "and", or
            # shares its printable target with the end token.
            jump = ast[1][0]
            landing = jump.offset + jump.attr + 3
            if landing == end_token.offset:
                return False
            return not (end_token.pattr == jump.pattr)

        lhs = rule[0]

        if lhs == 'raise_stmt1':
            return ast[0] == 'expr' and ast[0][0] == 'or'

        if lhs in ('assert', 'assert2'):
            jump = ast[1][0]
            landing = jump.attr
            # Running off the end of the token list counts as acceptable.
            if last >= len(tokens):
                return False
            if landing == tokens[last].offset:
                return False
            tail = ast[-1]
            return not (landing == next_offset(tail.op, tail.opc, tail.offset))

        if rule in (('list_if_not', ('expr', 'jmp_true', 'list_iter')),
                    ('list_if', ('expr', 'jmp_false', 'list_iter'))):
            # Reject when the jump goes forward but stops short of the end.
            jump = ast[1][0]
            landing = jump.attr
            return landing > jump.offset and landing < tokens[last].offset

        if rule == ('or', ('expr', 'jmp_true', 'expr', '\\e_come_from_opt')):
            # jmp_true must not land in the middle of the "or"; it may land
            # on the end or share its printable target with the end token.
            jump = ast[1][0]
            landing = jump.offset + jump.attr + 3
            end_token = tokens[last]
            if landing == end_token.offset:
                return False
            return not (end_token.pattr == jump.pattr)

        if (lhs == 'whilestmt'
                and rule[1][:-2] == ('SETUP_LOOP', 'testexpr', 'l_stmts_opt',
                                     'JUMP_BACK', 'JUMP_BACK')):
            # Both trailing JUMP_BACKs have to go to the same place.
            idx = last - 1
            while tokens[idx] != 'JUMP_BACK':
                idx -= 1
            return tokens[idx].attr != tokens[idx - 1].attr

        if lhs == 'if_expr_true':
            return first > 0 and tokens[first - 1] == 'POP_JUMP_IF_FALSE'

        return False
Ejemplo n.º 2
0
    def reduce_is_invalid(self, rule, ast, tokens, first, last):
        """
        Return a true value when the proposed reduction must be rejected.

        The superclass check runs first; any true verdict from it is
        returned unchanged.  Otherwise a handful of specific rules are
        vetted, mostly by comparing where a conditional jump lands with
        the token found at index ``last``.

        rule   -- (lhs, rhs_tuple) pair identifying the grammar rule
        ast    -- nodes matched for the right-hand side of the rule
        tokens -- the complete token list
        first  -- index in ``tokens`` where the match begins (unused here)
        last   -- index in ``tokens`` used as the "end" reference point
        """
        parent_verdict = super(Python27Parser, self).reduce_is_invalid(
            rule, ast, tokens, first, last)
        if parent_verdict:
            return parent_verdict

        if rule == ('and', ('expr', 'jmp_false', 'expr', '\\e_come_from_opt')):
            # Valid only if jmp_false lands on the end of the "and", or
            # shares its printable target with the end token.
            jump = ast[1][0]
            landing = jump.offset + jump.attr + 3
            end_token = tokens[last]
            if landing == end_token.offset:
                return False
            return not (end_token.pattr == jump.pattr)

        lhs = rule[0]

        if lhs == 'raise_stmt1':
            return ast[0] == 'expr' and ast[0][0] == 'or'

        if lhs in ('assert', 'assert2'):
            jump = ast[1][0]
            landing = jump.attr
            # Running off the end of the token list counts as acceptable.
            if last >= len(tokens):
                return False
            if landing == tokens[last].offset:
                return False
            tail = ast[-1]
            return not (landing == next_offset(tail.op, tail.opc, tail.offset))

        if rule in (('list_if_not', ('expr', 'jmp_true', 'list_iter')),
                    ('list_if', ('expr', 'jmp_false', 'list_iter'))):
            # Reject when the jump goes forward but stops short of the end.
            jump = ast[1][0]
            landing = jump.attr
            return landing > jump.offset and landing < tokens[last].offset

        if rule == ('or', ('expr', 'jmp_true', 'expr', '\\e_come_from_opt')):
            # jmp_true must not land in the middle of the "or"; it may land
            # on the end or share its printable target with the end token.
            jump = ast[1][0]
            landing = jump.offset + jump.attr + 3
            end_token = tokens[last]
            if landing == end_token.offset:
                return False
            return not (end_token.pattr == jump.pattr)

        if (lhs == 'whilestmt'
                and rule[1][:-2] == ('SETUP_LOOP', 'testexpr', 'l_stmts_opt',
                                     'JUMP_BACK', 'JUMP_BACK')):
            # Both trailing JUMP_BACKs have to go to the same place.
            idx = last - 1
            while tokens[idx] != 'JUMP_BACK':
                idx -= 1
            return tokens[idx].attr != tokens[idx - 1].attr

        # FIXME: a 'conditional_true' check was once sketched here as a hack
        # (inspect the expr for nodes that could have been a Boolean, and
        # look for dead code); it is intentionally not enabled.

        return False
Ejemplo n.º 3
0
 def get_target(self, offset, extended_arg=0):
     """
     Return the offset that the instruction at <offset> leads to.

     For a relative or absolute jump opcode this is the instruction's
     ``argval``; for anything else it is the offset of the instruction
     that follows (the fall-through offset).

     NOTE: extended_arg is accepted but no longer used.
     """
     instr = self.get_inst(offset)
     jump_opcodes = self.opc.JREL_OPS | self.opc.JABS_OPS
     if instr.opcode not in jump_opcodes:
         # Not a jump: control simply falls through to the next instruction.
         return next_offset(instr.opcode, self.opc, instr.offset)
     return instr.argval
Ejemplo n.º 4
0
    def detect_control_flow(self, offset, targets, inst_index):
        """
        Detect structures and their boundaries to fix optimized jumps.

        Python 3.0 is more like Python 2.6 than it is Python 3.x.
        So we have a special routine here.

        Depending on the opcode of the instruction examined, this records
        what it finds in ``self.structs``, ``self.fixed_jumps``,
        ``self.loops``, ``self.setup_loops``, ``self.ignore_if``,
        ``self.not_continue`` and ``self.return_end_ifs``.  Nothing is
        returned; every exit path is a bare ``return``.

        :param offset:     bytecode offset of the instruction being examined
        :param targets:    mapping indexed by offset whose values are iterated
                           as offsets into the code (presumably the jump
                           sources reaching that offset -- confirm with caller)
        :param inst_index: index into ``self.insts`` of the instruction whose
                           opcode is examined
        """

        code = self.code
        op = self.insts[inst_index].opcode

        # Detect parent structure
        parent = self.structs[0]
        start = parent["start"]
        end = parent["end"]

        # Pick inner-most parent for our offset
        for struct in self.structs:
            current_start = struct["start"]
            current_end = struct["end"]
            if (current_start <= offset < current_end) and (
                    current_start >= start and current_end <= end):
                start = current_start
                end = current_end
                parent = struct

        if op == self.opc.SETUP_LOOP:
            # We categorize loop types: 'for', 'while', 'while 1' with
            # possibly suffixes '-loop' and '-else'
            # Try to find the jump_back instruction of the loop.
            # It could be a return instruction.

            # NOTE(review): "start" is still the enclosing struct's start at
            # this point, not "offset" -- confirm this is the intended base.
            start += instruction_size(op, self.opc)
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            # Remember which SETUP_LOOP offset leads to this target.
            self.setup_loops[target] = offset

            if target != end:
                self.fixed_jumps[offset] = end

            (line_no, next_line_byte) = self.lines[offset]
            jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                        next_line_byte, False)

            if jump_back:
                jump_forward_offset = xdis.next_offset(code[jump_back],
                                                       self.opc, jump_back)
            else:
                jump_forward_offset = None

            # Offset two instructions before "end"
            return_val_offset1 = self.prev[self.prev[end]]

            # Discard the candidate jump-back when the loop really ends in a
            # RETURN_VALUE (optionally followed by POP_BLOCK).
            if (jump_back and jump_back != self.prev_op[end]
                    and self.is_jump_forward(jump_forward_offset)):
                if code[self.prev_op[end]] == self.opc.RETURN_VALUE or (
                        code[self.prev_op[end]] == self.opc.POP_BLOCK
                        and code[return_val_offset1] == self.opc.RETURN_VALUE):
                    jump_back = None
            if not jump_back:
                # loop suite ends in return
                jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
                if not jump_back:
                    return

                jb_inst = self.get_inst(jump_back)
                jump_back = self.next_offset(jb_inst.opcode, jump_back)

                if_offset = None
                if code[self.prev_op[next_line_byte]] not in JUMP_TF:
                    if_offset = self.prev[next_line_byte]
                if if_offset:
                    loop_type = "while"
                    self.ignore_if.add(if_offset)
                else:
                    loop_type = "for"
                target = next_line_byte
                # NOTE(review): 3 is presumably the size of an instruction
                # with an argument in this bytecode format -- confirm.
                end = jump_back + 3
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end,
                                                self.opc.JUMP_ABSOLUTE, start,
                                                False)

                jb_inst = self.get_inst(jump_back)

                jb_next_offset = self.next_offset(jb_inst.opcode, jump_back)
                # NOTE(review): "jump_back + 4" below is a magic number; it
                # is not derived from instruction_size() -- confirm.
                if end > jb_next_offset and self.is_jump_forward(end):
                    if self.is_jump_forward(jb_next_offset):
                        if self.get_target(jump_back +
                                           4) == self.get_target(end):
                            self.fixed_jumps[offset] = jump_back + 4
                            end = jb_next_offset
                elif target < offset:
                    self.fixed_jumps[offset] = jump_back + 4
                    end = jb_next_offset

                target = self.get_target(jump_back)

                if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                    loop_type = "for"
                else:
                    loop_type = "while"
                    test = self.prev_op[next_line_byte]

                    if test == offset:
                        loop_type = "while 1"
                    elif self.code[test] in self.opc.JUMP_OPs:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        if test_target > (jump_back + 3):
                            jump_back = test_target
                self.not_continue.add(jump_back)
            self.loops.append(target)
            self.structs.append({
                "type": loop_type + "-loop",
                "start": target,
                "end": jump_back
            })
            # Anything between the end of the loop body (skipping one
            # POP_TOP, if present) and "end" is recorded as the loop's else.
            after_jump_offset = xdis.next_offset(code[jump_back], self.opc,
                                                 jump_back)
            if self.get_inst(after_jump_offset).opname == "POP_TOP":
                after_jump_offset = xdis.next_offset(code[after_jump_offset],
                                                     self.opc,
                                                     after_jump_offset)
            if after_jump_offset != end:
                self.structs.append({
                    "type": loop_type + "-else",
                    "start": after_jump_offset,
                    "end": end,
                })
        elif op in self.pop_jump_tf:
            start = offset + instruction_size(op, self.opc)
            target = self.get_target(offset)
            rtarget = self.restrict_to_parent(target, parent)
            prev_op = self.prev_op

            # Do not let jump to go out of parent struct bounds
            if target != rtarget and parent["type"] == "and/or":
                self.fixed_jumps[offset] = rtarget
                return

            # Does this jump to right after another conditional jump that is
            # not myself?  If so, it's part of a larger conditional.
            # rocky: if we have a conditional jump to the next instruction, then
            # possibly I am "skipping over" a "pass" or null statement.

            if ((code[prev_op[target]] in self.pop_jump_if_pop)
                    and (target > offset) and prev_op[target] != offset):
                self.fixed_jumps[offset] = prev_op[target]
                self.structs.append({
                    "type": "and/or",
                    "start": start,
                    "end": prev_op[target]
                })
                return

            # The op offset just before the target jump offset is important
            # in making a determination of what we have. Save that.
            pre_rtarget = prev_op[rtarget]

            # Is it an "and" inside an "if" or "while" block
            if op == opc.JUMP_IF_FALSE:

                # Search for another JUMP_IF_FALSE targeting the same op,
                # in current statement, starting from current offset, and filter
                # everything inside inner 'or' jumps and midline ifs
                match = self.rem_or(start, self.next_stmt[offset],
                                    opc.JUMP_IF_FALSE, target)

                # If we still have any offsets in set, start working on it
                if match:
                    is_jump_forward = self.is_jump_forward(pre_rtarget)
                    if (is_jump_forward and pre_rtarget not in self.stmts
                            and self.restrict_to_parent(
                                self.get_target(pre_rtarget),
                                parent) == rtarget):
                        if (code[prev_op[pre_rtarget]]
                                == self.opc.JUMP_ABSOLUTE
                                and self.remove_mid_line_ifs([offset])
                                and target == self.get_target(
                                    prev_op[pre_rtarget]) and
                            (prev_op[pre_rtarget] not in self.stmts
                             or self.get_target(prev_op[pre_rtarget]) >
                             prev_op[pre_rtarget]) and 1 == len(
                                 self.remove_mid_line_ifs(
                                     self.rem_or(start, prev_op[pre_rtarget],
                                                 JUMP_TF, target)))):
                            pass
                        elif (code[prev_op[pre_rtarget]]
                              == self.opc.RETURN_VALUE
                              and self.remove_mid_line_ifs([offset])
                              and 1 == (len(
                                  set(
                                      self.remove_mid_line_ifs(
                                          self.rem_or(
                                              start,
                                              prev_op[pre_rtarget],
                                              JUMP_TF,
                                              target,
                                          )))
                                  | set(
                                      self.remove_mid_line_ifs(
                                          self.rem_or(
                                              start,
                                              prev_op[pre_rtarget],
                                              (
                                                  opc.JUMP_IF_FALSE,
                                                  opc.JUMP_IF_TRUE,
                                                  opc.JUMP_ABSOLUTE,
                                              ),
                                              pre_rtarget,
                                              True,
                                          )))))):
                            pass
                        else:
                            # Prefer the first matching JUMP_IF_FALSE that is
                            # directly followed by a new line (and is not
                            # preceded by a non-matching one); otherwise fall
                            # back to the last entry of "match".
                            fix = None
                            jump_ifs = self.inst_matches(
                                start, self.next_stmt[offset],
                                opc.JUMP_IF_FALSE)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    # FIXME: remove magic number
                                    if self.lines[
                                            j].next == j + 3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[offset] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[offset] = match[-1]
                        return
            # op == JUMP_IF_TRUE
            else:
                next = self.next_stmt[offset]
                if prev_op[next] == offset:
                    pass
                elif self.is_jump_forward(next) and target == self.get_target(
                        next):
                    if code[prev_op[next]] == opc.JUMP_IF_FALSE:
                        if (code[next] == self.opc.JUMP_FORWARD
                                or target != rtarget
                                or code[prev_op[pre_rtarget]]
                                not in (self.opc.JUMP_ABSOLUTE,
                                        self.opc.RETURN_VALUE)):
                            self.fixed_jumps[offset] = prev_op[next]
                            return
                elif (code[next] == self.opc.JUMP_ABSOLUTE
                      and self.is_jump_forward(target)
                      and self.get_target(target) == self.get_target(next)):
                    self.fixed_jumps[offset] = prev_op[next]
                    return

            # Don't add a struct for a while test, it's already taken care of
            if offset in self.ignore_if:
                return

            # When the instruction before the target is a JUMP_ABSOLUTE that
            # starts a statement, treat that jump as the restricted target.
            if (code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
                    and pre_rtarget in self.stmts and pre_rtarget != offset
                    and prev_op[pre_rtarget] != offset
                    and not (code[rtarget] == self.opc.JUMP_ABSOLUTE
                             and code[rtarget + 3] == self.opc.POP_BLOCK
                             and code[prev_op[pre_rtarget]] !=
                             self.opc.JUMP_ABSOLUTE)):
                rtarget = pre_rtarget

            # Does the "jump if" jump beyond a jump op?
            # That is, we have something like:
            #  JUMP_IF_FALSE HERE
            #  ...
            # JUMP_FORWARD
            # HERE:
            #
            # If so, this can be block inside an "if" statement
            # or a conditional assignment like:
            #   x = 1 if x else 2
            #
            # There are other contexts we may need to consider
            # like whether the target is "END_FINALLY"
            # or if the condition jump is to a forward location
            if self.is_jump_forward(pre_rtarget):
                if_end = self.get_target(pre_rtarget, 0)

                # If the jump target is back, we are looping
                if if_end < pre_rtarget and (code[prev_op[if_end]]
                                             == self.opc.SETUP_LOOP):
                    if if_end > start:
                        return

                end = self.restrict_to_parent(if_end, parent)

                self.structs.append({
                    "type": "if-then",
                    "start": start,
                    "end": pre_rtarget
                })
                self.not_continue.add(pre_rtarget)

                # if rtarget < end and (
                #         code[rtarget] not in (self.opc.END_FINALLY,
                #                               self.opc.JUMP_ABSOLUTE) and
                #         code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
                #                                         self.opc.END_FINALLY)):
                #     self.structs.append({'type': 'else',
                #                          'start': rtarget,
                #                          'end': end})
                #     self.else_start[rtarget] = end
            elif self.is_jump_back(pre_rtarget, 0):
                if_end = rtarget
                self.structs.append({
                    "type": "if-then",
                    "start": start,
                    "end": pre_rtarget
                })
                self.not_continue.add(pre_rtarget)
            elif code[pre_rtarget] in (self.opc.RETURN_VALUE,
                                       self.opc.BREAK_LOOP):
                self.structs.append({
                    "type": "if-then",
                    "start": start,
                    "end": rtarget
                })
                # It is important to distinguish if this return is inside some sort
                # except block return
                jump_prev = prev_op[offset]
                if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                    if self.opc.cmp_op[code[jump_prev +
                                            1]] == "exception-match":
                        return
                # NOTE(review): the docstring describes this routine as the
                # Python 3.0 variant, yet this branch is gated on
                # version >= 3.5 -- confirm whether it is reachable.
                if self.version >= 3.5:
                    # Python 3.5 may remove as dead code a JUMP
                    # instruction after a RETURN_VALUE. So we check
                    # based on seeing SETUP_EXCEPT various places.
                    if code[rtarget] == self.opc.SETUP_EXCEPT:
                        return
                    # Check that next instruction after pops and jump is
                    # not from SETUP_EXCEPT
                    next_op = rtarget
                    if code[next_op] == self.opc.POP_BLOCK:
                        next_op += instruction_size(self.code[next_op],
                                                    self.opc)
                    if code[next_op] == self.opc.JUMP_ABSOLUTE:
                        next_op += instruction_size(self.code[next_op],
                                                    self.opc)
                    if next_op in targets:
                        for try_op in targets[next_op]:
                            come_from_op = code[try_op]
                            if come_from_op == self.opc.SETUP_EXCEPT:
                                return
                            pass
                    pass
                if code[pre_rtarget] == self.opc.RETURN_VALUE:
                    if self.version == 3.0:
                        next_op = rtarget
                        # NOTE(review): the assignment below is a no-op since
                        # next_op already equals rtarget; possibly it was
                        # meant to step past the POP_TOP -- confirm.
                        if code[next_op] == self.opc.POP_TOP:
                            next_op = rtarget
                        # A return that sits at the end of a while-loop is
                        # not recorded as a RETURN_END_IF.
                        for block in self.structs:
                            if (block["type"] == "while-loop"
                                    and block["end"] == next_op):
                                return
                        next_op += instruction_size(self.code[next_op],
                                                    self.opc)
                        if code[next_op] == self.opc.POP_BLOCK:
                            return
                    self.return_end_ifs.add(pre_rtarget)
                else:
                    self.fixed_jumps[offset] = rtarget
                    self.not_continue.add(pre_rtarget)

        elif op == self.opc.SETUP_EXCEPT:
            # Clamp the handler target to the enclosing structure.
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op == self.opc.SETUP_FINALLY:
            # Same treatment as SETUP_EXCEPT above.
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op in self.jump_if_pop:
            target = self.get_target(offset)
            if target > offset:
                unop_target = self.last_instr(offset, target,
                                              self.opc.JUMP_FORWARD, target)
                # NOTE(review): "+ 3" is another hard-coded instruction size.
                if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
                    self.fixed_jumps[offset] = unop_target
                else:
                    self.fixed_jumps[offset] = self.restrict_to_parent(
                        target, parent)
                    pass
                pass
        elif self.version >= 3.5:
            # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
            # misclassified as RETURN_END_IF. Handle that here.
            # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
            if op == self.opc.RETURN_VALUE:
                if (offset + 1 < len(code)
                        and code[offset + 1] == self.opc.JUMP_ABSOLUTE
                        and offset in self.return_end_ifs):
                    self.return_end_ifs.remove(offset)
                    pass
                pass
            elif op == self.opc.JUMP_FORWARD:
                # If we have:
                #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
                # then RETURN_VALUE is not RETURN_END_IF
                rtarget = self.get_target(offset)
                rtarget_prev = self.prev[rtarget]
                if (code[rtarget_prev] == self.opc.RETURN_VALUE
                        and rtarget_prev in self.return_end_ifs):
                    # Walk backwards from the RETURN_VALUE to this jump; any
                    # intervening jump means the pattern does not apply.
                    i = rtarget_prev
                    while i != offset:
                        if code[i] in [opc.JUMP_FORWARD, opc.JUMP_ABSOLUTE]:
                            return
                        i = self.prev[i]
                    self.return_end_ifs.remove(rtarget_prev)
                pass
        return
Ejemplo n.º 5
0
    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an uncompyle6 code object, and transform them,
        returning a list of uncompyle6 Token's.

        The transformations are made to assist the deparsing grammar.
        Specificially:
           -  various types of LOAD_CONST's are categorized in terms of what they load
           -  COME_FROM instructions are added to assist parsing control structures
           -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
           -  some EXTENDED_ARGS instructions are removed

        Also, when we encounter certain tokens, we add them to a set which will cause custom
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.
        """

        if not show_asm:
            show_asm = self.show_asm

        bytecode = self.build_instructions(co)

        # show_asm = 'both'
        if show_asm in ('both', 'before'):
            for instr in bytecode.get_instructions(co):
                print(instr.disassemble())

        # list of tokens/instructions
        tokens = []

        # "customize" is in the process of going away here
        customize = {}

        if self.is_pypy:
            customize['PyPy'] = 0

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()

        n = len(self.insts)
        for i, inst in enumerate(self.insts):

            # We need to detect the difference between:
            #   raise AssertionError
            #  and
            #   assert ...
            # If we have a JUMP_FORWARD after the
            # RAISE_VARARGS then we have a "raise" statement
            # else we have an "assert" statement.
            if self.version == 3.0:
                # There is a an implied JUMP_IF_TRUE that we are not testing for (yet?) here
                assert_can_follow = inst.opname == 'POP_TOP' and i+1 < n
            else:
                assert_can_follow = inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n
            if assert_can_follow:
                next_inst = self.insts[i+1]
                if (next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                    if (i + 2 < n and self.insts[i+2].opname.startswith('RAISE_VARARGS')):
                        self.load_asserts.add(next_inst.offset)
                    pass
                pass

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets(show_asm)
        # print("XXX2", jump_targets)

        last_op_was_break = False

        for i, inst in enumerate(self.insts):

            argval = inst.argval
            op     = inst.opcode

            if inst.opname == 'EXTENDED_ARG':
                # FIXME: The EXTENDED_ARG is used to signal annotation
                # parameters
                if (i+1 < n and
                    self.insts[i+1].opcode != self.opc.MAKE_FUNCTION):
                    continue

            if inst.offset in jump_targets:
                jump_idx = 0
                # We want to process COME_FROMs to the same offset to be in *descending*
                # offset order so we have the larger range or biggest instruction interval
                # last. (I think they are sorted in increasing order, but for safety
                # we sort them). That way, specific COME_FROM tags will match up
                # properly. For example, a "loop" with an "if" nested in it should have the
                # "loop" tag last so the grammar rule matches that properly.
                for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                    come_from_name = 'COME_FROM'
                    opname = self.opname_for_offset(jump_offset)
                    if opname == 'EXTENDED_ARG':
                        j = xdis.next_offset(op, self.opc, jump_offset)
                        opname = self.opname_for_offset(j)

                    if opname.startswith('SETUP_'):
                        come_from_type = opname[len('SETUP_'):]
                        come_from_name = 'COME_FROM_%s' % come_from_type
                        pass
                    elif inst.offset in self.except_targets:
                        come_from_name = 'COME_FROM_EXCEPT_CLAUSE'
                    tokens.append(Token(come_from_name,
                                        jump_offset, repr(jump_offset),
                                        offset='%s_%s' % (inst.offset, jump_idx),
                                        has_arg = True, opc=self.opc))
                    jump_idx += 1
                    pass
                pass
            elif inst.offset in self.else_start:
                end_offset = self.else_start[inst.offset]
                tokens.append(Token('ELSE',
                                    None, repr(end_offset),
                                    offset='%s' % (inst.offset),
                                    has_arg = True, opc=self.opc))

                pass

            pattr  = inst.argrepr
            opname = inst.opname

            if op in self.opc.CONST_OPS:
                const = argval
                if iscode(const):
                    if const.co_name == '<lambda>':
                        assert opname == 'LOAD_CONST'
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    elif const.co_name == '<listcomp>':
                        opname = 'LOAD_LISTCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    if isinstance(inst.arg, int) and inst.arg < len(co.co_consts):
                        argval, _ = _get_const_info(inst.arg, co.co_consts)
                    # Why don't we use _ above for "pattr" rather than "const"?
                    # This *is* a little hoaky, but we have to coordinate with
                    # other parts like n_LOAD_CONST in pysource.py for example.
                    pattr = const
                    pass
            elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'):
                if self.version >= 3.6:
                    # 3.6+ doesn't have MAKE_CLOSURE, so opname == 'MAKE_FUNCTION'
                    flags = argval
                    opname = 'MAKE_FUNCTION_%d' % (flags)
                    attr = []
                    for flag in self.MAKE_FUNCTION_FLAGS:
                        bit = flags & 1
                        attr.append(bit)
                        flags >>= 1
                    attr = attr[:4] # remove last value: attr[5] == False
                else:
                    pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval)
                    pattr = ("%d positional, %d keyword pair, %d annotated" %
                                 (pos_args, name_pair_args, annotate_args))
                    if name_pair_args > 0:
                        opname = '%s_N%d' % (opname, name_pair_args)
                        pass
                    if annotate_args > 0:
                        opname = '%s_A_%d' % (opname, annotate_args)
                        pass
                    opname = '%s_%d' % (opname, pos_args)
                    attr = (pos_args, name_pair_args, annotate_args)
                tokens.append(
                    Token(
                        opname = opname,
                        attr = attr,
                        pattr = pattr,
                        offset = inst.offset,
                        linestart = inst.starts_line,
                        op = op,
                        has_arg = inst.has_arg,
                        opc = self.opc
                    )
                )
                continue
            elif op in self.varargs_ops:
                pos_args = argval
                if self.is_pypy and not pos_args and opname == 'BUILD_MAP':
                    opname = 'BUILD_MAP_n'
                else:
                    opname = '%s_%d' % (opname, pos_args)

            elif self.is_pypy and opname == 'JUMP_IF_NOT_DEBUG':
                # The value in the dict is in special cases in semantic actions, such
                # as JUMP_IF_NOT_DEBUG. The value is not used in these cases, so we put
                # in arbitrary value 0.
                customize[opname] = 0
            elif opname == 'UNPACK_EX':
                # FIXME: try with scanner and parser by
                # changing argval
                before_args = argval & 0xFF
                after_args = (argval >> 8) & 0xff
                pattr = "%d before vararg, %d after" % (before_args, after_args)
                argval = (before_args, after_args)
                opname = '%s_%d+%d' % (opname, before_args, after_args)

            elif op == self.opc.JUMP_ABSOLUTE:
                # Further classify JUMP_ABSOLUTE into backward jumps
                # which are used in loops, and "CONTINUE" jumps which
                # may appear in a "continue" statement.  The loop-type
                # and continue-type jumps will help us classify loop
                # boundaries The continue-type jumps help us get
                # "continue" statements with would otherwise be turned
                # into a "pass" statement because JUMPs are sometimes
                # ignored in rules as just boundary overhead. In
                # comprehensions we might sometimes classify JUMP_BACK
                # as CONTINUE, but that's okay since we add a grammar
                # rule for that.
                pattr = argval
                target = self.get_target(inst.offset)
                if target <= inst.offset:
                    next_opname = self.insts[i+1].opname

                    # 'Continue's include jumps to loops that are not
                    # and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
                    # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD
                    # then we'll take it as a "continue".
                    is_continue = (self.insts[self.offset2inst_index[target]]
                                  .opname == 'FOR_ITER'
                                  and self.insts[i+1].opname == 'JUMP_FORWARD')

                    if (is_continue or
                        (inst.offset in self.stmts and (inst.starts_line and
                        next_opname not in self.not_continue_follow))):
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        # There are other situations where we don't catch
                        # CONTINUE as well.
                        if tokens[-1].kind == 'JUMP_BACK' and tokens[-1].attr <= argval:
                            if tokens[-2].kind == 'BREAK_LOOP':
                                del tokens[-1]
                            else:
                                # intern is used because we are changing the *previous* token
                                tokens[-1].kind = intern('CONTINUE')
                    if last_op_was_break and opname == 'CONTINUE':
                        last_op_was_break = False
                        continue

            # FIXME: go over for Python 3.6+. This is sometimes wrong
            elif op == self.opc.RETURN_VALUE:
                if inst.offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'

            elif inst.offset in self.load_asserts:
                opname = 'LOAD_ASSERT'

            last_op_was_break = opname == 'BREAK_LOOP'
            tokens.append(
                Token(
                    opname = opname,
                    attr = argval,
                    pattr = pattr,
                    offset = inst.offset,
                    linestart = inst.starts_line,
                    op = op,
                    has_arg = inst.has_arg,
                    opc = self.opc
                    )
                )
            pass

        if show_asm in ('both', 'after'):
            for t in tokens:
                print(t.format(line_prefix='L.'))
            print()
        return tokens, customize
Ejemplo n.º 6
0
def basic_blocks(version, is_pypy, fn):
    """Partition the instructions of ``fn`` into basic blocks.

    Two passes are made over ``get_instructions(fn)``.  The first collects
    every offset that some jump instruction targets.  The second walks the
    instructions in order, accumulating flags and jump offsets for the block
    being built, and hands the block to a ``BBMgr`` whenever a break, jump
    or no-follow instruction is seen or a jump target is reached.

    Returns the manager's ``bb_list``; the last block recorded by the
    manager has its ``follow_offset`` cleared, and any instructions left
    over after the walk are appended as a final block.
    """
    BB = BBMgr(version, is_pypy)

    def jump_destination(instr, fallthrough):
        # Absolute jumps carry the destination in their operand; all other
        # jumps are relative to the offset of the following instruction.
        if instr.opcode in BB.JABS_INSTRUCTIONS:
            return instr.arg
        return fallthrough + instr.arg

    # Pass 1: every offset that is jumped to.
    jump_targets = set()
    for inst in get_instructions(fn):
        fallthrough = next_offset(inst.opcode, BB.opcode, inst.offset)
        if inst.opcode in BB.JUMP_INSTRUCTIONS:
            jump_targets.add(jump_destination(inst, fallthrough))

    # Pass 2: carve out the blocks.
    block_start = 0
    cur_offset = -1       # offset of the instruction being looked at
    before_cur = -1       # offset of the instruction before that one
    pending_jumps = set()
    loop_ends = [-1]      # stack of loop-exit offsets; -1 is a sentinel
    flags = {BB_ENTRY}

    for inst in get_instructions(fn):
        before_cur = cur_offset
        cur_offset = inst.offset
        op = inst.opcode
        fallthrough = next_offset(op, BB.opcode, cur_offset)

        # Keep track of where the innermost loop ends.
        if op == BB.opcode.SETUP_LOOP:
            loop_ends.append(fallthrough + inst.arg)
        elif cur_offset == loop_ends[-1]:
            loop_ends.pop()

        if op in BB.LOOP_INSTRUCTIONS:
            flags.add(BB_LOOP)
        elif op in BB.BREAK_INSTRUCTIONS:
            # A break leaves for the end of the innermost loop and
            # terminates the current block.
            flags.add(BB_BREAK)
            pending_jumps.add(loop_ends[-1])
            flags, pending_jumps = BB.add_bb(
                block_start, cur_offset, fallthrough, flags, pending_jumps)
            block_start = fallthrough

        # Something jumps here, so this instruction has to begin a block of
        # its own: close whatever block was open before it.
        if cur_offset in jump_targets and block_start < cur_offset:
            flags, pending_jumps = BB.add_bb(
                block_start, before_cur, cur_offset, flags, pending_jumps)
            block_start = cur_offset

        # Flags contributed by particular classes of instruction.
        if op in BB.BLOCK_INSTRUCTIONS:
            flags.add(BB_BLOCK)
        elif op in BB.EXCEPT_INSTRUCTIONS:
            flags.add(BB_EXCEPT)
        elif op in BB.FINALLY_INSTRUCTIONS:
            flags.add(BB_FINALLY)
        elif op in BB.FOR_INSTRUCTIONS:
            flags.add(BB_FOR)
        elif op in BB.JUMP_INSTRUCTIONS:
            # An absolute jump could in theory stay inside one (extended)
            # basic block, but splitting at every jump mirrors the code more
            # simply.  Record where the jump goes, then end the block --
            # except after a SETUP_LOOP, which does not end one.
            pending_jumps.add(jump_destination(inst, fallthrough))
            unconditional = op in BB.JUMP_UNCONDITONAL
            if unconditional:
                flags.add(BB_JUMP_UNCONDITIONAL)
            if unconditional or op != BB.opcode.SETUP_LOOP:
                flags, pending_jumps = BB.add_bb(
                    block_start, cur_offset, fallthrough, flags,
                    pending_jumps)
                block_start = fallthrough
        elif op in BB.NOFOLLOW_INSTRUCTIONS:
            flags.add(BB_NOFOLLOW)
            flags, pending_jumps = BB.add_bb(
                block_start, cur_offset, fallthrough, flags, pending_jumps)
            block_start = fallthrough

    # Nothing follows the last block recorded so far.
    if BB.bb_list:
        BB.bb_list[-1].follow_offset = None

    # Instructions not yet assigned to a block form a final one.
    if block_start <= cur_offset:
        trailing = BasicBlock(block_start,
                              cur_offset,
                              None,
                              flags=flags,
                              jump_offsets=pending_jumps)
        BB.bb_list.append(trailing)

    return BB.bb_list
Ejemplo n.º 7
0
 def next_offset(self, op, offset: int) -> int:
     """Forward to ``xdis.next_offset`` using this object's ``opc`` table.

     ``op`` and ``offset`` are passed through unchanged and the library's
     result is returned as is.
     """
     advance = xdis.next_offset
     return advance(op, self.opc, offset)
Ejemplo n.º 8
0
    def reduce_is_invalid(self, rule, ast, tokens, first, last):
        """Tell the parser whether the reduction by ``rule`` must be rejected.

        The base-class check runs first, and a truthy verdict from it is
        returned unchanged.  After that, a handful of rules get additional
        checks, most of which compare the destination of a jump found in
        ``ast`` with the offsets of tokens near ``tokens[first]`` and
        ``tokens[last]``.

        rule        -- (left-hand-side name, tuple of right-hand-side names)
        ast         -- node for the reduction; children are reached by index
        tokens      -- the token sequence being parsed
        first, last -- indices into ``tokens``
                       NOTE(review): presumably the span of the reduction;
                       some branches allow for ``last == len(tokens)``.

        Returns a truthy value when the reduction should be refused.  Rules
        with no special check, and checks that do not apply, give False.
        """
        parent_verdict = super(Python27Parser, self).reduce_is_invalid(
            rule, ast, tokens, first, last)
        if parent_verdict:
            return parent_verdict

        def misses_end(jump, end_index):
            # True unless the relative jump lands exactly on the token at
            # end_index, or that token has the same pattr as the jump.
            landing = jump.offset + jump.attr + 3
            return not (landing == tokens[end_index].offset
                        or tokens[end_index].pattr == jump.pattr)

        if rule == ("and", ("expr", "jmp_false", "expr", "\\e_come_from_opt")):
            # A YIELD_VALUE right after the instructions forming the "and"
            # probably means this is an "if" inside a comprehension.
            # (Testing last+1 for POP_TOP might be worth considering too.)
            if tokens[last] == "YIELD_VALUE":
                return True

            jump = ast[1][0]
            if jump == "POP_JUMP_IF_FALSE":
                # The jump must go somewhere beyond the end of the "and".
                # It need not be exactly the end, because this "and" can be
                # part of a larger condition.  Tokens whose offset is a
                # string are skipped to find the token to compare against.
                while first < last and isinstance(tokens[last].offset, str):
                    last -= 1
                if jump.attr < tokens[last].offset:
                    return True

            # The jump has to reach the end of the "and", or go to the same
            # place as the token at the end of the "and".
            return misses_end(jump, last)

        if rule == ("comp_if", ("expr", "jmp_false", "comp_iter")):
            jump = ast[1][0]
            if jump == "POP_JUMP_IF_FALSE":
                # Invalid when the jump lands strictly inside the span.
                return tokens[first].offset < jump.attr < tokens[last].offset
            return False

        if (rule[0] == "conditional"
                and rule[1][:5] == ("expr", "jmp_false", "expr",
                                    "JUMP_ABSOLUTE", "expr")):
            jump = ast[1][0]
            if jump == "POP_JUMP_IF_FALSE":
                # The false-jump must go to the first instruction of the
                # "else" expression ...
                if jump.attr != ast[4].first_child().offset:
                    return True
                # ... and the JUMP_ABSOLUTE must not stop short of the end.
                return ast[3].attr < tokens[last].offset
            return False

        if rule[0] == "raise_stmt1":
            return ast[0] == "expr" and ast[0][0] == "or"

        if rule[0] in ("assert", "assert2"):
            target = ast[1][0].attr
            if last >= len(tokens):
                return False
            if target == tokens[last].offset:
                return False
            tail = ast[-1]
            return not (target == next_offset(tail.op, tail.opc, tail.offset))

        if rule == ("iflaststmtl", ("testexpr", "c_stmts")):
            test = ast[0][0]
            if test not in ("testfalse", "testtrue"):
                return False
            if not (len(test) > 1 and test[1].kind.startswith("jmp_")):
                return False

            jmp_target = test[1][0].attr
            if last == len(tokens):
                last -= 1
            # Narrow the span from both sides past tokens whose offset is a
            # string.
            while isinstance(tokens[first].offset, str) and first < last:
                first += 1
            if first == last:
                return True
            while first < last and isinstance(tokens[last].offset, str):
                last -= 1
            return (tokens[first].off2int() < jmp_target
                    < tokens[last].off2int())

        if rule in (("list_if_not", ("expr", "jmp_true", "list_iter")),
                    ("list_if", ("expr", "jmp_false", "list_iter"))):
            # Invalid when the jump goes forward but stops before the end.
            jump = ast[1][0]
            dest = jump.attr
            return dest > jump.offset and dest < tokens[last].offset

        if rule == ("or", ("expr", "jmp_true", "expr", "\\e_come_from_opt")):
            # The jump must not land in the middle of the "or": it has to
            # reach the end of the "or", or go to the same place as the
            # token at the end of the "or".
            return misses_end(ast[1][0], last)

        if (rule[0] == "whilestmt"
                and rule[1][0:-2] == ("SETUP_LOOP", "testexpr", "l_stmts_opt",
                                      "JUMP_BACK", "JUMP_BACK")):
            # Both back-jumps have to go to the same place.
            idx = last - 1
            while tokens[idx] != "JUMP_BACK":
                idx -= 1
            return tokens[idx].attr != tokens[idx - 1].attr

        if rule[0] == "if_expr_true":
            return first > 0 and tokens[first - 1] == "POP_JUMP_IF_FALSE"

        return False
Ejemplo n.º 9
0
    def find_jump_targets(self, debug):
        """
        Find every bytecode offset that is the destination of a jump,
        i.e. a place where a COME_FROM instruction might be inserted.

        Returns a dict keyed by target offset.  Each value is the list of
        offsets of the instructions that jump there, since one instruction
        can be jumped to from several others.

        The containers that control-flow detection works with (structs,
        loops, fixed_jumps, ...) are reset here before the scan.  When
        `debug` is 'both' or 'after', self.structs is pretty-printed at
        the end.
        """
        bytecode = self.code
        code_len = len(bytecode)

        # The outermost structure spans the whole code object.
        self.structs = [{'type':  'root',
                         'start': 0,
                         'end':   code_len - 1}]

        self.loops = []            # all loop entry points
        self.fixed_jumps = {}      # fixed jumps -> their real destination
        self.except_targets = {}
        self.ignore_if = set()
        self.build_statement_indices()
        self.else_start = {}

        # Filled in by detect_control_flow()
        self.not_continue = set()
        self.return_end_ifs = set()
        self.setup_loop_targets = {}  # target given setup_loop offset
        self.setup_loops = {}         # setup_loop offset given target

        targets = {}

        def note_jump(label, source):
            # A new list is built each time instead of appending in place.
            targets[label] = targets.get(label, []) + [source]

        extended_arg = 0
        for offset in self.op_range(0, code_len):
            op = bytecode[offset]

            if op == self.opc.EXTENDED_ARG:
                # Carry the extended part over to the next instruction.
                extended_arg = self.extended_arg_val(
                    code2num(bytecode, offset + 1) | extended_arg)
                continue

            # Determine structures and fix jumps (Python 2.3 and later).
            self.detect_control_flow(offset, targets, extended_arg)

            if op >= op3.HAVE_ARGUMENT:
                label = self.fixed_jumps.get(offset)
                # One operand byte from 3.6 on, two little-endian bytes
                # before that.
                oparg = (bytecode[offset + 1] if self.version >= 3.6
                         else bytecode[offset + 1]
                         + bytecode[offset + 2] * 256)
                fallthrough = xdis.next_offset(op, self.opc, offset)

                if label is None:
                    if op in op3.hasjrel and op != self.opc.FOR_ITER:
                        label = fallthrough + oparg
                    elif (op in op3.hasjabs and op in self.jump_if_pop
                          and oparg > offset):
                        # Only forward pop-and-jump instructions count.
                        label = oparg

                if label is not None and label != -1:
                    note_jump(label, offset)
            elif op == self.opc.END_FINALLY and offset in self.fixed_jumps:
                note_jump(self.fixed_jumps[offset], offset)

            extended_arg = 0

        # DEBUG:
        if debug in ('both', 'after'):
            import pprint as pp
            pp.pprint(self.structs)

        return targets
Ejemplo n.º 10
0
    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an decompyle3 code object, and transform them,
        returning a list of decompyle3 Token's.

        The transformations are made to assist the deparsing grammar.
        Specificially:
           -  various types of LOAD_CONST's are categorized in terms of what they load
           -  COME_FROM instructions are added to assist parsing control structures
           -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
           -  some EXTENDED_ARGS instructions are removed

        Also, when we encounter certain tokens, we add them to a set which will cause custom
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.
        """

        def tokens_append(j, token):
            tokens.append(token)
            self.offset2tok_index[token.offset] = j
            j += 1
            assert j == len(tokens)
            return j

        if not show_asm:
            show_asm = self.show_asm

        bytecode = self.build_instructions(co)

        # show_asm = 'both'
        if show_asm in ("both", "before"):
            for instr in bytecode.get_instructions(co):
                print(instr.disassemble())

        # "customize" is in the process of going away here
        customize = {}

        if self.is_pypy:
            customize["PyPy"] = 0

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()

        # list of tokens/instructions
        tokens = []
        self.offset2tok_index = {}

        n = len(self.insts)
        for i, inst in enumerate(self.insts):

            # We need to detect the difference between:
            #   raise AssertionError
            #  and
            #   assert ...
            # If we have a JUMP_FORWARD after the
            # RAISE_VARARGS then we have a "raise" statement
            # else we have an "assert" statement.
            assert_can_follow = inst.opname == "POP_JUMP_IF_TRUE" and i + 1 < n
            if assert_can_follow:
                next_inst = self.insts[i + 1]
                if (
                    next_inst.opname == "LOAD_GLOBAL"
                    and next_inst.argval == "AssertionError"
                ):
                    raise_idx = self.offset2inst_index[self.prev_op[inst.argval]]
                    raise_inst = self.insts[raise_idx]
                    if raise_inst.opname.startswith("RAISE_VARARGS"):
                        self.load_asserts.add(next_inst.offset)
                    pass
                pass

        # Operand values in Python wordcode are small. As a result,
        # there are these EXTENDED_ARG instructions - way more than
        # before 3.6. These parsing a lot of pain.

        # To simplify things we want to untangle this. We also
        # do this loop before we compute jump targets.
        for i, inst in enumerate(self.insts):

            # One artifact of the "too-small" operand problem, is that
            # some backward jumps, are turned into forward jumps to another
            # "extended arg" backward jump to the same location.
            if inst.opname == "JUMP_FORWARD":
                jump_inst = self.insts[self.offset2inst_index[inst.argval]]
                if jump_inst.has_extended_arg and jump_inst.opname.startswith("JUMP"):
                    # Create comination of the jump-to instruction and
                    # this one. Keep the position information of this instruction,
                    # but the operator and operand properties come from the other
                    # instruction
                    self.insts[i] = Instruction(
                        jump_inst.opname,
                        jump_inst.opcode,
                        jump_inst.optype,
                        jump_inst.inst_size,
                        jump_inst.arg,
                        jump_inst.argval,
                        jump_inst.argrepr,
                        jump_inst.has_arg,
                        inst.offset,
                        inst.starts_line,
                        inst.is_jump_target,
                        inst.has_extended_arg,
                    )

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets(show_asm)
        # print("XXX2", jump_targets)

        last_op_was_break = False

        j = 0
        for i, inst in enumerate(self.insts):

            argval = inst.argval
            op = inst.opcode

            if inst.opname == "EXTENDED_ARG":
                # FIXME: The EXTENDED_ARG is used to signal annotation
                # parameters
                if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION:
                    continue

            if inst.offset in jump_targets:
                jump_idx = 0
                # We want to process COME_FROMs to the same offset to be in *descending*
                # offset order so we have the larger range or biggest instruction interval
                # last. (I think they are sorted in increasing order, but for safety
                # we sort them). That way, specific COME_FROM tags will match up
                # properly. For example, a "loop" with an "if" nested in it should have the
                # "loop" tag last so the grammar rule matches that properly.
                for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                    come_from_name = "COME_FROM"

                    opname = self.opname_for_offset(jump_offset)
                    if opname == "EXTENDED_ARG":
                        k = xdis.next_offset(op, self.opc, jump_offset)
                        opname = self.opname_for_offset(k)

                    if opname.startswith("SETUP_"):
                        come_from_type = opname[len("SETUP_") :]
                        come_from_name = "COME_FROM_%s" % come_from_type
                        pass
                    elif inst.offset in self.except_targets:
                        come_from_name = "COME_FROM_EXCEPT_CLAUSE"
                    j = tokens_append(
                        j,
                        Token(
                            come_from_name,
                            jump_offset,
                            repr(jump_offset),
                            offset="%s_%s" % (inst.offset, jump_idx),
                            has_arg=True,
                            opc=self.opc,
                            has_extended_arg=False,
                        ),
                    )
                    jump_idx += 1
                    pass
                pass

            pattr = inst.argrepr
            opname = inst.opname

            if op in self.opc.CONST_OPS:
                const = argval
                if iscode(const):
                    if const.co_name == "<lambda>":
                        assert opname == "LOAD_CONST"
                        opname = "LOAD_LAMBDA"
                    elif const.co_name == "<genexpr>":
                        opname = "LOAD_GENEXPR"
                    elif const.co_name == "<dictcomp>":
                        opname = "LOAD_DICTCOMP"
                    elif const.co_name == "<setcomp>":
                        opname = "LOAD_SETCOMP"
                    elif const.co_name == "<listcomp>":
                        opname = "LOAD_LISTCOMP"
                    else:
                        opname = "LOAD_CODE"
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = "<code_object " + const.co_name + ">"
                elif isinstance(const, str):
                    opname = "LOAD_STR"
                else:
                    if isinstance(inst.arg, int) and inst.arg < len(co.co_consts):
                        argval, _ = _get_const_info(inst.arg, co.co_consts)
                    # Why don't we use _ above for "pattr" rather than "const"?
                    # This *is* a little hoaky, but we have to coordinate with
                    # other parts like n_LOAD_CONST in pysource.py for example.
                    pattr = const
                    pass
            elif opname == "IMPORT_NAME":
                if "." in inst.argval:
                    opname = "IMPORT_NAME_ATTR"
                    pass
            elif opname in ("MAKE_FUNCTION", "MAKE_CLOSURE"):
                flags = argval
                opname = "MAKE_FUNCTION_%d" % (flags)
                attr = []
                for flag in self.MAKE_FUNCTION_FLAGS:
                    bit = flags & 1
                    attr.append(bit)
                    flags >>= 1
                attr = attr[:4]  # remove last value: attr[5] == False
                j = tokens_append(
                    j,
                    Token(
                        opname=opname,
                        attr=attr,
                        pattr=pattr,
                        offset=inst.offset,
                        linestart=inst.starts_line,
                        op=op,
                        has_arg=inst.has_arg,
                        opc=self.opc,
                        has_extended_arg=inst.has_extended_arg,
                    ),
                )
                continue
            elif op in self.varargs_ops:
                pos_args = argval
                if self.is_pypy and not pos_args and opname == "BUILD_MAP":
                    opname = "BUILD_MAP_n"
                else:
                    opname = "%s_%d" % (opname, pos_args)

            elif self.is_pypy and opname == "JUMP_IF_NOT_DEBUG":
                # The value in the dict is in special cases in semantic actions, such
                # as JUMP_IF_NOT_DEBUG. The value is not used in these cases, so we put
                # in arbitrary value 0.
                customize[opname] = 0
            elif opname == "UNPACK_EX":
                # FIXME: try with scanner and parser by
                # changing argval
                before_args = argval & 0xFF
                after_args = (argval >> 8) & 0xFF
                pattr = "%d before vararg, %d after" % (before_args, after_args)
                argval = (before_args, after_args)
                opname = "%s_%d+%d" % (opname, before_args, after_args)

            elif op == self.opc.JUMP_ABSOLUTE:
                #  Refine JUMP_ABSOLUTE further in into:
                #
                # * "JUMP_BACK"    - which are are used in loops. This is sometimes
                #                   found at the end of a looping construct
                # * "BREAK_LOOP"  - which are are used to break loops.
                # * "CONTINUE"    - jumps which may appear in a "continue" statement.
                #                   It is okay to confuse this with JUMP_BACK. The
                #                   grammar should tolerate this.
                # * "JUMP_FORWARD - forward jumps that are not BREAK_LOOP jumps.
                #
                # The loop-type and continue-type jumps will help us
                # classify loop boundaries The continue-type jumps
                # help us get "continue" statements with would
                # otherwise be turned into a "pass" statement because
                # JUMPs are sometimes ignored in rules as just
                # boundary overhead. Again, in comprehensions we might
                # sometimes classify JUMP_BACK as CONTINUE, but that's
                # okay since grammar rules should tolerate that.
                pattr = argval
                target = self.get_target(inst.offset)
                if target <= inst.offset:
                    next_opname = self.insts[i + 1].opname

                    # 'Continue's include jumps to loops that are not
                    # and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
                    # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD
                    # then we'll take it as a "continue".
                    is_continue = (
                        self.insts[self.offset2inst_index[target]].opname == "FOR_ITER"
                        and self.insts[i + 1].opname == "JUMP_FORWARD"
                    )

                    if self.version < 3.8 and (
                        is_continue
                        or (
                            inst.offset in self.stmts
                            and (
                                inst.starts_line
                                and next_opname not in self.not_continue_follow
                            )
                        )
                    ):
                        opname = "CONTINUE"
                    else:
                        opname = "JUMP_BACK"
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        # There are other situations where we don't catch
                        # CONTINUE as well.
                        if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval:
                            if tokens[-2].kind == "BREAK_LOOP":
                                del tokens[-1]
                            else:
                                # intern is used because we are changing the *previous* token.
                                # A POP_TOP suggests a "break" rather than a "continue"?
                                if tokens[-2] == "POP_TOP":
                                    tokens[-1].kind = sys.intern("BREAK_LOOP")
                                else:
                                    tokens[-1].kind = sys.intern("CONTINUE")
                                    pass
                                pass
                            pass
                    if last_op_was_break and opname == "CONTINUE":
                        last_op_was_break = False
                        continue
                    pass
                else:
                    opname = "JUMP_FORWARD"

            elif opname.startswith("POP_JUMP_IF_") and not inst.jumps_forward():
                opname += "_BACK"
            elif inst.offset in self.load_asserts:
                opname = "LOAD_ASSERT"

            last_op_was_break = opname == "BREAK_LOOP"
            j = tokens_append(
                j,
                Token(
                    opname=opname,
                    attr=argval,
                    pattr=pattr,
                    offset=inst.offset,
                    linestart=inst.starts_line,
                    op=op,
                    has_arg=inst.has_arg,
                    opc=self.opc,
                    has_extended_arg=inst.has_extended_arg,
                ),
            )
            pass

        if show_asm in ("both", "after"):
            for t in tokens:
                print(t.format(line_prefix=""))
            print()
        return tokens, customize
Ejemplo n.º 11
0
    def detect_control_flow(self, offset, targets, inst_index):
        """
        Detect type of block structures and their boundaries to fix optimized jumps
        in python2.3+

        This is called once per instruction. Depending on the opcode of the
        instruction it may append entries to self.structs and record
        information in self.fixed_jumps, self.setup_loops, self.loops,
        self.ignore_if, self.not_continue, self.return_end_ifs,
        self.else_start and self.except_targets.

        offset:     bytecode offset of the instruction being examined
        targets:    mapping of a jump-target offset to the offsets that jump
                    to it; it is only read here (to look for jumps that come
                    from a SETUP_EXCEPT)
        inst_index: index of the instruction inside self.insts

        Nothing is returned; every "return" below just stops the analysis
        of this instruction early.
        """

        code = self.code
        inst = self.insts[inst_index]
        op = inst.opcode

        # Detect parent structure
        parent = self.structs[0]
        start = parent["start"]
        end = parent["end"]

        # Pick inner-most parent for our offset
        for struct in self.structs:
            current_start = struct["start"]
            current_end = struct["end"]
            if (current_start <= offset < current_end) and (
                    current_start >= start and current_end <= end):
                start = current_start
                end = current_end
                parent = struct

        if self.version < 3.8 and op == self.opc.SETUP_LOOP:
            # We categorize loop types: 'for', 'while', 'while 1' with
            # possibly suffixes '-loop' and '-else'
            # Try to find the jump_back instruction of the loop.
            # It could be a return instruction.

            # NOTE(review): "start" is the start of the enclosing struct found
            # above, not "offset" -- confirm that advancing it by the size of
            # this instruction is what is intended.
            start += inst.inst_size
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.setup_loops[target] = offset

            if target != end:
                self.fixed_jumps[offset] = end

            (line_no, next_line_byte) = self.lines[offset]
            jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                        next_line_byte, False)

            if jump_back:
                jump_forward_offset = xdis.next_offset(code[jump_back],
                                                       self.opc, jump_back)
            else:
                jump_forward_offset = None

            # Presumably the offset two instructions before "end"; this
            # assumes self.prev maps an offset to the preceding instruction's
            # offset -- TODO confirm.
            return_val_offset1 = self.prev[self.prev[end]]

            # A candidate jump-back that is followed by a forward jump is
            # discarded when the loop actually ends in RETURN_VALUE (possibly
            # followed by POP_BLOCK).
            if (jump_back and jump_back != self.prev_op[end]
                    and self.is_jump_forward(jump_forward_offset)):
                if code[self.prev_op[end]] == self.opc.RETURN_VALUE or (
                        code[self.prev_op[end]] == self.opc.POP_BLOCK
                        and code[return_val_offset1] == self.opc.RETURN_VALUE):
                    jump_back = None
            if not jump_back:
                # loop suite ends in return
                jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
                if not jump_back:
                    return

                jb_inst = self.get_inst(jump_back)
                jump_back = self.next_offset(jb_inst.opcode, jump_back)

                if_offset = None
                if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf:
                    if_offset = self.prev[next_line_byte]
                if if_offset:
                    loop_type = "while"
                    self.ignore_if.add(if_offset)
                else:
                    loop_type = "for"
                target = next_line_byte
                end = xdis.next_offset(code[jump_back], self.opc, jump_back)
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end,
                                                self.opc.JUMP_ABSOLUTE, start,
                                                False)

                jb_inst = self.get_inst(jump_back)

                jb_next_offset = self.next_offset(jb_inst.opcode, jump_back)
                if end > jb_next_offset and self.is_jump_forward(end):
                    if self.is_jump_forward(jb_next_offset):
                        if self.get_target(jb_next_offset) == self.get_target(
                                end):
                            self.fixed_jumps[offset] = jb_next_offset
                            end = jb_next_offset
                elif target < offset:
                    self.fixed_jumps[offset] = jb_next_offset
                    end = jb_next_offset

                target = self.get_target(jump_back)

                if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                    loop_type = "for"
                else:
                    loop_type = "while"
                    test = self.prev_op[next_line_byte]

                    if test == offset:
                        loop_type = "while 1"
                    elif self.code[test] in self.opc.JUMP_OPs:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        # NOTE(review): the hard-coded 3 looks like the size
                        # of a pre-3.6 jump instruction -- confirm for
                        # wordcode bytecode.
                        if test_target > (jump_back + 3):
                            jump_back = test_target
                self.not_continue.add(jump_back)
            self.loops.append(target)
            self.structs.append({
                "type": loop_type + "-loop",
                "start": target,
                "end": jump_back
            })
            # Anything between the instruction after the jump-back and the
            # end of the loop is recorded as the loop's "else" part.
            after_jump_offset = xdis.next_offset(code[jump_back], self.opc,
                                                 jump_back)
            if after_jump_offset != end:
                self.structs.append({
                    "type": loop_type + "-else",
                    "start": after_jump_offset,
                    "end": end,
                })
        elif op in self.pop_jump_tf:
            start = offset + inst.inst_size
            target = inst.argval
            rtarget = self.restrict_to_parent(target, parent)
            prev_op = self.prev_op

            # Do not let jump to go out of parent struct bounds
            if target != rtarget and parent["type"] == "and/or":
                self.fixed_jumps[offset] = rtarget
                return

            # Does this jump to right after another conditional jump that is
            # not myself?  If so, it's part of a larger conditional.
            # rocky: if we have a conditional jump to the next instruction, then
            # possibly I am "skipping over" a "pass" or null statement.
            pretarget = self.get_inst(prev_op[target])

            if (pretarget.opcode in self.pop_jump_if_pop and (target > offset)
                    and pretarget.offset != offset):

                # FIXME: hack upon hack...
                # In some cases the pretarget can be a jump to the next instruction
                # and these aren't and/or's either. We limit to 3.5+ since we experienced there
                # but it might be earlier versions, or might be a general principle.
                if pretarget.argval != target:
                    # FIXME: this is not accurate The commented out below
                    # is what it should be. However grammar rules right now
                    # assume the incorrect offsets.
                    # self.fixed_jumps[offset] = target
                    self.fixed_jumps[offset] = pretarget.offset
                    self.structs.append({
                        "type": "and/or",
                        "start": start,
                        "end": pretarget.offset
                    })
                    return

            # The opcode *two* instructions before the target jump offset is important
            # in making a determination of what we have. Save that.
            pre_rtarget = prev_op[rtarget]

            if op == self.opc.POP_JUMP_IF_FALSE:
                self.fixed_jumps[offset] = target

            # op == POP_JUMP_IF_TRUE
            else:
                # NOTE(review): this local shadows the builtin next().
                next = self.next_stmt[offset]
                if prev_op[next] == offset:
                    pass
                elif self.is_jump_forward(next) and target == self.get_target(
                        next):
                    if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE:
                        if (code[next] == self.opc.JUMP_FORWARD
                                or target != rtarget
                                or code[prev_op[pre_rtarget]]
                                not in (self.opc.JUMP_ABSOLUTE,
                                        self.opc.RETURN_VALUE)):
                            self.fixed_jumps[offset] = prev_op[next]
                            return
                elif (code[next] == self.opc.JUMP_ABSOLUTE
                      and self.is_jump_forward(target)
                      and self.get_target(target) == self.get_target(next)):
                    self.fixed_jumps[offset] = prev_op[next]
                    return

            rtarget_is_ja = code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
            # NOTE(review): "rtarget + 3" hard-codes the size of the
            # JUMP_ABSOLUTE at rtarget -- confirm for wordcode bytecode.
            if (rtarget_is_ja and pre_rtarget in self.stmts
                    and pre_rtarget != offset
                    and prev_op[pre_rtarget] != offset
                    and not (code[rtarget] == self.opc.JUMP_ABSOLUTE
                             and code[rtarget + 3] == self.opc.POP_BLOCK
                             and code[prev_op[pre_rtarget]] !=
                             self.opc.JUMP_ABSOLUTE)):
                rtarget = pre_rtarget

            # Does the "jump if" jump beyond a jump op?
            # That is, we have something like:
            #  POP_JUMP_IF_FALSE HERE
            #  ...
            # JUMP_FORWARD
            # HERE:
            #
            # If so, this can be block inside an "if" statement
            # or a conditional assignment like:
            #   x = 1 if x else 2
            #
            # For 3.5, for JUMP_FORWARD above we could have also
            # JUMP_BACK or CONTINUE
            #
            # There are other situations we may need to consider, like
            # if the condition jump is to a forward location.
            # Also the existence of a jump to the instruction after "END_FINALLY"
            # will distinguish "try/else" from "try".
            rtarget_break = (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP)

            if self.is_jump_forward(pre_rtarget) or (rtarget_is_ja):
                if_end = self.get_target(pre_rtarget)

                # If the jump target is back, we are looping
                if (if_end < pre_rtarget and self.version < 3.8
                        and (code[prev_op[if_end]] == self.opc.SETUP_LOOP)):
                    if if_end > start:
                        return

                end = self.restrict_to_parent(if_end, parent)

                self.structs.append({
                    "type": "if-then",
                    "start": start,
                    "end": pre_rtarget
                })

                # FIXME: add this
                # self.fixed_jumps[offset] = rtarget
                self.not_continue.add(pre_rtarget)

                # Record an "else" struct when there is room after rtarget
                # and neither rtarget nor the instruction before pre_rtarget
                # is one of the excluded opcodes tested below.
                if rtarget < end and (
                        code[rtarget]
                        not in (self.opc.END_FINALLY, self.opc.JUMP_ABSOLUTE)
                        and code[prev_op[pre_rtarget]]
                        not in (self.opc.POP_EXCEPT, self.opc.END_FINALLY)):
                    self.structs.append({
                        "type": "else",
                        "start": rtarget,
                        "end": end
                    })
                    self.else_start[rtarget] = end
            elif self.is_jump_back(pre_rtarget, 0):
                # if_end is assigned here but not read again in this branch.
                if_end = rtarget
                self.structs.append({
                    "type": "if-then",
                    "start": start,
                    "end": pre_rtarget
                })
                self.not_continue.add(pre_rtarget)
            elif code[pre_rtarget] in rtarget_break:
                self.structs.append({
                    "type": "if-then",
                    "start": start,
                    "end": rtarget
                })
                # It is important to distinguish if this return is inside some sort
                # except block return
                jump_prev = prev_op[offset]
                if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                    if self.opc.cmp_op[code[jump_prev +
                                            1]] == "exception-match":
                        return
                    pass

                # Check that next instruction after pops and jump is
                # not from SETUP_EXCEPT
                next_op = rtarget
                if code[next_op] == self.opc.POP_BLOCK:
                    next_op += instruction_size(self.code[next_op], self.opc)
                if code[next_op] == self.opc.JUMP_ABSOLUTE:
                    next_op += instruction_size(self.code[next_op], self.opc)
                if next_op in targets:
                    for try_op in targets[next_op]:
                        come_from_op = code[try_op]
                        if self.version < 3.8 and come_from_op == self.opc.SETUP_EXCEPT:
                            return
                        pass

                self.fixed_jumps[offset] = rtarget

                if code[pre_rtarget] == self.opc.RETURN_VALUE:
                    # If we are at some sort of POP_JUMP_IF and the instruction before was
                    # COMPARE_OP exception-match, then pre_rtarget is not an end_if
                    if not (inst_index > 0
                            and self.insts[inst_index - 1].argval
                            == "exception-match"):
                        self.return_end_ifs.add(pre_rtarget)
                else:
                    # The fixed jump was already set to rtarget just above;
                    # this assignment repeats it.
                    self.fixed_jumps[offset] = rtarget
                    self.not_continue.add(pre_rtarget)
            else:

                if target > offset:
                    self.fixed_jumps[offset] = target
                    pass

        elif self.version < 3.8 and op == self.opc.SETUP_EXCEPT:
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op == self.opc.POP_EXCEPT:
            # Look at the instruction following POP_EXCEPT: a forward
            # JUMP_ABSOLUTE that is not itself followed by END_FINALLY is
            # recorded as a fixed jump and as an except target.
            next_offset = xdis.next_offset(op, self.opc, offset)
            target = self.get_target(next_offset)
            if target > next_offset:
                next_op = code[next_offset]
                if (self.opc.JUMP_ABSOLUTE == next_op
                        and self.opc.END_FINALLY != code[xdis.next_offset(
                            next_op, self.opc, next_offset)]):
                    self.fixed_jumps[next_offset] = target
                    self.except_targets[target] = next_offset

        elif op == self.opc.SETUP_FINALLY:
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op in self.jump_if_pop:
            target = self.get_target(offset)
            if target > offset:
                unop_target = self.last_instr(offset, target,
                                              self.opc.JUMP_FORWARD, target)
                # NOTE(review): "unop_target + 3" hard-codes the size of the
                # JUMP_FORWARD instruction -- confirm for wordcode bytecode.
                if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
                    self.fixed_jumps[offset] = unop_target
                else:
                    self.fixed_jumps[offset] = self.restrict_to_parent(
                        target, parent)
                    pass
                pass
        else:
            # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
            # misclassified as RETURN_END_IF. Handle that here.
            # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
            if op == self.opc.RETURN_VALUE:
                next_offset = xdis.next_offset(op, self.opc, offset)
                if next_offset < len(code) and (
                        code[next_offset] == self.opc.JUMP_ABSOLUTE
                        and offset in self.return_end_ifs):
                    self.return_end_ifs.remove(offset)
                    pass
                pass
            elif op == self.opc.JUMP_FORWARD:
                # If we have:
                #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
                # then RETURN_VALUE is not RETURN_END_IF
                rtarget = self.get_target(offset)
                rtarget_prev = self.prev[rtarget]
                if (code[rtarget_prev] == self.opc.RETURN_VALUE
                        and rtarget_prev in self.return_end_ifs):
                    # Walk backwards from the RETURN_VALUE to this
                    # JUMP_FORWARD; give up if another jump lies in between.
                    i = rtarget_prev
                    while i != offset:
                        # NOTE(review): uses the module-level "op3" here
                        # rather than self.opc as everywhere else in this
                        # method -- confirm this is intended.
                        if code[i] in [op3.JUMP_FORWARD, op3.JUMP_ABSOLUTE]:
                            return
                        i = self.prev[i]
                    self.return_end_ifs.remove(rtarget_prev)
                pass
        return
Ejemplo n.º 12
0
    def find_jump_targets(self, debug):
        """
        Collect every bytecode offset that is the destination of a jump,
        i.e. every place where a COME_FROM instruction might be inserted.

        The control-flow bookkeeping attributes (self.structs, self.loops,
        self.fixed_jumps, ...) are reset first and are then filled in by
        detect_control_flow(), which is invoked for each instruction of
        self.insts.

        debug: when "both" or "after", self.structs is pretty-printed after
               the scan.

        Returns a dict that maps a target offset to the list of offsets of
        the instructions jumping to it. An instruction can be jumped to
        from several places, hence the list.
        """
        bytecode = self.code
        self.structs = [{"type": "root", "start": 0, "end": len(bytecode) - 1}]

        # Entry points of all loops
        self.loops = []

        # Jumps whose real destination has been worked out
        self.fixed_jumps = {}
        self.except_targets = {}
        self.ignore_if = set()
        self.build_statement_indices()
        self.else_start = {}

        # Populated by detect_control_flow()
        self.not_continue = set()
        self.return_end_ifs = set()
        self.setup_loop_targets = {}  # target given setup_loop offset
        self.setup_loops = {}  # setup_loop offset given target

        jump_sources = {}
        for index, instr in enumerate(self.insts):
            here = instr.offset
            opcode = instr.opcode

            # Determine structures and fix jumps (Python 2.3 and later)
            self.detect_control_flow(here, jump_sources, index)

            if not instr.has_arg:
                # Only an END_FINALLY with a fixed jump contributes a target.
                if opcode == self.opc.END_FINALLY and here in self.fixed_jumps:
                    jump_sources.setdefault(
                        self.fixed_jumps[here], []).append(here)
                continue

            dest = self.fixed_jumps.get(here)

            # Offset of the following instruction; one extra step is taken
            # when the byte at this offset is an EXTENDED_ARG.
            following = xdis.next_offset(opcode, self.opc, here)
            if self.code[here] == self.opc.EXTENDED_ARG:
                following = xdis.next_offset(opcode, self.opc, following)

            if dest is None:
                if opcode in self.opc.hasjrel and opcode != self.opc.FOR_ITER:
                    dest = following + instr.arg
                elif (opcode in self.opc.hasjabs
                      and opcode in self.jump_if_pop
                      and instr.arg > here):
                    dest = instr.arg

            if dest is not None and dest != -1:
                jump_sources.setdefault(dest, []).append(here)

        # DEBUG:
        if debug in ("both", "after"):
            from pprint import pprint

            pprint(self.structs)

        return jump_sources
Ejemplo n.º 13
0
def basic_blocks(version, is_pypy, fn, first_line=None):
    """Split the instructions of a code object into basic blocks.

    The instructions produced by ``get_instructions(fn, ...)`` are walked
    twice. The first walk gathers every offset that a jump instruction can
    transfer control to. The second walk cuts the instruction stream into
    blocks at those offsets and after block-ending instructions, tagging
    each block with ``BB_*`` flags on the way.

    version, is_pypy: handed to ``BBMgr`` unchanged.
    fn:               object whose instructions are fetched with
                      ``get_instructions``.
    first_line:       forwarded to ``get_instructions``.

    Returns the ``BBMgr`` that holds the blocks, including one artificial
    ``BB_EXIT`` block placed one past the last instruction offset.
    """
    bb_mgr = BBMgr(version, is_pypy)

    # First walk: collect the offsets that are jumped to.
    instructions = list(get_instructions(fn, first_line=first_line))
    jump_targets = set()
    for inst in instructions:
        after = next_offset(inst.opcode, bb_mgr.opcode, inst.offset)
        if inst.opcode not in bb_mgr.JUMP_INSTRUCTIONS:
            continue
        if inst.opcode in bb_mgr.JABS_INSTRUCTIONS:
            jump_targets.add(inst.arg)
        else:
            jump_targets.add(after + inst.arg)

    block_start = 0
    last_offset = -1  # offset of the most recently visited instruction
    pending_jumps = set()
    loop_ends = [-1]  # -1 is a sentinel that is never popped
    flags = {BB_ENTRY}
    try_end_stack = []
    try_blocks = []
    try_end = None
    loop_offset = None

    # Second walk: cut the stream into blocks.
    for i, inst in enumerate(instructions):
        before_offset = last_offset
        op = inst.opcode
        offset = last_offset = inst.offset
        after = next_offset(op, bb_mgr.opcode, offset)

        if offset == try_end:
            try_end = try_end_stack.pop() if try_end_stack else None

        if op in bb_mgr.LOOP_INSTRUCTIONS:
            loop_ends.append(after + inst.arg)
            loop_offset = offset
            flags.add(BB_LOOP)
        else:
            if offset == loop_ends[-1]:
                loop_ends.pop()
            if op in bb_mgr.BREAK_INSTRUCTIONS:
                flags.add(BB_BREAK)
                pending_jumps.add(loop_ends[-1])
                block, flags, pending_jumps = bb_mgr.add_bb(
                    block_start, offset, loop_offset, after,
                    flags, pending_jumps)
                loop_offset = None
                if BB_TRY in block.flags:
                    try_blocks.append(block)
                block_start = after

        if offset in jump_targets and block_start < offset:
            # Both a fallthrough path and a jump path reach this
            # instruction, so it begins a new block: close the one before.
            block, flags, pending_jumps = bb_mgr.add_bb(
                block_start, before_offset, loop_offset, offset,
                flags, pending_jumps)
            loop_offset = None
            if BB_TRY in block.flags:
                try_blocks.append(block)
            block_start = offset

        # Flags contributed by particular classes of instructions
        if op in bb_mgr.JUMP_CONDITONAL:
            flags.add(BB_JUMP_CONDITIONAL)

        if op in bb_mgr.POP_BLOCK_INSTRUCTIONS:
            if block_start == offset:
                flags.add(BB_STARTS_POP_BLOCK)
                flags.discard(BB_POP_BLOCK)
            else:
                flags.add(BB_POP_BLOCK)
        elif op in bb_mgr.EXCEPT_INSTRUCTIONS:
            if sys.version_info[0:2] <= (2, 7):
                # Up to Python 2.7, three POP_TOPs at the start of a block
                # mark an exception handler; we must also be nested inside
                # a "try".
                if not try_blocks or block_start != offset:
                    continue
                if (instructions[i+1].opcode != bb_mgr.opcode.opmap['POP_TOP'] or
                        instructions[i+2].opcode != bb_mgr.opcode.opmap['POP_TOP']):
                    continue
            flags.add(BB_EXCEPT)
            try_blocks[-1].exception_offsets.add(block_start)
        elif op in bb_mgr.TRY_INSTRUCTIONS:
            try_end_stack.append(inst.argval)
            flags.add(BB_TRY)
        elif op in bb_mgr.END_FINALLY_INSTRUCTIONS:
            flags.add(BB_END_FINALLY)
            try_blocks[-1].exception_offsets.add(block_start)
        elif op in bb_mgr.FOR_INSTRUCTIONS:
            flags.add(BB_FOR)
            pending_jumps.add(inst.argval)
            block, flags, pending_jumps = bb_mgr.add_bb(
                block_start, offset, loop_offset, after,
                flags, pending_jumps)
            loop_offset = None
            block_start = after
        elif op in bb_mgr.JUMP_INSTRUCTIONS:
            # Some kind of jump: remember where it goes in this block's
            # jump offsets.
            jump_to = inst.arg if op in bb_mgr.JABS_INSTRUCTIONS else inst.argval
            pending_jumps.add(jump_to)

            if op in bb_mgr.JUMP_UNCONDITONAL:
                flags.add(BB_JUMP_UNCONDITIONAL)
                if jump_to == after:
                    flags.add(BB_JUMP_TO_FALLTHROUGH)
                block, flags, pending_jumps = bb_mgr.add_bb(
                    block_start, offset, loop_offset, after,
                    flags, pending_jumps)
                loop_offset = None
                if BB_TRY in block.flags:
                    try_blocks.append(block)
                block_start = after
            elif op != bb_mgr.opcode.SETUP_LOOP:
                if op in bb_mgr.FINALLY_INSTRUCTIONS:
                    flags.add(BB_FINALLY)
                block, flags, pending_jumps = bb_mgr.add_bb(
                    block_start, offset, loop_offset, after,
                    flags, pending_jumps)
                loop_offset = None
                if BB_TRY in block.flags:
                    try_blocks.append(block)
                block_start = after
        elif op in bb_mgr.NOFOLLOW_INSTRUCTIONS:
            flags.add(BB_NOFOLLOW)
            _, flags, pending_jumps = bb_mgr.add_bb(
                block_start, offset, loop_offset, after,
                flags, pending_jumps)
            loop_offset = None
            block_start = after

    if bb_mgr.bb_list:
        bb_mgr.bb_list[-1].follow_offset = None
        bb_mgr.start_block = bb_mgr.bb_list[0]

    # Instructions not yet assigned to a block form a final one.
    if block_start <= last_offset:
        bb_mgr.bb_list.append(
            BasicBlock(block_start, last_offset, loop_offset, None,
                       flags=flags, jump_offsets=pending_jumps))
        loop_offset = None

    # Add an artificial block where the exits of other blocks can be
    # linked to. This helps in computing reverse dominators.
    exit_offset = last_offset + 1
    bb_mgr.add_bb(exit_offset, exit_offset, None, None, {BB_EXIT}, [])
    return bb_mgr
Ejemplo n.º 14
0
    def reduce_is_invalid(self, rule, ast, tokens, first, last):
        """
        Decide whether reducing by grammar rule `rule` should be rejected.

        rule:        (lhs, rhs-tuple) of the grammar rule being reduced
        ast:         the tree built for this reduction
        tokens:      the full token list
        first, last: token indexes delimiting the reduction; `last` is
                     clamped to the final token index before use

        The result of the parent class check, or of the function registered
        for the rule's left-hand side in self.reduce_check_table (which
        replaces the parent's result), is returned when it is true.
        Otherwise the 2.7-specific checks below run; a rule that matches
        none of them yields False.
        """
        invalid = super(Python27Parser, self).reduce_is_invalid(
            rule, ast, tokens, first, last)

        lhs = rule[0]
        n = len(tokens)
        check_fn = self.reduce_check_table.get(lhs, None)
        if check_fn:
            invalid = check_fn(self, lhs, n, rule, ast, tokens, first, last)
        last = min(last, n-1)
        if invalid:
            return invalid

        if rule == ("comp_if", ("expr", "jmp_false", "comp_iter")):
            cond_jump = ast[1]
            if cond_jump[0] == "POP_JUMP_IF_FALSE":
                # Invalid when the jump lands strictly inside the reduction.
                return tokens[first].offset < cond_jump[0].attr < tokens[last].offset
            return False

        if (rule[0], rule[1][0:5]) == (
                "if_exp",
                ("expr", "jmp_false", "expr", "JUMP_ABSOLUTE", "expr")):
            cond_jump = ast[1]
            if cond_jump[0] == "POP_JUMP_IF_FALSE":
                else_instr = ast[4].first_child()
                # The false-jump has to land on the start of the else part.
                if cond_jump[0].attr != else_instr.offset:
                    return True
                end_offset = ast[3].attr
                return end_offset < tokens[last].offset
            return False

        if rule[0] == "raise_stmt1":
            return ast[0] == "expr" and ast[0][0] == "or"

        if rule[0] in ("assert", "assert2"):
            jump_inst = ast[1][0]
            jump_target = jump_inst.attr
            return not (last >= len(tokens)
                        or jump_target == tokens[last].offset
                        or jump_target == next_offset(ast[-1].op, ast[-1].opc,
                                                      ast[-1].offset))

        if rule == ("ifstmt", ("testexpr", "_ifstmts_jump")):
            # Inspect up to three tokens before `last`, skipping POP_TOP and
            # COME_FROM, looking for a JUMP_FORWARD.
            for idx in range(last-1, last-4, -1):
                tok = tokens[idx]
                if tok == "JUMP_FORWARD":
                    return tok.attr > tokens[min(last, len(tokens)-1)].off2int()
                if tok not in ("POP_TOP", "COME_FROM"):
                    return False
            return False

        if rule == ("iflaststmtl", ("testexpr", "c_stmts")):
            testexpr = ast[0]
            if testexpr[0] not in ("testfalse", "testtrue"):
                return False
            test = testexpr[0]
            if not (len(test) > 1 and test[1].kind.startswith("jmp_")):
                return False
            jmp_target = test[1][0].attr
            if last == len(tokens):
                last -= 1
            # Step over tokens at either end whose offset is a string.
            while isinstance(tokens[first].offset, str) and first < last:
                first += 1
            if first == last:
                return True
            while first < last and isinstance(tokens[last].offset, str):
                last -= 1
            return tokens[first].off2int() < jmp_target < tokens[last].off2int()

        if rule == ("list_if_not", ("expr", "jmp_true", "list_iter")):
            jump_inst = ast[1][0]
            jump_offset = jump_inst.attr
            return jump_offset > jump_inst.offset and jump_offset < tokens[last].offset

        if rule == ("list_if", ("expr", "jmp_false", "list_iter")):
            jump_inst = ast[1][0]
            jump_offset = jump_inst.attr
            return jump_offset > jump_inst.offset and jump_offset < tokens[last].offset

        if rule == ("or", ("expr", "jmp_true", "expr", "\\e_come_from_opt")):
            # jmp_true must not jump into the middle of the "or": it has to
            # reach the end of the "or", or the same place the end goes to.
            # The target is computed as offset + attr + 3 (presumably 3 is
            # the size of the jump instruction -- TODO confirm).
            jmp_true = ast[1][0]
            jmp_target = jmp_true.offset + jmp_true.attr + 3
            return not (jmp_target == tokens[last].offset or
                        tokens[last].pattr == jmp_true.pattr)

        if (rule[0] == "whilestmt" and
                rule[1][0:-2] ==
                ("SETUP_LOOP", "testexpr", "l_stmts_opt",
                 "JUMP_BACK", "JUMP_BACK")):
            # The two consecutive JUMP_BACKs must go to the same place.
            pos = last-1
            while tokens[pos] != "JUMP_BACK":
                pos -= 1
            return tokens[pos].attr != tokens[pos-1].attr

        if rule[0] == "if_exp_true":
            return (first) > 0 and tokens[first-1] == "POP_JUMP_IF_FALSE"

        return False
Ejemplo n.º 15
0
    def detect_control_flow(self, offset, targets, inst_index):
        """
        Detect type of block structures and their boundaries to fix optimized jumps
        in python2.3+

        The instruction examined is ``self.insts[inst_index]``. Depending on its
        opcode this records loop, "if-then", "else" and "and/or" regions in
        ``self.structs`` and corrected jump targets in ``self.fixed_jumps``; it
        also updates ``self.loops``, ``self.setup_loops``, ``self.ignore_if``,
        ``self.not_continue``, ``self.else_start``, ``self.except_targets`` and
        ``self.return_end_ifs``.

        :param offset: bytecode offset under examination (presumably the offset
                       of ``self.insts[inst_index]`` -- confirm against caller)
        :param targets: mapping from an offset to an iterable of offsets; only
                        consulted in the RETURN_VALUE/BREAK_LOOP case for
                        version >= 3.5 (presumably the jump sources reaching
                        that offset -- confirm against caller)
        :param inst_index: index into ``self.insts`` of the instruction
        :return: None; all results are recorded on ``self``
        """

        code = self.code
        inst = self.insts[inst_index]
        op = inst.opcode


        # Detect parent structure
        # self.structs[0] is the starting candidate; it is narrowed below.
        parent = self.structs[0]
        start  = parent['start']
        end    = parent['end']

        # Pick inner-most parent for our offset
        # A struct qualifies when it contains `offset` and lies within the
        # range chosen so far, so the range only ever shrinks.
        for struct in self.structs:
            current_start = struct['start']
            current_end   = struct['end']
            if ((current_start <= offset < current_end)
                and (current_start >= start and current_end <= end)):
                start  = current_start
                end    = current_end
                parent = struct

        if self.version < 3.8 and op == self.opc.SETUP_LOOP:
            # We categorize loop types: 'for', 'while', 'while 1' with
            # possibly suffixes '-loop' and '-else'
            # Try to find the jump_back instruction of the loop.
            # It could be a return instruction.

            start += inst.inst_size
            target = self.get_target(offset)
            end    = self.restrict_to_parent(target, parent)
            # Remember which SETUP_LOOP owns this loop-exit target.
            self.setup_loops[target] = offset

            # The loop exit was clipped to the parent: record the clipped value.
            if target != end:
                self.fixed_jumps[offset] = end

            (line_no, next_line_byte) = self.lines[offset]
            jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                            next_line_byte, False)

            if jump_back:
                jump_forward_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
            else:
                jump_forward_offset = None

            # Offset two instructions before `end`.
            return_val_offset1 = self.prev[self.prev[end]]

            # A JUMP_ABSOLUTE that is not the last instruction before `end` and is
            # followed by a forward jump is discarded when the loop actually
            # finishes with RETURN_VALUE (optionally followed by POP_BLOCK).
            if (jump_back and jump_back != self.prev_op[end]
                and self.is_jump_forward(jump_forward_offset)):
                if (code[self.prev_op[end]] == self.opc.RETURN_VALUE or
                    (code[self.prev_op[end]] == self.opc.POP_BLOCK
                     and code[return_val_offset1] == self.opc.RETURN_VALUE)):
                    jump_back = None
            if not jump_back:
                # loop suite ends in return
                jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
                if not jump_back:
                    return

                # Use the offset just after the RETURN_VALUE as the loop end.
                jb_inst = self.get_inst(jump_back)
                jump_back = self.next_offset(jb_inst.opcode, jump_back)

                # NOTE(review): if_offset is set when the instruction before
                # next_line_byte is *not* a conditional pop-jump, which then
                # selects 'while'; this looks inverted -- confirm before changing.
                if_offset = None
                if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf:
                    if_offset = self.prev[next_line_byte]
                if if_offset:
                    loop_type = 'while'
                    self.ignore_if.add(if_offset)
                else:
                    loop_type = 'for'
                target = next_line_byte
                end = xdis.next_offset(code[jump_back], self.opc, jump_back)
            else:
                # The jump found does not go back before next_line_byte; search
                # again using `start` as the required jump target.
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, start, False)

                jb_inst = self.get_inst(jump_back)

                jb_next_offset = self.next_offset(jb_inst.opcode, jump_back)
                # Pull `end` back to just after the jump-back when both places
                # hold forward jumps to the same target, or when the SETUP_LOOP
                # target lies before the SETUP_LOOP itself.
                if end > jb_next_offset and self.is_jump_forward(end):
                    if self.is_jump_forward(jb_next_offset):
                        if self.get_target(jb_next_offset) == self.get_target(end):
                            self.fixed_jumps[offset] = jb_next_offset
                            end = jb_next_offset
                elif target < offset:
                    self.fixed_jumps[offset] = jb_next_offset
                    end = jb_next_offset

                # From here on `target` is the loop head (where jump_back goes).
                target = self.get_target(jump_back)

                if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    test = self.prev_op[next_line_byte]

                    if test == offset:
                        # Nothing between SETUP_LOOP and the next line: no test.
                        loop_type = 'while 1'
                    elif self.code[test] in self.opc.JUMP_OPs:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        # NOTE(review): 3 is presumably the byte size of a
                        # pre-3.6 jump instruction -- confirm for wordcode.
                        if test_target > (jump_back+3):
                            jump_back = test_target
                self.not_continue.add(jump_back)
            self.loops.append(target)
            self.structs.append({'type': loop_type + '-loop',
                                 'start': target,
                                 'end':   jump_back})
            # Anything between the jump-back and `end` is the loop's else part.
            after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
            if after_jump_offset != end:
                self.structs.append({'type': loop_type + '-else',
                                     'start': after_jump_offset,
                                     'end':   end})
        elif op in self.pop_jump_tf:
            start   = offset + inst.inst_size
            target  = inst.argval
            rtarget = self.restrict_to_parent(target, parent)
            prev_op = self.prev_op

            # Do not let jump to go out of parent struct bounds
            if target != rtarget and parent['type'] == 'and/or':
                self.fixed_jumps[offset] = rtarget
                return

            # Does this jump to right after another conditional jump that is
            # not myself?  If so, it's part of a larger conditional.
            # rocky: if we have a conditional jump to the next instruction, then
            # possibly I am "skipping over" a "pass" or null statement.
            pretarget = self.get_inst(prev_op[target])

            if (pretarget.opcode in self.pop_jump_if_pop and
                (target > offset) and pretarget.offset != offset):

                # FIXME: hack upon hack...
                # In some cases the pretarget can be a jump to the next instruction
                # and these aren't and/or's either. We limit to 3.5+ since we experienced there
                # but it might be earlier versions, or might be a general principle.
                if self.version < 3.5 or pretarget.argval != target:
                    # FIXME: this is not accurate The commented out below
                    # is what it should be. However grammar rules right now
                    # assume the incorrect offsets.
                    # self.fixed_jumps[offset] = target
                    self.fixed_jumps[offset] = pretarget.offset
                    self.structs.append({'type': 'and/or',
                                         'start': start,
                                         'end': pretarget.offset})
                    return

            # The opcode *two* instructions before the target jump offset is important
            # in making a determination of what we have. Save that.
            pre_rtarget = prev_op[rtarget]

            # Is it an "and" inside an "if" or "while" block
            if op == self.opc.POP_JUMP_IF_FALSE:

                # Search for another POP_JUMP_IF_FALSE targetting the same op,
                # in current statement, starting from current offset, and filter
                # everything inside inner 'or' jumps and midline ifs
                match = self.rem_or(start, self.next_stmt[offset],
                                    self.opc.POP_JUMP_IF_FALSE, target)

                # If we still have any offsets in set, start working on it
                if match:
                    is_jump_forward = self.is_jump_forward(pre_rtarget)
                    if (is_jump_forward and pre_rtarget not in self.stmts and
                        self.restrict_to_parent(self.get_target(pre_rtarget), parent) == rtarget):
                        # The two `pass` arms below deliberately record nothing
                        # and fall through to the struct detection further down.
                        if (code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE
                            and self.remove_mid_line_ifs([offset]) and
                            target == self.get_target(prev_op[pre_rtarget]) and
                            (prev_op[pre_rtarget] not in self.stmts or
                             self.get_target(prev_op[pre_rtarget]) > prev_op[pre_rtarget]) and
                            1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], self.pop_jump_tf, target)))):
                            pass
                        elif (code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE
                              and self.remove_mid_line_ifs([offset]) and
                              1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget],
                                                                                 self.pop_jump_tf, target))) |
                                    set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget],
                                                                             (self.opc.POP_JUMP_IF_FALSE,
                                                                              self.opc.POP_JUMP_IF_TRUE,
                                                                              self.opc.JUMP_ABSOLUTE),
                                                                             pre_rtarget, True)))))):
                            pass
                        else:
                            # Prefer the first POP_JUMP_IF_FALSE with the same
                            # target that passes the line test below, provided no
                            # earlier one in the statement had another target;
                            # otherwise use the last match.
                            fix = None
                            jump_ifs = self.inst_matches(start, self.next_stmt[offset],
                                                         self.opc.POP_JUMP_IF_FALSE)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    # FIXME: remove magic number
                                    if self.lines[j].next == j + 3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[offset] = fix or match[-1]
                            return
                    else:
                        if self.version < 3.6:
                            # FIXME: this is putting in COME_FROMs in the wrong place.
                            # Fix up grammar so we don't need to do this.
                            # See cf_for_iter use in parser36.py
                            self.fixed_jumps[offset] = match[-1]
                        elif target > offset:
                            # Right now we only add COME_FROMs in forward (not loop) jumps
                            self.fixed_jumps[offset] = target
                        return
            # op == POP_JUMP_IF_TRUE
            else:
                # NOTE(review): this local shadows the builtin `next`.
                next = self.next_stmt[offset]
                if prev_op[next] == offset:
                    # This jump is the last instruction of its statement.
                    pass
                elif self.is_jump_forward(next) and target == self.get_target(next):
                    if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE:
                        if (code[next] == self.opc.JUMP_FORWARD
                            or target != rtarget
                            or code[prev_op[pre_rtarget]] not in
                            (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)):
                            self.fixed_jumps[offset] = prev_op[next]
                            return
                elif (code[next] == self.opc.JUMP_ABSOLUTE and self.is_jump_forward(target) and
                      self.get_target(target) == self.get_target(next)):
                    self.fixed_jumps[offset] = prev_op[next]
                    return

            # Don't add a struct for a while test, it's already taken care of
            if offset in self.ignore_if:
                return

            # When the instruction before the target is a JUMP_ABSOLUTE that
            # starts a statement, move rtarget back onto it (with exceptions).
            # NOTE(review): `rtarget+3` presumably assumes 3-byte instructions.
            rtarget_is_ja = code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
            if ( rtarget_is_ja and
                pre_rtarget in self.stmts and
                pre_rtarget != offset and
                prev_op[pre_rtarget] != offset and
                not (code[rtarget] == self.opc.JUMP_ABSOLUTE and
                     code[rtarget+3] == self.opc.POP_BLOCK and
                     code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE)):
                rtarget = pre_rtarget

            # Does the "jump if" jump beyond a jump op?
            # That is, we have something like:
            #  POP_JUMP_IF_FALSE HERE
            #  ...
            # JUMP_FORWARD
            # HERE:
            #
            # If so, this can be block inside an "if" statement
            # or a conditional assignment like:
            #   x = 1 if x else 2
            #
            # For 3.5, in addition the JUMP_FORWARD above we could have
            # JUMP_BACK or CONTINUE
            #
            # There are other situations we may need to consider, like
            # if the condition jump is to a forward location.
            # Also the existence of a jump to the instruction after "END_FINALLY"
            # will distinguish "try/else" from "try".
            # BREAK_LOOP is only considered for versions before 3.8.
            if self.version < 3.8:
                rtarget_break = (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP)
            else:
                rtarget_break = (self.opc.RETURN_VALUE,)

            if self.is_jump_forward(pre_rtarget) or (rtarget_is_ja and self.version >= 3.5):
                if_end = self.get_target(pre_rtarget)

                # If the jump target is back, we are looping
                if (if_end < pre_rtarget and self.version < 3.8 and
                    (code[prev_op[if_end]] == self.opc.SETUP_LOOP)):
                    if (if_end > start):
                        return

                end = self.restrict_to_parent(if_end, parent)

                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': pre_rtarget})

                # FIXME: add this
                # self.fixed_jumps[offset] = rtarget
                self.not_continue.add(pre_rtarget)

                # Code between rtarget and `end` is an else part, unless it
                # starts with END_FINALLY/JUMP_ABSOLUTE or follows
                # POP_EXCEPT/END_FINALLY.
                if rtarget < end and (
                        code[rtarget] not in (self.opc.END_FINALLY,
                                              self.opc.JUMP_ABSOLUTE) and
                        code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
                                                        self.opc.END_FINALLY)):
                    self.structs.append({'type': 'else',
                                         'start': rtarget,
                                         'end': end})
                    self.else_start[rtarget] = end
            elif self.is_jump_back(pre_rtarget, 0):
                # The "then" part ends in a backward jump; no else part is added.
                if_end = rtarget
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': pre_rtarget})
                self.not_continue.add(pre_rtarget)
            elif code[pre_rtarget] in rtarget_break:
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': rtarget})
                # It is important to distinguish if this return is inside some sort
                # except block return
                jump_prev = prev_op[offset]
                if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                    if self.opc.cmp_op[code[jump_prev+1]] == 'exception-match':
                        return
                if self.version >= 3.5:
                    # Python 3.5 may remove as dead code a JUMP
                    # instruction after a RETURN_VALUE. So we check
                    # based on seeing SETUP_EXCEPT various places.
                    if self.version < 3.6 and code[rtarget] == self.opc.SETUP_EXCEPT:
                        return
                    # Check that next instruction after pops and jump is
                    # not from SETUP_EXCEPT
                    next_op = rtarget
                    if code[next_op] == self.opc.POP_BLOCK:
                        next_op += instruction_size(self.code[next_op], self.opc)
                    if code[next_op] == self.opc.JUMP_ABSOLUTE:
                        next_op += instruction_size(self.code[next_op], self.opc)
                    if next_op in targets:
                        for try_op in targets[next_op]:
                            come_from_op = code[try_op]
                            if self.version < 3.8 and come_from_op == self.opc.SETUP_EXCEPT:
                                return
                            pass
                    pass

                if self.version >= 3.4:
                    self.fixed_jumps[offset] = rtarget

                if code[pre_rtarget] == self.opc.RETURN_VALUE:
                    # If we are at some sort of POP_JUMP_IF and the instruction before was
                    # COMPARE_OP exception-match, then pre_rtarget is not an end_if
                    if not (inst_index > 0 and self.insts[inst_index-1].argval == 'exception-match'):
                        self.return_end_ifs.add(pre_rtarget)
                else:
                    # Not RETURN_VALUE, so (given rtarget_break) this is BREAK_LOOP.
                    self.fixed_jumps[offset] = rtarget
                    self.not_continue.add(pre_rtarget)
            else:

                # FIXME: this is very convoluted and based on rather hacky
                # empirical evidence. It should go away when
                # we have better control-flow analysis
                normal_jump = self.version >= 3.6
                if self.version == 3.5:
                    j = self.offset2inst_index[target]
                    if j+2 < len(self.insts) and self.insts[j+2].is_jump_target:
                        normal_jump = self.insts[j+1].opname == 'POP_BLOCK'

                if normal_jump:
                    # For now, we'll only tag forward jump.
                    if target > offset:
                        self.fixed_jumps[offset] = target
                        pass
                else:
                    # FIXME: This is probably a bug in < 3.5 and we should
                    # instead use the above code. But until we smoke things
                    # out we'll stick with it.
                    if rtarget > offset:
                        self.fixed_jumps[offset] = rtarget

        elif self.version < 3.8 and op == self.opc.SETUP_EXCEPT:
            # Clip the handler target to the enclosing struct.
            target = self.get_target(offset)
            end    = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op == self.opc.POP_EXCEPT:
            # Look at the instruction right after POP_EXCEPT: a forward
            # JUMP_ABSOLUTE not followed by END_FINALLY is recorded as a jump
            # out of an except block.
            next_offset = xdis.next_offset(op, self.opc, offset)
            target = self.get_target(next_offset)
            if target > next_offset:
                next_op = code[next_offset]
                if (self.opc.JUMP_ABSOLUTE == next_op and
                    self.opc.END_FINALLY != code[xdis.next_offset(next_op, self.opc, next_offset)]):
                    self.fixed_jumps[next_offset] = target
                    self.except_targets[target] = next_offset

        elif op == self.opc.SETUP_FINALLY:
            # Clip the finally target to the enclosing struct.
            target = self.get_target(offset)
            end    = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op in self.jump_if_pop:
            target = self.get_target(offset)
            if target > offset:
                # Prefer a JUMP_FORWARD to the same target found between here
                # and the target, unless ROT_TWO sits 3 bytes after it.
                unop_target = self.last_instr(offset, target, self.opc.JUMP_FORWARD, target)
                if unop_target and code[unop_target+3] != self.opc.ROT_TWO:
                    self.fixed_jumps[offset] = unop_target
                else:
                    self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
                    pass
                pass
        elif self.version >= 3.5:
            # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
            # misclassified as RETURN_END_IF. Handle that here.
            # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
            if op == self.opc.RETURN_VALUE:
                next_offset = xdis.next_offset(op, self.opc, offset)
                if ( next_offset < len(code) and
                     (code[next_offset] == self.opc.JUMP_ABSOLUTE and
                      offset in self.return_end_ifs) ):
                    self.return_end_ifs.remove(offset)
                    pass
                pass
            elif op == self.opc.JUMP_FORWARD:
                # If we have:
                #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
                # then RETURN_VALUE is not RETURN_END_IF
                rtarget = self.get_target(offset)
                rtarget_prev = self.prev[rtarget]
                if (code[rtarget_prev] == self.opc.RETURN_VALUE and
                    rtarget_prev in self.return_end_ifs):
                    # Walk backwards from the RETURN_VALUE to this JUMP_FORWARD;
                    # any jump in between means the pattern above does not hold.
                    # NOTE(review): uses the module-level `op3` table here
                    # rather than `self.opc` -- confirm they agree.
                    i = rtarget_prev
                    while i != offset:
                        if code[i] in [op3.JUMP_FORWARD, op3.JUMP_ABSOLUTE]:
                            return
                        i = self.prev[i]
                    self.return_end_ifs.remove(rtarget_prev)
                pass
        return
Ejemplo n.º 16
0
    def detect_control_flow(
        self, offset: int, targets: Dict[Any, Any], inst_index: int
    ):
        """
        Detect type of block structures and their boundaries to fix optimized jumps
        in python2.3+

        The instruction examined is ``self.insts[inst_index]``. Loops introduced
        by SETUP_LOOP (versions before 3.8) are classified and recorded in
        ``self.structs``; for the other handled opcodes a jump target is
        recorded in ``self.fixed_jumps``. ``self.loops``, ``self.setup_loops``,
        ``self.ignore_if``, ``self.not_continue``, ``self.except_targets`` and
        ``self.return_end_ifs`` may also be updated.

        :param offset: bytecode offset under examination (presumably the offset
                       of ``self.insts[inst_index]`` -- confirm against caller)
        :param targets: not read anywhere in this method body
        :param inst_index: index into ``self.insts`` of the instruction
        :return: None; all results are recorded on ``self``
        """

        code = self.code
        inst = self.insts[inst_index]
        op = inst.opcode

        # Detect parent structure
        # self.structs[0] is the starting candidate; it is narrowed below.
        parent: Dict[str, Any] = self.structs[0]
        start: int = parent["start"]
        end: int = parent["end"]

        # Pick inner-most parent for our offset
        # A struct qualifies when it contains `offset` and lies within the
        # range chosen so far, so the range only ever shrinks.
        for struct in self.structs:
            current_start = struct["start"]
            current_end = struct["end"]
            if (current_start <= offset < current_end) and (
                current_start >= start and current_end <= end
            ):
                start = current_start
                end = current_end
                parent = struct

        if self.version < 3.8 and op == self.opc.SETUP_LOOP:
            # We categorize loop types: 'for', 'while', 'while 1' with
            # possibly suffixes '-loop' and '-else'
            # Try to find the jump_back instruction of the loop.
            # It could be a return instruction.

            start += inst.inst_size
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            # Remember which SETUP_LOOP owns this loop-exit target.
            self.setup_loops[target] = offset

            # The loop exit was clipped to the parent: record the clipped value.
            if target != end:
                self.fixed_jumps[offset] = end

            (line_no, next_line_byte) = self.lines[offset]
            jump_back = self.last_instr(
                start, end, self.opc.JUMP_ABSOLUTE, next_line_byte, False
            )

            if jump_back:
                jump_forward_offset = xdis.next_offset(
                    code[jump_back], self.opc, jump_back
                )
            else:
                jump_forward_offset = None

            # Offset two instructions before `end`.
            return_val_offset1 = self.prev[self.prev[end]]

            # A JUMP_ABSOLUTE that is not the last instruction before `end` and is
            # followed by a forward jump is discarded when the loop actually
            # finishes with RETURN_VALUE (optionally followed by POP_BLOCK).
            if (
                jump_back
                and jump_back != self.prev_op[end]
                and self.is_jump_forward(jump_forward_offset)
            ):
                if code[self.prev_op[end]] == self.opc.RETURN_VALUE or (
                    code[self.prev_op[end]] == self.opc.POP_BLOCK
                    and code[return_val_offset1] == self.opc.RETURN_VALUE
                ):
                    jump_back = None
            if not jump_back:
                # loop suite ends in return
                jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
                if not jump_back:
                    return

                # Use the offset just after the RETURN_VALUE as the loop end.
                jb_inst = self.get_inst(jump_back)
                jump_back = self.next_offset(jb_inst.opcode, jump_back)

                # NOTE(review): if_offset is set when the instruction before
                # next_line_byte is *not* a conditional pop-jump, which then
                # selects "while"; this looks inverted -- confirm before changing.
                if_offset = None
                if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf:
                    if_offset = self.prev[next_line_byte]
                if if_offset:
                    loop_type = "while"
                    self.ignore_if.add(if_offset)
                else:
                    loop_type = "for"
                target = next_line_byte
                end = xdis.next_offset(code[jump_back], self.opc, jump_back)
            else:
                # The jump found does not go back before next_line_byte; search
                # again using `start` as the required jump target.
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(
                        start, end, self.opc.JUMP_ABSOLUTE, start, False
                    )

                jb_inst = self.get_inst(jump_back)

                jb_next_offset = self.next_offset(jb_inst.opcode, jump_back)
                # Pull `end` back to just after the jump-back when both places
                # hold forward jumps to the same target, or when the SETUP_LOOP
                # target lies before the SETUP_LOOP itself.
                if end > jb_next_offset and self.is_jump_forward(end):
                    if self.is_jump_forward(jb_next_offset):
                        if self.get_target(jb_next_offset) == self.get_target(end):
                            self.fixed_jumps[offset] = jb_next_offset
                            end = jb_next_offset
                elif target < offset:
                    self.fixed_jumps[offset] = jb_next_offset
                    end = jb_next_offset

                # From here on `target` is the loop head (where jump_back goes).
                target = self.get_target(jump_back)

                if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                    loop_type = "for"
                else:
                    loop_type = "while"
                    test = self.prev_op[next_line_byte]

                    if test == offset:
                        # Nothing between SETUP_LOOP and the next line: no test.
                        loop_type = "while 1"
                    elif self.code[test] in self.opc.JUMP_OPs:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        # NOTE(review): 3 is presumably the byte size of a
                        # pre-3.6 jump instruction -- confirm for wordcode.
                        if test_target > (jump_back + 3):
                            jump_back = test_target
                self.not_continue.add(jump_back)
            self.loops.append(target)
            self.structs.append(
                {"type": loop_type + "-loop", "start": target, "end": jump_back}
            )
            # Anything between the jump-back and `end` is the loop's else part.
            after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
            if after_jump_offset != end:
                self.structs.append(
                    {
                        "type": loop_type + "-else",
                        "start": after_jump_offset,
                        "end": end,
                    }
                )
        elif op in self.pop_jump_tf:
            # Conditional pop-jumps keep their own argval as the jump target;
            # no and/or or if/else structure is inferred here.
            target = inst.argval
            self.fixed_jumps[offset] = target

        # FIXME: consider removing the test on 3.8.
        elif self.version >= 3.8 and inst.is_jump():
            self.fixed_jumps[offset] = inst.argval

        elif self.version < 3.8 and op == self.opc.SETUP_EXCEPT:
            # Clip the handler target to the enclosing struct.
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif self.version < 3.8 and op == self.opc.POP_EXCEPT:
            # Look at the instruction right after POP_EXCEPT: a forward
            # JUMP_ABSOLUTE not followed by END_FINALLY is recorded as a jump
            # out of an except block.
            next_offset = xdis.next_offset(op, self.opc, offset)
            target = self.get_target(next_offset)
            if target > next_offset:
                next_op = code[next_offset]
                if (
                    self.opc.JUMP_ABSOLUTE == next_op
                    and self.opc.END_FINALLY
                    != code[xdis.next_offset(next_op, self.opc, next_offset)]
                ):
                    self.fixed_jumps[next_offset] = target
                    self.except_targets[target] = next_offset

        elif op == self.opc.SETUP_FINALLY:
            # Clip the finally target to the enclosing struct.
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op in self.jump_if_pop:
            target = self.get_target(offset)
            if target > offset:
                # Prefer a JUMP_FORWARD to the same target found between here
                # and the target, unless ROT_TWO sits 3 bytes after it.
                unop_target = self.last_instr(
                    offset, target, self.opc.JUMP_FORWARD, target
                )
                if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
                    self.fixed_jumps[offset] = unop_target
                else:
                    self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
                    pass
                pass
        else:
            # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
            # misclassified as RETURN_END_IF. Handle that here.
            # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
            # NOTE(review): unlike the comment's "3.5+", this branch has no
            # version test and runs for every opcode not handled above.
            if op == self.opc.RETURN_VALUE:
                next_offset = xdis.next_offset(op, self.opc, offset)
                if next_offset < len(code) and (
                    code[next_offset] == self.opc.JUMP_ABSOLUTE
                    and offset in self.return_end_ifs
                ):
                    self.return_end_ifs.remove(offset)
                    pass
                pass
            elif op == self.opc.JUMP_FORWARD:
                # If we have:
                #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
                # then RETURN_VALUE is not RETURN_END_IF
                rtarget = self.get_target(offset)
                rtarget_prev = self.prev[rtarget]
                if (
                    code[rtarget_prev] == self.opc.RETURN_VALUE
                    and rtarget_prev in self.return_end_ifs
                ):
                    # Walk backwards from the RETURN_VALUE to this JUMP_FORWARD;
                    # any jump in between means the pattern above does not hold.
                    # NOTE(review): uses the module-level `op3` table here
                    # rather than `self.opc` -- confirm they agree.
                    i = rtarget_prev
                    while i != offset:
                        if code[i] in [op3.JUMP_FORWARD, op3.JUMP_ABSOLUTE]:
                            return
                        i = self.prev[i]
                    self.return_end_ifs.remove(rtarget_prev)
                pass
        return
Ejemplo n.º 17
0
 def next_offset(self, op, offset):
     """Return ``xdis.next_offset(op, self.opc, offset)``.

     Convenience wrapper that supplies this object's opcode table
     (``self.opc``) so callers only pass the opcode and its offset.
     """
     opcode_table = self.opc
     following = xdis.next_offset(op, opcode_table, offset)
     return following
Ejemplo n.º 18
0
Archivo: vm.py Proyecto: rocky/x-python
    def parse_byte_and_args(self, byte_code, replay=False):
        """Parse 1 - 3 bytes of bytecode into
        an instruction and optionally arguments.

        The instruction is taken from the current frame (``self.frame``).
        ``EXTENDED_ARG`` prefixes are consumed here: their value is folded
        into the operand of the instruction that follows them and the loop
        moves on to that instruction.

        Parameters:
          byte_code: an opcode number. When ``replay`` is false it is only
            used to compute the offset that follows ``f_lasti`` and is then
            replaced by the opcode read from ``co_code`` (presumably it is
            the opcode of the previously run instruction -- confirm against
            callers). When ``replay`` is true it is decoded as given.
          replay: argument replay is used to handle breakpoints. When true,
            ``f_lasti`` is not advanced and the opcode is not re-read from
            ``co_code``.

        Returns:
          A 6-tuple ``(byte_name, byte_code, int_arg, arguments, offset,
          line_number)`` where ``byte_name`` is ``self.opc.opname[byte_code]``,
          ``int_arg`` is the raw integer operand (``None`` if the opcode has
          no argument), ``arguments`` is ``[]`` or a one-element list holding
          the resolved operand, ``offset`` is the frame's ``f_lasti`` for this
          instruction, and ``line_number`` is the ``linestarts`` entry for
          that offset or ``None``.

        Side effects: may update ``f_lasti``, ``fallthrough`` and
        ``f_lineno`` on the current frame.
        """

        f = self.frame
        f_code = f.f_code
        co_code = f_code.co_code
        extended_arg = 0

        # Note: There is never more than one argument.
        # The list size is used to indicate whether an argument
        # exists or not.
        # FIXME: remove and use int_arg as an indicator of whether
        # the argument exists.
        arguments = []
        int_arg = None

        # Loops only when an EXTENDED_ARG prefix is seen ("continue" below);
        # every other instruction leaves through the "break" at the bottom.
        while True:
            if f.fallthrough:
                # Step past the instruction given by byte_code, unless we
                # are replaying, in which case f_lasti stays where it is.
                if not replay:
                    f.f_lasti = next_offset(byte_code, self.opc, f.f_lasti)
            else:
                # Jump instructions must set this False.
                f.fallthrough = True
            offset = f.f_lasti
            # Keep the frame's line number in sync when this offset starts
            # a new line.
            line_number = self.frame.linestarts.get(offset, None)
            if line_number is not None:
                f.f_lineno = line_number
            if not replay:
                byte_code = byteint(co_code[offset])
            byte_name = self.opc.opname[byte_code]
            arg_offset = offset + 1
            arg = None

            if op_has_argument(byte_code, self.opc):
                if self.version >= 3.6:
                    # One operand byte, OR-ed with any pending EXTENDED_ARG
                    # value.
                    int_arg = code2num(co_code, arg_offset) | extended_arg
                    # Note: Python 3.6.0a1 is 2, for 3.6.a3 and beyond we have 1
                    arg_offset += 1
                    if byte_code == self.opc.EXTENDED_ARG:
                        extended_arg = int_arg << 8
                        continue
                    else:
                        extended_arg = 0
                else:
                    # Two operand bytes, low byte first, plus any pending
                    # EXTENDED_ARG value.
                    int_arg = (code2num(co_code, arg_offset) +
                               code2num(co_code, arg_offset + 1) * 256 +
                               extended_arg)
                    arg_offset += 2
                    if byte_code == self.opc.EXTENDED_ARG:
                        extended_arg = int_arg * 65536
                        continue
                    else:
                        extended_arg = 0

                # Resolve the raw integer operand according to the opcode
                # class it belongs to.
                if byte_code in self.opc.CONST_OPS:
                    arg = f_code.co_consts[int_arg]
                elif byte_code in self.opc.FREE_OPS:
                    # Indexes below len(co_cellvars) select a cell variable;
                    # the remaining indexes select a free variable.
                    if int_arg < len(f_code.co_cellvars):
                        arg = f_code.co_cellvars[int_arg]
                    else:
                        var_idx = int_arg - len(f.f_code.co_cellvars)
                        arg = f_code.co_freevars[var_idx]
                elif byte_code in self.opc.NAME_OPS:
                    arg = f_code.co_names[int_arg]
                elif byte_code in self.opc.JREL_OPS:
                    # Many relative jumps are conditional,
                    # so setting f.fallthrough is wrong.
                    # arg_offset is now just past the operand bytes, so this
                    # turns the relative operand into an absolute offset.
                    arg = arg_offset + int_arg
                elif byte_code in self.opc.JABS_OPS:
                    # We probably could set fallthrough, since many (all?)
                    # of these are unconditional, but we'll make the jump do
                    # the work of setting.
                    arg = int_arg
                elif byte_code in self.opc.LOCAL_OPS:
                    arg = f_code.co_varnames[int_arg]
                else:
                    arg = int_arg
                arguments = [arg]
            break

        return byte_name, byte_code, int_arg, arguments, offset, line_number
Ejemplo n.º 19
0
    def detect_control_flow(self, offset, targets, extended_arg):
        """
        Detect structures and their boundaries to fix optimized jumps
        in python2.3+

        Looks at the single instruction at ``offset`` in ``self.code`` and,
        depending on its opcode, records what was learned in scanner state:
        structure spans are appended to ``self.structs``, corrected jump
        destinations are stored in ``self.fixed_jumps``, and the auxiliary
        collections ``self.setup_loops``, ``self.loops``, ``self.ignore_if``,
        ``self.not_continue``, ``self.return_end_ifs``, ``self.else_start``
        and ``self.except_targets`` are updated as needed.

        Parameters:
          offset: index into ``self.code`` of the instruction to examine.
          targets: mapping that is indexed by an offset and yields offsets
            whose opcodes are inspected (presumably jump target -> offsets
            of the instructions that jump there -- confirm with the caller).
          extended_arg: value forwarded to ``self.get_target`` (presumably
            the accumulated EXTENDED_ARG operand for this instruction --
            confirm with the caller).

        Returns None on every path; the results are the side effects above.
        """

        # TODO: check the struct boundaries more precisely -Dan

        code = self.code
        op = code[offset]

        # Detect parent structure
        # Start from the first recorded struct and narrow it down below.
        parent = self.structs[0]
        start = parent['start']
        end = parent['end']

        # Pick inner-most parent for our offset
        for struct in self.structs:
            current_start = struct['start']
            current_end   = struct['end']
            if ((current_start <= offset < current_end)
                and (current_start >= start and current_end <= end)):
                start = current_start
                end = current_end
                parent = struct

        if op == self.opc.SETUP_LOOP:
            # We categorize loop types: 'for', 'while', 'while 1' with
            # possibly suffixes '-loop' and '-else'
            # Try to find the jump_back instruction of the loop.
            # It could be a return instruction.

            start += instruction_size(op, self.opc)
            target = self.get_target(offset, extended_arg)
            end    = self.restrict_to_parent(target, parent)
            self.setup_loops[target] = offset

            # The loop's target lies outside the parent: remember the
            # clamped destination instead.
            if target != end:
                self.fixed_jumps[offset] = end

            (line_no, next_line_byte) = self.lines[offset]
            jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                            next_line_byte, False)

            if jump_back:
                jump_forward_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
            else:
                jump_forward_offset = None

            # Offset two instructions before "end".
            return_val_offset1 = self.prev[self.prev[end]]

            # Discard the candidate jump-back when it is followed by a
            # forward jump and the loop region ends in RETURN_VALUE
            # (optionally followed by POP_BLOCK).
            if (jump_back and jump_back != self.prev_op[end]
                and self.is_jump_forward(jump_forward_offset)):
                if (code[self.prev_op[end]] == self.opc.RETURN_VALUE or
                    (code[self.prev_op[end]] == self.opc.POP_BLOCK
                     and code[return_val_offset1] == self.opc.RETURN_VALUE)):
                    jump_back = None
            if not jump_back:
                # No usable JUMP_ABSOLUTE: fall back to the last
                # RETURN_VALUE in the region as the end of the loop body.
                jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
                if not jump_back:
                    return

                jump_back += 2  # FIXME ???
                if_offset = None
                if code[self.prev_op[next_line_byte]] not in self.pop_jump_tf:
                    if_offset = self.prev[next_line_byte]
                if if_offset:
                    loop_type = 'while'
                    self.ignore_if.add(if_offset)
                else:
                    loop_type = 'for'
                target = next_line_byte
                end = xdis.next_offset(code[jump_back], self.opc, jump_back)
            else:
                if self.get_target(jump_back, 0) >= next_line_byte:
                    jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, start, False)
                # NOTE(review): the "+4" / "+3" below are hard-coded byte
                # distances past jump_back; presumably they encode
                # instruction sizes -- confirm for the bytecode versions
                # this code is used with.
                if end > jump_back+4 and self.is_jump_forward(end):
                    if self.is_jump_forward(jump_back+4):
                        if self.get_target(jump_back+4, extended_arg) == self.get_target(end, extended_arg):
                            self.fixed_jumps[offset] = jump_back+4
                            end = jump_back+4
                elif target < offset:
                    self.fixed_jumps[offset] = jump_back+4
                    end = jump_back+4

                # I think 0 right because jump_back has been adjusted for any EXTENDED_ARG
                # it encounters
                target = self.get_target(jump_back, 0)

                if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    test = self.prev_op[next_line_byte]

                    if test == offset:
                        # Nothing sits between SETUP_LOOP and the loop body.
                        loop_type = 'while 1'
                    elif self.code[test] in self.opc.JUMP_OPs:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test, extended_arg)
                        if test_target > (jump_back+3):
                            jump_back = test_target
                self.not_continue.add(jump_back)
            self.loops.append(target)
            self.structs.append({'type': loop_type + '-loop',
                                 'start': target,
                                 'end':   jump_back})
            # Anything between the end of the loop body and "end" is
            # recorded as the loop's else part.
            after_jump_offset = xdis.next_offset(code[jump_back], self.opc, jump_back)
            if after_jump_offset != end:
                self.structs.append({'type': loop_type + '-else',
                                     'start': after_jump_offset,
                                     'end':   end})
        elif op in self.pop_jump_tf:
            start = offset + instruction_size(op, self.opc)
            target = self.get_target(offset, extended_arg)
            rtarget = self.restrict_to_parent(target, parent)
            prev_op = self.prev_op

            # Do not let jump to go out of parent struct bounds
            if target != rtarget and parent['type'] == 'and/or':
                self.fixed_jumps[offset] = rtarget
                return

            # Does this jump to right after another conditional jump that is
            # not myself?  If so, it's part of a larger conditional.
            # rocky: if we have a conditional jump to the next instruction, then
            # possibly I am "skipping over" a "pass" or null statement.

            if ((code[prev_op[target]] in self.pop_jump_if_pop) and
                (target > offset) and prev_op[target] != offset):
                # FIXME: this is not accurate The commented out below
                # is what it should be. However grammar rules right now
                # assume the incorrect offsets.
                # self.fixed_jumps[offset] = target
                self.fixed_jumps[offset] = prev_op[target]
                self.structs.append({'type': 'and/or',
                                     'start': start,
                                     'end': prev_op[target]})
                return

            # The opcode *two* instructions before the target jump offset is important
            # in making a determination of what we have. Save that.
            pre_rtarget = prev_op[rtarget]

            # Is it an "and" inside an "if" or "while" block
            if op == self.opc.POP_JUMP_IF_FALSE and self.version < 3.6:

                # Search for another POP_JUMP_IF_FALSE targeting the same op,
                # in current statement, starting from current offset, and filter
                # everything inside inner 'or' jumps and midline ifs
                match = self.rem_or(start, self.next_stmt[offset],
                                    self.opc.POP_JUMP_IF_FALSE, target)

                # If we still have any offsets in set, start working on it
                if match:
                    is_jump_forward = self.is_jump_forward(pre_rtarget)
                    if (is_jump_forward and pre_rtarget not in self.stmts and
                        self.restrict_to_parent(self.get_target(pre_rtarget, extended_arg), parent) == rtarget):
                        # The first two branches below recognise shapes that
                        # need no fix-up; only the final "else" rewrites the
                        # jump.
                        if (code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE
                            and self.remove_mid_line_ifs([offset]) and
                            target == self.get_target(prev_op[pre_rtarget], extended_arg) and
                            (prev_op[pre_rtarget] not in self.stmts or
                             self.get_target(prev_op[pre_rtarget], extended_arg) > prev_op[pre_rtarget]) and
                            1 == len(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget], self.pop_jump_tf, target)))):
                            pass
                        elif (code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE
                              and self.remove_mid_line_ifs([offset]) and
                              1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget],
                                                                                 self.pop_jump_tf, target))) |
                                    set(self.remove_mid_line_ifs(self.rem_or(start, prev_op[pre_rtarget],
                                                                             (self.opc.POP_JUMP_IF_FALSE,
                                                                              self.opc.POP_JUMP_IF_TRUE,
                                                                              self.opc.JUMP_ABSOLUTE),
                                                                             pre_rtarget, True)))))):
                            pass
                        else:
                            # Prefer the first POP_JUMP_IF_FALSE with the same
                            # target that ends its line (as long as every
                            # earlier one shared the target too); otherwise
                            # use the last match found above.
                            fix = None
                            jump_ifs = self.all_instr(start, self.next_stmt[offset],
                                                      self.opc.POP_JUMP_IF_FALSE)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j, extended_arg):
                                    if self.lines[j].next == j + 3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[offset] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[offset] = match[-1]
                        return
            # op == POP_JUMP_IF_TRUE
            # (given the test above, this branch also handles
            # POP_JUMP_IF_FALSE when self.version >= 3.6)
            else:
                next = self.next_stmt[offset]
                if prev_op[next] == offset:
                    pass
                elif self.is_jump_forward(next) and target == self.get_target(next, extended_arg):
                    if code[prev_op[next]] == self.opc.POP_JUMP_IF_FALSE:
                        if (code[next] == self.opc.JUMP_FORWARD
                            or target != rtarget
                            or code[prev_op[pre_rtarget]] not in
                            (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)):
                            self.fixed_jumps[offset] = prev_op[next]
                            return
                elif (code[next] == self.opc.JUMP_ABSOLUTE and self.is_jump_forward(target) and
                      self.get_target(target, extended_arg) == self.get_target(next, extended_arg)):
                    self.fixed_jumps[offset] = prev_op[next]
                    return

            # Don't add a struct for a while test, it's already taken care of
            if offset in self.ignore_if:
                return

            # When the instruction before the restricted target is a
            # JUMP_ABSOLUTE that starts a statement, pull rtarget back onto
            # it (unless the JUMP_ABSOLUTE / POP_BLOCK shape excluded below
            # is present).
            if (code[pre_rtarget] == self.opc.JUMP_ABSOLUTE and
                pre_rtarget in self.stmts and
                pre_rtarget != offset and
                prev_op[pre_rtarget] != offset and
                not (code[rtarget] == self.opc.JUMP_ABSOLUTE and
                     code[rtarget+3] == self.opc.POP_BLOCK and
                     code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE)):
                rtarget = pre_rtarget

            # Does the "jump if" jump beyond a jump op?
            # That is, we have something like:
            #  POP_JUMP_IF_FALSE HERE
            #  ...
            # JUMP_FORWARD
            # HERE:
            #
            # If so, this can be block inside an "if" statement
            # or a conditional assignment like:
            #   x = 1 if x else 2
            #
            # There are other contexts we may need to consider
            # like whether the target is "END_FINALLY"
            # or if the condition jump is to a forward location
            if self.is_jump_forward(pre_rtarget):
                if_end = self.get_target(pre_rtarget, 0)

                # If the jump target is back, we are looping
                if (if_end < pre_rtarget and
                    (code[prev_op[if_end]] == self.opc.SETUP_LOOP)):
                    if (if_end > start):
                        return

                end = self.restrict_to_parent(if_end, parent)

                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': pre_rtarget})

                # FIXME: add this
                # self.fixed_jumps[offset] = rtarget
                self.not_continue.add(pre_rtarget)

                # Record an "else" part when there is room between rtarget
                # and end and the surrounding opcodes do not rule it out.
                if rtarget < end and (
                        code[rtarget] not in (self.opc.END_FINALLY,
                                              self.opc.JUMP_ABSOLUTE) and
                        code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
                                                        self.opc.END_FINALLY)):
                    self.structs.append({'type': 'else',
                                         'start': rtarget,
                                         'end': end})
                    self.else_start[rtarget] = end
            elif self.is_jump_back(pre_rtarget, 0):
                if_end = rtarget
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': pre_rtarget})
                self.not_continue.add(pre_rtarget)
            elif code[pre_rtarget] in (self.opc.RETURN_VALUE,
                                       self.opc.BREAK_LOOP):
                self.structs.append({'type': 'if-then',
                                     'start': start,
                                     'end': rtarget})
                # It is important to distinguish if this return is inside some sort
                # except block return
                jump_prev = prev_op[offset]
                if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                    if self.opc.cmp_op[code[jump_prev+1]] == 'exception-match':
                        return
                if self.version >= 3.5:
                    # Python 3.5 may remove as dead code a JUMP
                    # instruction after a RETURN_VALUE. So we check
                    # based on seeing SETUP_EXCEPT various places.
                    if code[rtarget] == self.opc.SETUP_EXCEPT:
                        return
                    # Check that next instruction after pops and jump is
                    # not from SETUP_EXCEPT
                    next_op = rtarget
                    if code[next_op] == self.opc.POP_BLOCK:
                        next_op += instruction_size(self.code[next_op], self.opc)
                    if code[next_op] == self.opc.JUMP_ABSOLUTE:
                        next_op += instruction_size(self.code[next_op], self.opc)
                    if next_op in targets:
                        for try_op in targets[next_op]:
                            come_from_op = code[try_op]
                            if come_from_op == self.opc.SETUP_EXCEPT:
                                return
                            pass
                    pass
                # A RETURN_VALUE is remembered in return_end_ifs; a
                # BREAK_LOOP instead gets its jump fixed to rtarget.
                if code[pre_rtarget] == self.opc.RETURN_VALUE:
                    self.return_end_ifs.add(pre_rtarget)
                else:
                    self.fixed_jumps[offset] = rtarget
                    self.not_continue.add(pre_rtarget)
            else:
                # For now, we'll only tag forward jump.
                if self.version >= 3.6:
                    if target > offset:
                        self.fixed_jumps[offset] = target
                        pass
                else:
                    # FIXME: This is probably a bug in < 3.6 and we should
                    # instead use the above code. But until we smoke things
                    # out we'll stick with it.
                    if rtarget > offset:
                        self.fixed_jumps[offset] = rtarget

        elif op == self.opc.SETUP_EXCEPT:
            # Clamp the handler target to the parent structure.
            target = self.get_target(offset, extended_arg)
            end    = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op == self.opc.POP_EXCEPT:
            # Look at the instruction right after POP_EXCEPT: a forward
            # JUMP_ABSOLUTE that is not followed by END_FINALLY is recorded
            # both as a fixed jump and as an except target.
            next_offset = xdis.next_offset(op, self.opc, offset)
            target = self.get_target(next_offset, extended_arg)
            if target > next_offset:
                next_op = code[next_offset]
                if (self.opc.JUMP_ABSOLUTE == next_op and
                    self.opc.END_FINALLY != code[xdis.next_offset(next_op, self.opc, next_offset)]):
                    self.fixed_jumps[next_offset] = target
                    self.except_targets[target] = next_offset

        elif op == self.opc.SETUP_FINALLY:
            # Same clamping as for SETUP_EXCEPT.
            target = self.get_target(offset, extended_arg)
            end    = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op in self.jump_if_pop:
            target = self.get_target(offset, extended_arg)
            if target > offset:
                # Prefer the last JUMP_FORWARD found before the target as the
                # fixed destination, unless it is followed (3 bytes on) by
                # ROT_TWO.
                unop_target = self.last_instr(offset, target, self.opc.JUMP_FORWARD, target)
                if unop_target and code[unop_target+3] != self.opc.ROT_TWO:
                    self.fixed_jumps[offset] = unop_target
                else:
                    self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
                    pass
                pass
        elif self.version >= 3.5:
            # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
            # misclassified as RETURN_END_IF. Handle that here.
            # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
            if op == self.opc.RETURN_VALUE:
                next_offset = xdis.next_offset(op, self.opc, offset)
                if (next_offset < len(code) and code[next_offset] == self.opc.JUMP_ABSOLUTE and
                    offset in self.return_end_ifs):
                    self.return_end_ifs.remove(offset)
                    pass
                pass
            elif op == self.opc.JUMP_FORWARD:
                # If we have:
                #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
                # then RETURN_VALUE is not RETURN_END_IF
                rtarget = self.get_target(offset, extended_arg)
                rtarget_prev = self.prev[rtarget]
                if (code[rtarget_prev] == self.opc.RETURN_VALUE and
                    rtarget_prev in self.return_end_ifs):
                    # Walk backwards from the RETURN_VALUE to this
                    # JUMP_FORWARD; give up if any jump lies in between.
                    i = rtarget_prev
                    while i != offset:
                        if code[i] in [op3.JUMP_FORWARD, op3.JUMP_ABSOLUTE]:
                            return
                        i = self.prev[i]
                    self.return_end_ifs.remove(rtarget_prev)
                pass
        return
Ejemplo n.º 20
0
def basic_blocks(
    fn_or_code,
    version=PYTHON_VERSION_TRIPLE,
    is_pypy=IS_PYPY,
    more_precise_returns=False,
    print_instructions=False,
):
    """Create a list of basic blocks found in a code object.

    Parameters:
      fn_or_code: object handed to `get_instructions` to obtain the
        instruction stream (per its name, a function or a code object).
      version: version tuple of the bytecode; compared against tuples such
        as `(3, 6)` and passed to `BBMgr` and `get_jump_val`.
      is_pypy: passed through to `BBMgr`.
      more_precise_returns: indicates whether the RETURN_VALUE
        should be modeled as a jump to the end of the enclosing function
        or not. See comment in code as to why this might be useful.
      print_instructions: when true, each instruction is printed as it is
        processed.

    Returns the `BBMgr` instance; the blocks created are in its `bb_list`
    and, when any exist, `start_block` is set to the first one.
    """

    BB = BBMgr(version, is_pypy)

    # Get jump targets
    # First pass: collect every offset that some jump instruction can reach.
    jump_targets = set()
    instructions = list(get_instructions(fn_or_code))
    for inst in instructions:
        op = inst.opcode
        offset = inst.offset
        follow_offset = next_offset(op, BB.opcode, offset)
        if op in BB.JUMP_INSTRUCTIONS:
            jump_value = get_jump_val(inst.arg, version)
            if op in BB.JABS_INSTRUCTIONS:
                jump_offset = jump_value
            else:
                # Otherwise the operand is relative to the next instruction.
                jump_offset = follow_offset + jump_value
            jump_targets.add(jump_offset)
            pass

    # Add an artificial block where we can link the exits of other blocks
    # to. This helps when there is a "raise" not in any try block and
    # in computing reverse dominators.
    # The artificial block is placed just past the last instruction
    # (presumably +2 because instructions are two bytes wide from 3.6 on --
    # confirm).
    end_offset = instructions[-1].offset
    if version >= (3, 6):
        end_bb_offset = end_offset + 2
    else:
        end_bb_offset = end_offset + 1

    end_block, _, _ = BB.add_bb(
        end_bb_offset, end_bb_offset, None, None, set([BB_EXIT]), []
    )

    # State for the second pass. start_offset/end_offset delimit the block
    # being accumulated; flags and jump_offsets are attached to it when it
    # is closed via BB.add_bb(), which hands back fresh values for both.
    start_offset = 0
    end_offset = -1
    jump_offsets = set()
    prev_offset = -1
    endloop_offsets = [-1]
    flags = set([BB_ENTRY])
    end_try_offset_stack = []
    try_stack = [end_block]
    end_try_offset = None
    loop_offset = None
    return_blocks = []

    for i, inst in enumerate(instructions):
        if print_instructions:
            print(inst)
        prev_offset = end_offset
        end_offset = inst.offset
        op = inst.opcode
        offset = inst.offset
        follow_offset = next_offset(op, BB.opcode, offset)

        if offset == end_try_offset:
            if len(end_try_offset_stack):
                end_try_offset = end_try_offset_stack[-1]
                end_try_offset_stack.pop()
            else:
                end_try_offset = None

        # Track the end offsets of the loops we are currently inside of.
        if op in BB.LOOP_INSTRUCTIONS:
            jump_offset = follow_offset + inst.arg
            endloop_offsets.append(jump_offset)
            loop_offset = offset
        elif offset == endloop_offsets[-1]:
            endloop_offsets.pop()
        pass

        if op in BB.LOOP_INSTRUCTIONS:
            flags.add(BB_LOOP)
        elif op in BB.BREAK_INSTRUCTIONS:
            # A break ends the current block and jumps to the end of the
            # innermost loop.
            flags.add(BB_BREAK)
            jump_offsets.add(endloop_offsets[-1])
            block, flags, jump_offsets = BB.add_bb(
                start_offset,
                end_offset,
                loop_offset,
                follow_offset,
                flags,
                jump_offsets,
            )
            loop_offset = None
            if BB_TRY in block.flags:
                try_stack.append(block)
            start_offset = follow_offset

        if offset in jump_targets:
            # Fallthrough path and jump target path.
            # This instruction definitely starts a new basic block
            # Close off any prior basic block
            if start_offset < end_offset:
                block, flags, jump_offsets = BB.add_bb(
                    start_offset,
                    prev_offset,
                    loop_offset,
                    end_offset,
                    flags,
                    jump_offsets,
                )
                loop_offset = None
                if BB_TRY in block.flags:
                    try_stack.append(block)
                    pass

                start_offset = end_offset
                pass

        # Add block flags for certain classes of instructions
        if op in BB.JUMP_CONDITONAL:
            flags.add(BB_JUMP_CONDITIONAL)

        if op in BB.POP_BLOCK_INSTRUCTIONS:
            flags.add(BB_POP_BLOCK)
            # A pop-block instruction that opens the block is flagged
            # BB_STARTS_POP_BLOCK instead of BB_POP_BLOCK.
            if start_offset == offset:
                flags.add(BB_STARTS_POP_BLOCK)
                flags.remove(BB_POP_BLOCK)
        elif op in BB.EXCEPT_INSTRUCTIONS:
            # NOTE(review): this tests the version of the running
            # interpreter (sys.version_info), not the `version` argument --
            # confirm that is intended.
            if sys.version_info[0:2] <= (2, 7):
                # In Python up to 2.7, three 'POP_TOP's at the beginning of a block
                # indicate an exception handler. We also check
                # that we are nested inside a "try".
                if len(try_stack) == 0 or start_offset != offset:
                    continue
                pass
                if (
                    instructions[i + 1].opcode != BB.opcode.opmap["POP_TOP"]
                    or instructions[i + 2].opcode != BB.opcode.opmap["POP_TOP"]
                ):
                    continue
            flags.add(BB_EXCEPT)
            try_stack[-1].exception_offsets.add(start_offset)
            pass
        elif op in BB.TRY_INSTRUCTIONS:
            end_try_offset_stack.append(inst.argval)
            flags.add(BB_TRY)
        elif op in BB.END_FINALLY_INSTRUCTIONS:
            flags.add(BB_END_FINALLY)
            try_stack[-1].exception_offsets.add(start_offset)
        elif op in BB.FOR_INSTRUCTIONS:
            # A "for" instruction ends its block; its argval is recorded as
            # a jump offset of that block.
            flags.add(BB_FOR)
            jump_offsets.add(inst.argval)
            block, flags, jump_offsets = BB.add_bb(
                start_offset,
                end_offset,
                loop_offset,
                follow_offset,
                flags,
                jump_offsets,
            )
            loop_offset = None
            start_offset = follow_offset
        elif op in BB.JUMP_INSTRUCTIONS:
            # Some sort of jump instruction.
            # Figure out where we jump to and add it to this
            # basic block's jump offsets.
            jump_offset = inst.argval

            jump_offsets.add(jump_offset)
            if op in BB.JUMP_UNCONDITONAL:
                flags.add(BB_JUMP_UNCONDITIONAL)
                if jump_offset == follow_offset:
                    flags.add(BB_JUMP_TO_FALLTHROUGH)
                    pass
                block, flags, jump_offsets = BB.add_bb(
                    start_offset,
                    end_offset,
                    loop_offset,
                    follow_offset,
                    flags,
                    jump_offsets,
                )
                loop_offset = None
                if BB_TRY in block.flags:
                    try_stack.append(block)
                    pass

                start_offset = follow_offset
            # Other jumps close the block only for 3.9+ bytecode, or for
            # pre-3.8 bytecode when the instruction is not SETUP_LOOP.
            elif version[:2] >= (3, 9) or (
                version[:2] < (3, 8) and op != BB.opcode.SETUP_LOOP
            ):
                if op in BB.FINALLY_INSTRUCTIONS:
                    flags.add(BB_FINALLY)

                block, flags, jump_offsets = BB.add_bb(
                    start_offset,
                    end_offset,
                    loop_offset,
                    follow_offset,
                    flags,
                    jump_offsets,
                )
                loop_offset = None
                if BB_TRY in block.flags:
                    try_stack.append(block)
                start_offset = follow_offset

                pass
        elif op in BB.NOFOLLOW_INSTRUCTIONS:
            flags.add(BB_NOFOLLOW)
            if op in BB.RETURN_INSTRUCTIONS:
                flags.add(BB_RETURN)

            last_block, flags, jump_offsets = BB.add_bb(
                start_offset,
                end_offset,
                loop_offset,
                follow_offset,
                flags,
                jump_offsets,
            )
            loop_offset = None
            start_offset = follow_offset
            # Remember blocks ending in a return so they can be linked to
            # the exit block below.
            if op in BB.RETURN_INSTRUCTIONS:
                return_blocks.append(last_block)
            pass
        pass

    # If the bytecode comes from Python, then there is possibly an
    # advantage in treating a return in a block as an instruction
    # which flows to the next instruction, since that will treat
    # blocks with unreachable instructions the way Python source
    # does - the code after that exists.
    #
    # However if you care about analysis, then
    # Hook RETURN_VALUE instructions to the exit block offset
    if more_precise_returns:
        for block in return_blocks:
            block.jump_offsets.add(end_bb_offset)
            block.edge_count += 1

    if len(BB.bb_list):
        BB.bb_list[-1].follow_offset = None
        BB.start_block = BB.bb_list[0]

    # Add remaining instructions?
    # Whatever has accumulated since the last closed block becomes a final
    # block with no follow offset.
    if start_offset <= end_offset:
        BB.bb_list.append(
            BasicBlock(
                start_offset,
                end_offset,
                loop_offset,
                None,
                flags=flags,
                jump_offsets=jump_offsets,
            )
        )
        loop_offset = None
        pass

    return BB