Esempio n. 1
0
 def op_range(self, start, end):
     """
     Iterate through positions of opcodes, skipping
     arguments.
     """
     while start < end:
         yield start
         start += instruction_size(self.code[start], self.opc)
Esempio n. 2
0
 def get_inst(self, offset):
     # Instructions can get moved as a result of EXTENDED_ARGS removal.
     # So if "offset" is not in self.offset2inst_index, then
     # we assume that it was an instruction moved back.
     # We check that assumption though by looking at
     # self.code's opcode.
     if offset not in self.offset2inst_index:
         offset -= instruction_size(self.opc.EXTENDED_ARG, self.opc)
         assert self.code[offset] == self.opc.EXTENDED_ARG
     return self.insts[self.offset2inst_index[offset]]
Esempio n. 3
0
 def build_prev_op(self):
     """
     Compose 'list-map' which allows to jump to previous
     op, given offset of current op as index.
     """
     code = self.code
     codelen = len(code)
     # 2.x uses prev 3.x uses prev_op. Sigh
     # Until we get this sorted out.
     self.prev = self.prev_op = [0]
     for offset in self.op_range(0, codelen):
         op = code[offset]
         for _ in range(instruction_size(op, self.opc)):
             self.prev_op.append(offset)
Esempio n. 4
0
    def detect_control_flow(self, offset, targets, inst_index):
        """
        Detect structures and their boundaries to fix optimized jumps
        Python 3.0 is more like Python 2.6 than it is Python 3.x.
        So we have a special routine here.
        """

        code = self.code
        op = self.insts[inst_index].opcode

        # Detect parent structure
        parent = self.structs[0]
        start = parent["start"]
        end = parent["end"]

        # Pick inner-most parent for our offset
        for struct in self.structs:
            current_start = struct["start"]
            current_end = struct["end"]
            if (current_start <= offset < current_end) and (
                    current_start >= start and current_end <= end):
                start = current_start
                end = current_end
                parent = struct

        if op == self.opc.SETUP_LOOP:
            # We categorize loop types: 'for', 'while', 'while 1' with
            # possibly suffixes '-loop' and '-else'
            # Try to find the jump_back instruction of the loop.
            # It could be a return instruction.

            start += instruction_size(op, self.opc)
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.setup_loops[target] = offset

            if target != end:
                self.fixed_jumps[offset] = end

            (line_no, next_line_byte) = self.lines[offset]
            jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                        next_line_byte, False)

            if jump_back:
                jump_forward_offset = xdis.next_offset(code[jump_back],
                                                       self.opc, jump_back)
            else:
                jump_forward_offset = None

            return_val_offset1 = self.prev[self.prev[end]]

            if (jump_back and jump_back != self.prev_op[end]
                    and self.is_jump_forward(jump_forward_offset)):
                if code[self.prev_op[end]] == self.opc.RETURN_VALUE or (
                        code[self.prev_op[end]] == self.opc.POP_BLOCK
                        and code[return_val_offset1] == self.opc.RETURN_VALUE):
                    jump_back = None
            if not jump_back:
                # loop suite ends in return
                jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
                if not jump_back:
                    return

                jb_inst = self.get_inst(jump_back)
                jump_back = self.next_offset(jb_inst.opcode, jump_back)

                if_offset = None
                if code[self.prev_op[next_line_byte]] not in JUMP_TF:
                    if_offset = self.prev[next_line_byte]
                if if_offset:
                    loop_type = "while"
                    self.ignore_if.add(if_offset)
                else:
                    loop_type = "for"
                target = next_line_byte
                end = jump_back + 3
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end,
                                                self.opc.JUMP_ABSOLUTE, start,
                                                False)

                jb_inst = self.get_inst(jump_back)

                jb_next_offset = self.next_offset(jb_inst.opcode, jump_back)
                if end > jb_next_offset and self.is_jump_forward(end):
                    if self.is_jump_forward(jb_next_offset):
                        if self.get_target(jump_back +
                                           4) == self.get_target(end):
                            self.fixed_jumps[offset] = jump_back + 4
                            end = jb_next_offset
                elif target < offset:
                    self.fixed_jumps[offset] = jump_back + 4
                    end = jb_next_offset

                target = self.get_target(jump_back)

                if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                    loop_type = "for"
                else:
                    loop_type = "while"
                    test = self.prev_op[next_line_byte]

                    if test == offset:
                        loop_type = "while 1"
                    elif self.code[test] in self.opc.JUMP_OPs:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        if test_target > (jump_back + 3):
                            jump_back = test_target
                self.not_continue.add(jump_back)
            self.loops.append(target)
            self.structs.append({
                "type": loop_type + "-loop",
                "start": target,
                "end": jump_back
            })
            after_jump_offset = xdis.next_offset(code[jump_back], self.opc,
                                                 jump_back)
            if self.get_inst(after_jump_offset).opname == "POP_TOP":
                after_jump_offset = xdis.next_offset(code[after_jump_offset],
                                                     self.opc,
                                                     after_jump_offset)
            if after_jump_offset != end:
                self.structs.append({
                    "type": loop_type + "-else",
                    "start": after_jump_offset,
                    "end": end,
                })
        elif op in self.pop_jump_tf:
            start = offset + instruction_size(op, self.opc)
            target = self.get_target(offset)
            rtarget = self.restrict_to_parent(target, parent)
            prev_op = self.prev_op

            # Do not let jump to go out of parent struct bounds
            if target != rtarget and parent["type"] == "and/or":
                self.fixed_jumps[offset] = rtarget
                return

            # Does this jump to right after another conditional jump that is
            # not myself?  If so, it's part of a larger conditional.
            # rocky: if we have a conditional jump to the next instruction, then
            # possibly I am "skipping over" a "pass" or null statement.

            if ((code[prev_op[target]] in self.pop_jump_if_pop)
                    and (target > offset) and prev_op[target] != offset):
                self.fixed_jumps[offset] = prev_op[target]
                self.structs.append({
                    "type": "and/or",
                    "start": start,
                    "end": prev_op[target]
                })
                return

            # The op offset just before the target jump offset is important
            # in making a determination of what we have. Save that.
            pre_rtarget = prev_op[rtarget]

            # Is it an "and" inside an "if" or "while" block
            if op == opc.JUMP_IF_FALSE:

                # Search for another JUMP_IF_FALSE targetting the same op,
                # in current statement, starting from current offset, and filter
                # everything inside inner 'or' jumps and midline ifs
                match = self.rem_or(start, self.next_stmt[offset],
                                    opc.JUMP_IF_FALSE, target)

                # If we still have any offsets in set, start working on it
                if match:
                    is_jump_forward = self.is_jump_forward(pre_rtarget)
                    if (is_jump_forward and pre_rtarget not in self.stmts
                            and self.restrict_to_parent(
                                self.get_target(pre_rtarget),
                                parent) == rtarget):
                        if (code[prev_op[pre_rtarget]]
                                == self.opc.JUMP_ABSOLUTE
                                and self.remove_mid_line_ifs([offset])
                                and target == self.get_target(
                                    prev_op[pre_rtarget]) and
                            (prev_op[pre_rtarget] not in self.stmts
                             or self.get_target(prev_op[pre_rtarget]) >
                             prev_op[pre_rtarget]) and 1 == len(
                                 self.remove_mid_line_ifs(
                                     self.rem_or(start, prev_op[pre_rtarget],
                                                 JUMP_TF, target)))):
                            pass
                        elif (code[prev_op[pre_rtarget]]
                              == self.opc.RETURN_VALUE
                              and self.remove_mid_line_ifs([offset])
                              and 1 == (len(
                                  set(
                                      self.remove_mid_line_ifs(
                                          self.rem_or(
                                              start,
                                              prev_op[pre_rtarget],
                                              JUMP_TF,
                                              target,
                                          )))
                                  | set(
                                      self.remove_mid_line_ifs(
                                          self.rem_or(
                                              start,
                                              prev_op[pre_rtarget],
                                              (
                                                  opc.JUMP_IF_FALSE,
                                                  opc.JUMP_IF_TRUE,
                                                  opc.JUMP_ABSOLUTE,
                                              ),
                                              pre_rtarget,
                                              True,
                                          )))))):
                            pass
                        else:
                            fix = None
                            jump_ifs = self.inst_matches(
                                start, self.next_stmt[offset],
                                opc.JUMP_IF_FALSE)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    # FIXME: remove magic number
                                    if self.lines[
                                            j].next == j + 3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[offset] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[offset] = match[-1]
                        return
            # op == JUMP_IF_TRUE
            else:
                next = self.next_stmt[offset]
                if prev_op[next] == offset:
                    pass
                elif self.is_jump_forward(next) and target == self.get_target(
                        next):
                    if code[prev_op[next]] == opc.JUMP_IF_FALSE:
                        if (code[next] == self.opc.JUMP_FORWARD
                                or target != rtarget
                                or code[prev_op[pre_rtarget]]
                                not in (self.opc.JUMP_ABSOLUTE,
                                        self.opc.RETURN_VALUE)):
                            self.fixed_jumps[offset] = prev_op[next]
                            return
                elif (code[next] == self.opc.JUMP_ABSOLUTE
                      and self.is_jump_forward(target)
                      and self.get_target(target) == self.get_target(next)):
                    self.fixed_jumps[offset] = prev_op[next]
                    return

            # Don't add a struct for a while test, it's already taken care of
            if offset in self.ignore_if:
                return

            if (code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
                    and pre_rtarget in self.stmts and pre_rtarget != offset
                    and prev_op[pre_rtarget] != offset
                    and not (code[rtarget] == self.opc.JUMP_ABSOLUTE
                             and code[rtarget + 3] == self.opc.POP_BLOCK
                             and code[prev_op[pre_rtarget]] !=
                             self.opc.JUMP_ABSOLUTE)):
                rtarget = pre_rtarget

            # Does the "jump if" jump beyond a jump op?
            # That is, we have something like:
            #  JUMP_IF_FALSE HERE
            #  ...
            # JUMP_FORWARD
            # HERE:
            #
            # If so, this can be block inside an "if" statement
            # or a conditional assignment like:
            #   x = 1 if x else 2
            #
            # There are other contexts we may need to consider
            # like whether the target is "END_FINALLY"
            # or if the condition jump is to a forward location
            if self.is_jump_forward(pre_rtarget):
                if_end = self.get_target(pre_rtarget, 0)

                # If the jump target is back, we are looping
                if if_end < pre_rtarget and (code[prev_op[if_end]]
                                             == self.opc.SETUP_LOOP):
                    if if_end > start:
                        return

                end = self.restrict_to_parent(if_end, parent)

                self.structs.append({
                    "type": "if-then",
                    "start": start,
                    "end": pre_rtarget
                })
                self.not_continue.add(pre_rtarget)

                # if rtarget < end and (
                #         code[rtarget] not in (self.opc.END_FINALLY,
                #                               self.opc.JUMP_ABSOLUTE) and
                #         code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
                #                                         self.opc.END_FINALLY)):
                #     self.structs.append({'type': 'else',
                #                          'start': rtarget,
                #                          'end': end})
                #     self.else_start[rtarget] = end
            elif self.is_jump_back(pre_rtarget, 0):
                if_end = rtarget
                self.structs.append({
                    "type": "if-then",
                    "start": start,
                    "end": pre_rtarget
                })
                self.not_continue.add(pre_rtarget)
            elif code[pre_rtarget] in (self.opc.RETURN_VALUE,
                                       self.opc.BREAK_LOOP):
                self.structs.append({
                    "type": "if-then",
                    "start": start,
                    "end": rtarget
                })
                # It is important to distingish if this return is inside some sort
                # except block return
                jump_prev = prev_op[offset]
                if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                    if self.opc.cmp_op[code[jump_prev +
                                            1]] == "exception-match":
                        return
                if self.version >= 3.5:
                    # Python 3.5 may remove as dead code a JUMP
                    # instruction after a RETURN_VALUE. So we check
                    # based on seeing SETUP_EXCEPT various places.
                    if code[rtarget] == self.opc.SETUP_EXCEPT:
                        return
                    # Check that next instruction after pops and jump is
                    # not from SETUP_EXCEPT
                    next_op = rtarget
                    if code[next_op] == self.opc.POP_BLOCK:
                        next_op += instruction_size(self.code[next_op],
                                                    self.opc)
                    if code[next_op] == self.opc.JUMP_ABSOLUTE:
                        next_op += instruction_size(self.code[next_op],
                                                    self.opc)
                    if next_op in targets:
                        for try_op in targets[next_op]:
                            come_from_op = code[try_op]
                            if come_from_op == self.opc.SETUP_EXCEPT:
                                return
                            pass
                    pass
                if code[pre_rtarget] == self.opc.RETURN_VALUE:
                    if self.version == 3.0:
                        next_op = rtarget
                        if code[next_op] == self.opc.POP_TOP:
                            next_op = rtarget
                        for block in self.structs:
                            if (block["type"] == "while-loop"
                                    and block["end"] == next_op):
                                return
                        next_op += instruction_size(self.code[next_op],
                                                    self.opc)
                        if code[next_op] == self.opc.POP_BLOCK:
                            return
                    self.return_end_ifs.add(pre_rtarget)
                else:
                    self.fixed_jumps[offset] = rtarget
                    self.not_continue.add(pre_rtarget)

        elif op == self.opc.SETUP_EXCEPT:
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op == self.opc.SETUP_FINALLY:
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op in self.jump_if_pop:
            target = self.get_target(offset)
            if target > offset:
                unop_target = self.last_instr(offset, target,
                                              self.opc.JUMP_FORWARD, target)
                if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
                    self.fixed_jumps[offset] = unop_target
                else:
                    self.fixed_jumps[offset] = self.restrict_to_parent(
                        target, parent)
                    pass
                pass
        elif self.version >= 3.5:
            # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
            # misclassified as RETURN_END_IF. Handle that here.
            # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
            if op == self.opc.RETURN_VALUE:
                if (offset + 1 < len(code)
                        and code[offset + 1] == self.opc.JUMP_ABSOLUTE
                        and offset in self.return_end_ifs):
                    self.return_end_ifs.remove(offset)
                    pass
                pass
            elif op == self.opc.JUMP_FORWARD:
                # If we have:
                #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
                # then RETURN_VALUE is not RETURN_END_IF
                rtarget = self.get_target(offset)
                rtarget_prev = self.prev[rtarget]
                if (code[rtarget_prev] == self.opc.RETURN_VALUE
                        and rtarget_prev in self.return_end_ifs):
                    i = rtarget_prev
                    while i != offset:
                        if code[i] in [opc.JUMP_FORWARD, opc.JUMP_ABSOLUTE]:
                            return
                        i = self.prev[i]
                    self.return_end_ifs.remove(rtarget_prev)
                pass
        return
    def build_statement_indices(self):
        code = self.code
        start = 0
        end = codelen = len(code)

        # Compose preliminary list of indices with statements,
        # using plain statement opcodes
        prelim = self.inst_matches(start, end, self.statement_opcodes)

        # Initialize final container with statements with
        # preliminary data
        stmts = self.stmts = set(prelim)

        # Same for opcode sequences
        pass_stmts = set()
        for sequence in self.statement_opcode_sequences:
            for i in self.op_range(start, end - (len(sequence) + 1)):
                match = True
                for elem in sequence:
                    if elem != code[i]:
                        match = False
                        break
                    i += instruction_size(code[i], self.opc)

                if match is True:
                    i = self.prev_op[i]
                    stmts.add(i)
                    pass_stmts.add(i)

        # Initialize statement list with the full data we've gathered so far
        if pass_stmts:
            stmt_offset_list = list(stmts)
            stmt_offset_list.sort()
        else:
            stmt_offset_list = prelim
        # 'List-map' which contains offset of start of
        # next statement, when op offset is passed as index
        self.next_stmt = slist = []
        last_stmt_offset = -1
        i = 0
        # Go through all statement offsets
        for stmt_offset in stmt_offset_list:
            # Process absolute jumps, but do not remove 'pass' statements
            # from the set
            if (
                code[stmt_offset] == self.opc.JUMP_ABSOLUTE
                and stmt_offset not in pass_stmts
            ):
                # If absolute jump occurs in forward direction or it takes off from the
                # same line as previous statement, this is not a statement
                # FIXME: 0 isn't always correct
                target = self.get_target(stmt_offset)
                if (
                    target > stmt_offset
                    or self.lines[last_stmt_offset].l_no == self.lines[stmt_offset].l_no
                ):
                    stmts.remove(stmt_offset)
                    continue
                # Rewing ops till we encounter non-JUMP_ABSOLUTE one
                j = self.prev_op[stmt_offset]
                while code[j] == self.opc.JUMP_ABSOLUTE:
                    j = self.prev_op[j]
                # If we got here, then it's list comprehension which
                # is not a statement too
                if code[j] == self.opc.LIST_APPEND:
                    stmts.remove(stmt_offset)
                    continue
            # Exclude ROT_TWO + POP_TOP
            elif (
                code[stmt_offset] == self.opc.POP_TOP
                and code[self.prev_op[stmt_offset]] == self.opc.ROT_TWO
            ):
                stmts.remove(stmt_offset)
                continue
            # Exclude FOR_ITER + designators
            elif code[stmt_offset] in self.designator_ops:
                j = self.prev_op[stmt_offset]
                while code[j] in self.designator_ops:
                    j = self.prev_op[j]
                if code[j] == self.opc.FOR_ITER:
                    stmts.remove(stmt_offset)
                    continue
            # Add to list another list with offset of current statement,
            # equal to length of previous statement
            slist += [stmt_offset] * (stmt_offset - i)
            last_stmt_offset = stmt_offset
            i = stmt_offset
        # Finish filling the list for last statement
        slist += [codelen] * (codelen - len(slist))