Exemple #1
0
 def op_range(self, start, end):
     """
     Generate the offsets of successive opcodes in ``self.code``,
     beginning at ``start`` and stopping once the offset is no longer
     below ``end``.

     After each yielded offset the position is advanced by whatever
     ``op_size()`` reports for the opcode found there, so argument
     bytes are stepped over rather than yielded.
     """
     position = start
     while True:
         if not (position < end):
             break
         yield position
         position += op_size(self.code[position], self.opc)
Exemple #2
0
 def build_prev_op(self):
     """
     Build the 'list-map' that lets callers step back to the previous
     op, using the offset of the current op as the index.

     The same list object is stored under two attribute names,
     ``self.prev`` (the 2.x spelling) and ``self.prev_op`` (the 3.x
     spelling).  It starts out as ``[0]``; then, for each offset
     produced by ``self.op_range()``, that offset is added
     ``op_size()`` times.
     """
     bytecode = self.code
     total = len(bytecode)
     # 2.x uses prev, 3.x uses prev_op; until that is sorted out both
     # names are bound to one shared list.
     prev_map = [0]
     self.prev = self.prev_op = prev_map
     for offset in self.op_range(0, total):
         width = op_size(bytecode[offset], self.opc)
         prev_map.extend([offset] * width)
    def detect_control_flow(self, offset, targets):
        """
        Detect structures and their boundaries to fix optimized jumps.

        Python 3.0 is more like Python 2.6 than it is Python 3.x.
        So we have a special routine here.

        The instruction at ``offset`` in ``self.code`` is examined and,
        depending on its opcode, results are recorded on the instance:

        * ``self.structs`` gets new dicts with 'type', 'start' and 'end'
          keys (types appended here: '<loop_type>-loop',
          '<loop_type>-else', 'and/or' and 'if-then').
        * ``self.fixed_jumps`` gets a replacement target for the jump at
          some offset.
        * ``self.setup_loop_targets``, ``self.setup_loops``,
          ``self.loops``, ``self.ignore_if``, ``self.not_continue`` and
          ``self.return_end_ifs`` are updated as bookkeeping.

        Arguments:
          offset:  index into ``self.code`` of the instruction to inspect.
          targets: container indexed by offset; ``targets[o]`` is iterated
                   and each element is used as an index into ``self.code``.
                   It is only consulted inside the ``self.version >= 3.5``
                   part of the conditional-jump handling.

        Returns ``None`` in every path; all output is via instance state.
        """

        code = self.code
        op = code[offset]

        # Detect parent structure
        # The first recorded struct is the initial candidate.
        parent = self.structs[0]
        start = parent['start']
        end = parent['end']

        # Pick inner-most parent for our offset
        # A struct replaces the current candidate when it contains `offset`
        # and lies within the bounds of the candidate chosen so far.
        for struct in self.structs:
            current_start = struct['start']
            current_end = struct['end']
            if ((current_start <= offset < current_end)
                    and (current_start >= start and current_end <= end)):
                start = current_start
                end = current_end
                parent = struct

        if op == self.opc.SETUP_LOOP:
            # We categorize loop types: 'for', 'while', 'while 1' with
            # possibly suffixes '-loop' and '-else'
            # Try to find the jump_back instruction of the loop.
            # It could be a return instruction.

            # NOTE(review): the fixed 3 presumably is the byte size of
            # SETUP_LOOP plus its argument -- confirm.
            start = offset + 3
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            # Remember the SETUP_LOOP <-> target pairing in both directions.
            self.setup_loop_targets[offset] = target
            self.setup_loops[target] = offset

            # The parent clipped the target, so record the clipped value.
            if target != end:
                self.fixed_jumps[offset] = end

            # self.lines[offset] unpacks into two values; going by the names,
            # presumably the line number and the offset where the next source
            # line begins -- verify against where self.lines is built.
            (line_no, next_line_byte) = self.lines[offset]
            jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                        next_line_byte, False)

            # Offset 3 past the candidate jump-back, if there is one.
            if jump_back:
                jump_forward_offset = jump_back + 3
            else:
                jump_forward_offset = None

            # Two steps back through self.prev from `end`.
            return_val_offset1 = self.prev[self.prev[end]]

            # Discard the candidate jump-back when it is not the op just
            # before `end`, is followed by a forward jump, and the code
            # before `end` is RETURN_VALUE (optionally followed by POP_BLOCK).
            if (jump_back and jump_back != self.prev_op[end]
                    and self.is_jump_forward(jump_forward_offset)):
                if (code[self.prev_op[end]] == self.opc.RETURN_VALUE or
                    (code[self.prev_op[end]] == self.opc.POP_BLOCK
                     and code[return_val_offset1] == self.opc.RETURN_VALUE)):
                    jump_back = None
            if not jump_back:
                # loop suite ends in return
                jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
                if not jump_back:
                    # Neither a jump-back nor a return was found: give up
                    # without recording any loop struct.
                    return

                # NOTE(review): the rationale for adjusting by 2 is not
                # evident from this code -- confirm.
                jump_back += 2
                if_offset = None
                if code[self.prev_op[next_line_byte]] not in JUMP_TF:
                    if_offset = self.prev[next_line_byte]
                if if_offset:
                    loop_type = 'while'
                    self.ignore_if.add(if_offset)
                else:
                    loop_type = 'for'
                target = next_line_byte
                end = jump_back + 3
            else:
                # A usable jump-back exists.  If it jumps to the next line's
                # start or later, search again using `start` as the target
                # argument to last_instr.
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end,
                                                self.opc.JUMP_ABSOLUTE, start,
                                                False)
                # NOTE(review): jump_back + 4 presumably skips the 3-byte
                # jump plus one following 1-byte instruction -- confirm.
                if end > jump_back + 4 and self.is_jump_forward(end):
                    if self.is_jump_forward(jump_back + 4):
                        if self.get_target(jump_back +
                                           4) == self.get_target(end):
                            self.fixed_jumps[offset] = jump_back + 4
                            end = jump_back + 4
                elif target < offset:
                    self.fixed_jumps[offset] = jump_back + 4
                    end = jump_back + 4

                # From here on `target` is where the jump-back lands.
                target = self.get_target(jump_back)

                if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    # The op just before the next line's first byte is taken
                    # to be the loop test.
                    test = self.prev_op[next_line_byte]

                    if test == offset:
                        # The "test" is the SETUP_LOOP itself.
                        loop_type = 'while 1'
                    elif self.code[test] in opc.hasjabs + opc.hasjrel:
                        # The test is a jump: keep it from being treated as
                        # an 'if' later (see the self.ignore_if check in the
                        # conditional-jump branch below).
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        if test_target > (jump_back + 3):
                            jump_back = test_target
                self.not_continue.add(jump_back)
            self.loops.append(target)
            self.structs.append({
                'type': loop_type + '-loop',
                'start': target,
                'end': jump_back
            })
            # Anything between the end of the loop body and `end` is recorded
            # as the loop's else part.
            if jump_back + 3 != end:
                self.structs.append({
                    'type': loop_type + '-else',
                    'start': jump_back + 3,
                    'end': end
                })
        elif op in JUMP_TF:
            start = offset + op_size(op, self.opc)
            target = self.get_target(offset)
            rtarget = self.restrict_to_parent(target, parent)
            prev_op = self.prev_op

            # Do not let jump to go out of parent struct bounds
            if target != rtarget and parent['type'] == 'and/or':
                self.fixed_jumps[offset] = rtarget
                return

            # Does this jump to right after another conditional jump that is
            # not myself?  If so, it's part of a larger conditional.
            # rocky: if we have a conditional jump to the next instruction, then
            # possibly I am "skipping over" a "pass" or null statement.

            if ((code[prev_op[target]] in self.pop_jump_if_pop)
                    and (target > offset) and prev_op[target] != offset):
                self.fixed_jumps[offset] = prev_op[target]
                self.structs.append({
                    'type': 'and/or',
                    'start': start,
                    'end': prev_op[target]
                })
                return

            # The op offset just before the target jump offset is important
            # in making a determination of what we have. Save that.
            pre_rtarget = prev_op[rtarget]

            # Is it an "and" inside an "if" or "while" block
            if op == opc.JUMP_IF_FALSE:

                # Search for another JUMP_IF_FALSE targeting the same op,
                # in current statement, starting from current offset, and filter
                # everything inside inner 'or' jumps and midline ifs
                match = self.rem_or(start, self.next_stmt[offset],
                                    opc.JUMP_IF_FALSE, target)

                # If we still have any offsets in set, start working on it
                if match:
                    is_jump_forward = self.is_jump_forward(pre_rtarget)
                    if (is_jump_forward and pre_rtarget not in self.stmts
                            and self.restrict_to_parent(
                                self.get_target(pre_rtarget),
                                parent) == rtarget):
                        # The two conditions below are recognized cases that
                        # need no jump fix-up; they fall through to the
                        # struct detection further down.
                        if (code[prev_op[pre_rtarget]]
                                == self.opc.JUMP_ABSOLUTE
                                and self.remove_mid_line_ifs([offset])
                                and target == self.get_target(
                                    prev_op[pre_rtarget]) and
                            (prev_op[pre_rtarget] not in self.stmts
                             or self.get_target(prev_op[pre_rtarget]) >
                             prev_op[pre_rtarget]) and 1 == len(
                                 self.remove_mid_line_ifs(
                                     self.rem_or(start, prev_op[pre_rtarget],
                                                 JUMP_TF, target)))):
                            pass
                        elif (code[prev_op[pre_rtarget]]
                              == self.opc.RETURN_VALUE
                              and self.remove_mid_line_ifs([offset])
                              and 1 == (len(
                                  set(
                                      self.remove_mid_line_ifs(
                                          self.rem_or(start,
                                                      prev_op[pre_rtarget],
                                                      JUMP_TF, target)))
                                  | set(
                                      self.remove_mid_line_ifs(
                                          self.rem_or(
                                              start, prev_op[pre_rtarget],
                                              (opc.JUMP_IF_FALSE,
                                               opc.JUMP_IF_TRUE,
                                               opc.JUMP_ABSOLUTE
                                               ), pre_rtarget, True)))))):
                            pass
                        else:
                            # Otherwise redirect this jump: prefer the first
                            # same-target JUMP_IF_FALSE that ends its line
                            # (with no differently-targeted one before it),
                            # falling back to the last match.
                            fix = None
                            jump_ifs = self.all_instr(start,
                                                      self.next_stmt[offset],
                                                      opc.JUMP_IF_FALSE)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    if self.lines[
                                            j].next == j + 3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[offset] = fix or match[-1]
                            return
                    else:
                        self.fixed_jumps[offset] = match[-1]
                        return
            # op == JUMP_IF_TRUE
            else:
                # NOTE: this local shadows the builtin next() for the rest
                # of the method.
                next = self.next_stmt[offset]
                if prev_op[next] == offset:
                    pass
                elif self.is_jump_forward(next) and target == self.get_target(
                        next):
                    if code[prev_op[next]] == opc.JUMP_IF_FALSE:
                        if (code[next] == self.opc.JUMP_FORWARD
                                or target != rtarget
                                or code[prev_op[pre_rtarget]]
                                not in (self.opc.JUMP_ABSOLUTE,
                                        self.opc.RETURN_VALUE)):
                            self.fixed_jumps[offset] = prev_op[next]
                            return
                elif (code[next] == self.opc.JUMP_ABSOLUTE
                      and self.is_jump_forward(target)
                      and self.get_target(target) == self.get_target(next)):
                    self.fixed_jumps[offset] = prev_op[next]
                    return

            # Don't add a struct for a while test, it's already taken care of
            if offset in self.ignore_if:
                return

            # Pull rtarget back onto a preceding JUMP_ABSOLUTE that starts a
            # statement, unless rtarget itself is a JUMP_ABSOLUTE followed by
            # POP_BLOCK and the op before pre_rtarget is not a JUMP_ABSOLUTE.
            if (code[pre_rtarget] == self.opc.JUMP_ABSOLUTE
                    and pre_rtarget in self.stmts and pre_rtarget != offset
                    and prev_op[pre_rtarget] != offset
                    and not (code[rtarget] == self.opc.JUMP_ABSOLUTE
                             and code[rtarget + 3] == self.opc.POP_BLOCK
                             and code[prev_op[pre_rtarget]] !=
                             self.opc.JUMP_ABSOLUTE)):
                rtarget = pre_rtarget

            # Does the "jump if" jump beyond a jump op?
            # That is, we have something like:
            #  JUMP_IF_FALSE HERE
            #  ...
            # JUMP_FORWARD
            # HERE:
            #
            # If so, this can be block inside an "if" statement
            # or a conditional assignment like:
            #   x = 1 if x else 2
            #
            # There are other contexts we may need to consider
            # like whether the target is "END_FINALLY"
            # or if the condition jump is to a forward location
            if self.is_jump_forward(pre_rtarget):
                if_end = self.get_target(pre_rtarget)

                # If the jump target is back, we are looping
                if (if_end < pre_rtarget
                        and (code[prev_op[if_end]] == self.opc.SETUP_LOOP)):
                    if (if_end > start):
                        return

                end = self.restrict_to_parent(if_end, parent)

                self.structs.append({
                    'type': 'if-then',
                    'start': start,
                    'end': pre_rtarget
                })
                self.not_continue.add(pre_rtarget)

                # Disabled: registration of an 'else' struct and else_start.
                # if rtarget < end and (
                #         code[rtarget] not in (self.opc.END_FINALLY,
                #                               self.opc.JUMP_ABSOLUTE) and
                #         code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
                #                                         self.opc.END_FINALLY)):
                #     self.structs.append({'type': 'else',
                #                          'start': rtarget,
                #                          'end': end})
                #     self.else_start[rtarget] = end
            elif self.is_jump_back(pre_rtarget):
                # NOTE: if_end is assigned here but not read again in this
                # branch.
                if_end = rtarget
                self.structs.append({
                    'type': 'if-then',
                    'start': start,
                    'end': pre_rtarget
                })
                self.not_continue.add(pre_rtarget)
            elif code[pre_rtarget] in (self.opc.RETURN_VALUE,
                                       self.opc.BREAK_LOOP):
                self.structs.append({
                    'type': 'if-then',
                    'start': start,
                    'end': rtarget
                })
                # It is important to distinguish if this return is inside some sort
                # except block return
                jump_prev = prev_op[offset]
                if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                    if self.opc.cmp_op[code[jump_prev +
                                            1]] == 'exception match':
                        return
                # NOTE(review): the docstring describes this routine as the
                # Python 3.0 variant, so it is unclear whether this >= 3.5
                # section can be reached here -- confirm.
                if self.version >= 3.5:
                    # Python 3.5 may remove as dead code a JUMP
                    # instruction after a RETURN_VALUE. So we check
                    # based on seeing SETUP_EXCEPT various places.
                    if code[rtarget] == self.opc.SETUP_EXCEPT:
                        return
                    # Check that next instruction after pops and jump is
                    # not from SETUP_EXCEPT
                    next_op = rtarget
                    if code[next_op] == self.opc.POP_BLOCK:
                        next_op += op_size(self.code[next_op], self.opc)
                    if code[next_op] == self.opc.JUMP_ABSOLUTE:
                        next_op += op_size(self.code[next_op], self.opc)
                    if next_op in targets:
                        # Bail out if any offset listed in targets[next_op]
                        # holds a SETUP_EXCEPT.
                        for try_op in targets[next_op]:
                            come_from_op = code[try_op]
                            if come_from_op == self.opc.SETUP_EXCEPT:
                                return
                            pass
                    pass
                if code[pre_rtarget] == self.opc.RETURN_VALUE:
                    self.return_end_ifs.add(pre_rtarget)
                else:
                    # BREAK_LOOP case.
                    self.fixed_jumps[offset] = rtarget
                    self.not_continue.add(pre_rtarget)

        elif op == self.opc.SETUP_EXCEPT:
            # Only clip the handler target to the parent's bounds.
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op == self.opc.SETUP_FINALLY:
            # Same treatment as SETUP_EXCEPT above.
            target = self.get_target(offset)
            end = self.restrict_to_parent(target, parent)
            self.fixed_jumps[offset] = end
        elif op in self.jump_if_pop:
            target = self.get_target(offset)
            if target > offset:
                # Look for a JUMP_FORWARD between here and the target; use it
                # as the fixed target unless the op 3 bytes after it is
                # ROT_TWO, otherwise clip the target to the parent.
                unop_target = self.last_instr(offset, target,
                                              self.opc.JUMP_FORWARD, target)
                if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
                    self.fixed_jumps[offset] = unop_target
                else:
                    self.fixed_jumps[offset] = self.restrict_to_parent(
                        target, parent)
                    pass
                pass
        # NOTE(review): as above, reachability of this >= 3.5 branch from a
        # routine described as Python 3.0-specific is unclear -- confirm.
        elif self.version >= 3.5:
            # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
            # misclassified as RETURN_END_IF. Handle that here.
            # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
            if op == self.opc.RETURN_VALUE:
                if (offset + 1 < len(code)
                        and code[offset + 1] == self.opc.JUMP_ABSOLUTE
                        and offset in self.return_end_ifs):
                    self.return_end_ifs.remove(offset)
                    pass
                pass
            elif op == self.opc.JUMP_FORWARD:
                # If we have:
                #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
                # then RETURN_VALUE is not RETURN_END_IF
                rtarget = self.get_target(offset)
                rtarget_prev = self.prev[rtarget]
                if (code[rtarget_prev] == self.opc.RETURN_VALUE
                        and rtarget_prev in self.return_end_ifs):
                    # Walk backwards from the RETURN_VALUE to this jump; any
                    # intervening jump means the pattern above does not hold.
                    i = rtarget_prev
                    while i != offset:
                        if code[i] in [opc.JUMP_FORWARD, opc.JUMP_ABSOLUTE]:
                            return
                        i = self.prev[i]
                    self.return_end_ifs.remove(rtarget_prev)
                pass
        return
    def detect_control_flow(self, offset, op):
        """
        Detect type of block structures and their boundaries to fix optimized jumps
        in python2.3+
        """

        code = self.code

        # Detect parent structure
        parent = self.structs[0]
        start = parent['start']
        end = parent['end']

        # Pick inner-most parent for our offset
        for struct in self.structs:
            current_start = struct['start']
            current_end = struct['end']
            if ((current_start <= offset < current_end)
                    and (current_start >= start and current_end <= end)):
                start = current_start
                end = current_end
                parent = struct

        if op == self.opc.SETUP_LOOP:

            # We categorize loop types: 'for', 'while', 'while 1' with
            # possibly suffixes '-loop' and '-else'
            # Try to find the jump_back instruction of the loop.
            # It could be a return instruction.

            start = offset + 3
            target = self.get_target(offset, op)
            end = self.restrict_to_parent(target, parent)
            self.setup_loop_targets[offset] = target
            self.setup_loops[target] = offset

            if target != end:
                self.fixed_jumps[offset] = end

            (line_no, next_line_byte) = self.lines[offset]
            jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                        next_line_byte, False)

            if jump_back:
                # Account for the fact that < 2.7 has an explicit
                # POP_TOP instruction in the equivalate POP_JUMP_IF
                # construct
                if self.version < 2.7:
                    jump_forward_offset = jump_back + 4
                    return_val_offset1 = self.prev[self.prev[self.prev[end]]]
                    # Is jump back really "back"?
                    jump_target = self.get_target(jump_back, code[jump_back])
                    if (jump_target > jump_back or code[jump_back + 3] in [
                            self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE
                    ]):
                        jump_back = None
                        pass
                else:
                    jump_forward_offset = jump_back + 3
                    return_val_offset1 = self.prev[self.prev[end]]

            if (jump_back and jump_back != self.prev[end]
                    and code[jump_forward_offset] in self.jump_forward):
                if (code[self.prev[end]] == self.opc.RETURN_VALUE or
                    (code[self.prev[end]] == self.opc.POP_BLOCK
                     and code[return_val_offset1] == self.opc.RETURN_VALUE)):
                    jump_back = None
            if not jump_back:
                # loop suite ends in return
                # scanner26 of wbiti had:
                # jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, start, False)
                jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
                if not jump_back:
                    return
                jump_back += 1

                if_offset = None
                if self.version < 2.7:
                    # Look for JUMP_IF POP_TOP ...
                    if (code[self.prev[next_line_byte]] == self.opc.POP_TOP
                            and (code[self.prev[self.prev[next_line_byte]]]
                                 in self.pop_jump_if)):
                        if_offset = self.prev[self.prev[next_line_byte]]
                elif code[self.prev[next_line_byte]] in self.pop_jump_if:
                    # Look for POP_JUMP_IF ...
                    if_offset = self.prev[next_line_byte]
                if if_offset:
                    loop_type = 'while'
                    self.ignore_if.add(if_offset)
                    if self.version < 2.7 and (code[self.prev[jump_back]]
                                               == self.opc.RETURN_VALUE):
                        self.ignore_if.add(self.prev[jump_back])
                        pass
                    pass
                else:
                    loop_type = 'for'
                target = next_line_byte
                end = jump_back + 3
            else:
                if self.get_target(jump_back) >= next_line_byte:
                    jump_back = self.last_instr(start, end,
                                                self.opc.JUMP_ABSOLUTE, start,
                                                False)
                if end > jump_back + 4 and code[end] in self.jump_forward:
                    if code[jump_back + 4] in self.jump_forward:
                        if self.get_target(jump_back +
                                           4) == self.get_target(end):
                            self.fixed_jumps[offset] = jump_back + 4
                            end = jump_back + 4
                elif target < offset:
                    self.fixed_jumps[offset] = jump_back + 4
                    end = jump_back + 4

                target = self.get_target(jump_back, self.opc.JUMP_ABSOLUTE)

                if (self.version > 2.1 and code[target]
                        in (self.opc.FOR_ITER, self.opc.GET_ITER)):
                    loop_type = 'for'
                else:
                    loop_type = 'while'
                    if (self.version < 2.7
                            and self.code[self.prev[next_line_byte]]
                            == self.opc.POP_TOP):
                        test = self.prev[self.prev[next_line_byte]]
                    else:
                        test = self.prev[next_line_byte]

                    if test == offset:
                        loop_type = 'while 1'
                    elif self.code[test] in self.opc.JUMP_OPs:
                        self.ignore_if.add(test)
                        test_target = self.get_target(test)
                        if test_target > (jump_back + 3):
                            jump_back = test_target
                self.not_continue.add(jump_back)
            self.loops.append(target)
            self.structs.append({
                'type': loop_type + '-loop',
                'start': target,
                'end': jump_back
            })
            if jump_back + 3 != end:
                self.structs.append({
                    'type': loop_type + '-else',
                    'start': jump_back + 3,
                    'end': end
                })
        elif op == self.opc.SETUP_EXCEPT:
            start = offset + op_size(op, self.opc)
            target = self.get_target(offset, op)
            end = self.restrict_to_parent(target, parent)
            if target != end:
                self.fixed_jumps[offset] = end
                # print target, end, parent
            # Add the try block
            self.structs.append({
                'type': 'try',
                'start': start - 3,
                'end': end - 4
            })
            # Now isolate the except and else blocks
            end_else = start_else = self.get_target(self.prev[end])

            end_finally_offset = end
            setup_except_nest = 0
            while end_finally_offset < len(self.code):
                if self.code[end_finally_offset] == self.opc.END_FINALLY:
                    if setup_except_nest == 0:
                        break
                    else:
                        setup_except_nest -= 1
                elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT:
                    setup_except_nest += 1
                end_finally_offset += op_size(code[end_finally_offset],
                                              self.opc)
                pass

            # Add the except blocks
            i = end
            while i < len(self.code) and i < end_finally_offset:
                jmp = self.next_except_jump(i)
                if jmp is None:  # check
                    i = self.next_stmt[i]
                    continue
                if self.code[jmp] == self.opc.RETURN_VALUE:
                    self.structs.append({
                        'type': 'except',
                        'start': i,
                        'end': jmp + 1
                    })
                    i = jmp + 1
                else:
                    target = self.get_target(jmp)
                    if target != start_else:
                        end_else = self.get_target(jmp)
                    if self.code[jmp] == self.opc.JUMP_FORWARD:
                        if self.version <= 2.6:
                            self.fixed_jumps[jmp] = target
                        else:
                            self.fixed_jumps[jmp] = -1
                    self.structs.append({
                        'type': 'except',
                        'start': i,
                        'end': jmp
                    })
                    i = jmp + 3

            # Add the try-else block
            if end_else != start_else:
                r_end_else = self.restrict_to_parent(end_else, parent)
                # May be able to drop the 2.7 test.
                if self.version == 2.7:
                    self.structs.append({
                        'type': 'try-else',
                        'start': i + 1,
                        'end': r_end_else
                    })
                    self.fixed_jumps[i] = r_end_else
            else:
                self.fixed_jumps[i] = i + 1

        elif op in self.pop_jump_if:
            target = self.get_target(offset, op)
            rtarget = self.restrict_to_parent(target, parent)

            # Do not let jump to go out of parent struct bounds
            if target != rtarget and parent['type'] == 'and/or':
                self.fixed_jumps[offset] = rtarget
                return

            jump_if_offset = offset

            start = offset + 3
            pre = self.prev

            # Does this jump to right after another conditional jump that is
            # not myself?  If so, it's part of a larger conditional.
            # rocky: if we have a conditional jump to the next instruction, then
            # possibly I am "skipping over" a "pass" or null statement.

            if self.version < 2.7:
                op_testset = set([
                    self.opc.POP_TOP, self.opc.JUMP_IF_TRUE,
                    self.opc.JUMP_IF_FALSE
                ])
            else:
                op_testset = self.pop_jump_if_or_pop | self.pop_jump_if

            if (code[pre[target]] in op_testset and (target > offset)):
                self.fixed_jumps[offset] = pre[target]
                self.structs.append({
                    'type': 'and/or',
                    'start': start,
                    'end': pre[target]
                })
                return

            # The op offset just before the target jump offset is important
            # in making a determination of what we have. Save that.
            pre_rtarget = pre[rtarget]

            # Is it an "and" inside an "if" or "while" block
            if op == self.opc.PJIF:

                # Search for other POP_JUMP_IF_FALSE targetting the same op,
                # in current statement, starting from current offset, and filter
                # everything inside inner 'or' jumps and midline ifs
                match = self.rem_or(start, self.next_stmt[offset],
                                    self.opc.PJIF, target)

                # If we still have any offsets in set, start working on it
                if match:
                    if code[pre_rtarget] in self.jump_forward \
                            and pre_rtarget not in self.stmts \
                            and self.restrict_to_parent(self.get_target(pre_rtarget), parent) == rtarget:
                        if code[pre[pre_rtarget]] == self.opc.JUMP_ABSOLUTE \
                                and self.remove_mid_line_ifs([offset]) \
                                and target == self.get_target(pre[pre_rtarget]) \
                                and (pre[pre_rtarget] not in self.stmts or self.get_target(pre[pre_rtarget]) > pre[pre_rtarget])\
                                and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre_rtarget], self.pop_jump_if, target))):
                            pass
                        elif code[pre[pre_rtarget]] == self.opc.RETURN_VALUE \
                                and self.remove_mid_line_ifs([offset]) \
                                and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start,
                                                                                       pre[pre_rtarget],
                                                                                       self.pop_jump_if, target)))
                                              | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre_rtarget],
                                                            (self.opc.PJIF, self.opc.PJIT, self.opc.JUMP_ABSOLUTE), pre_rtarget, True))))):
                            pass
                        else:
                            fix = None
                            jump_ifs = self.all_instr(start,
                                                      self.next_stmt[offset],
                                                      self.opc.PJIF)
                            last_jump_good = True
                            for j in jump_ifs:
                                if target == self.get_target(j):
                                    if self.lines[
                                            j].next == j + 3 and last_jump_good:
                                        fix = j
                                        break
                                else:
                                    last_jump_good = False
                            self.fixed_jumps[offset] = fix or match[-1]
                            return
                    else:
                        if (self.version < 2.7 and parent['type']
                                in ('root', 'for-loop', 'if-then', 'else',
                                    'try')):
                            self.fixed_jumps[offset] = rtarget
                        else:
                            # Note: the test for < 2.7 might be superfluous, although it is
                            # informative. For 2.7 a different branch is taken, and the code
                            # below is handled under: elif op in self.pop_jump_if_or_pop
                            # below.
                            self.fixed_jumps[offset] = match[-1]
                        return
            else:  # op != self.opc.PJIT
                if self.version < 2.7 and code[offset + 3] == self.opc.POP_TOP:
                    assert_offset = offset + 4
                else:
                    assert_offset = offset + 3
                if (assert_offset) in self.load_asserts:
                    if code[pre_rtarget] == self.opc.RAISE_VARARGS:
                        return
                    self.load_asserts.remove(assert_offset)

                next = self.next_stmt[offset]
                if pre[next] == offset:
                    pass
                elif code[
                        next] in self.jump_forward and target == self.get_target(
                            next):
                    if code[pre[next]] == self.opc.PJIF:
                        if code[next] == self.opc.JUMP_FORWARD or target != rtarget or code[
                                pre[pre_rtarget]] not in (
                                    self.opc.JUMP_ABSOLUTE,
                                    self.opc.RETURN_VALUE):
                            self.fixed_jumps[offset] = pre[next]
                            return
                elif code[next] == self.opc.JUMP_ABSOLUTE and code[
                        target] in self.jump_forward:
                    next_target = self.get_target(next)
                    if self.get_target(target) == next_target:
                        self.fixed_jumps[offset] = pre[next]
                        return
                    elif code[
                            next_target] in self.jump_forward and self.get_target(
                                next_target) == self.get_target(target):
                        self.fixed_jumps[offset] = pre[next]
                        return

            # don't add a struct for a while test, it's already taken care of
            if offset in self.ignore_if:
                return

            if self.version == 2.7:
                if code[pre_rtarget] == self.opc.JUMP_ABSOLUTE and pre_rtarget in self.stmts \
                        and pre_rtarget != offset and pre[pre_rtarget] != offset:
                    if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[
                            rtarget + 3] == self.opc.POP_BLOCK:
                        if code[pre[pre_rtarget]] != self.opc.JUMP_ABSOLUTE:
                            pass
                        elif self.get_target(pre[pre_rtarget]) != target:
                            pass
                        else:
                            rtarget = pre_rtarget
                    else:
                        rtarget = pre_rtarget

            # Does the "jump if" jump beyond a jump op?
            # That is, we have something like:
            #  POP_JUMP_IF_FALSE HERE
            #  ...
            # JUMP_FORWARD
            # HERE:
            #
            # If so, this can be block inside an "if" statement
            # or a conditional assignment like:
            #   x = 1 if x else 2
            #
            # There are other contexts we may need to consider
            # like whether the target is "END_FINALLY"
            # or if the condition jump is to a forward location
            code_pre_rtarget = code[pre_rtarget]

            if code_pre_rtarget in self.jump_forward:
                if_end = self.get_target(pre_rtarget)

                # Is this a loop and not an "if" statement?
                if (if_end < pre_rtarget) and (pre[if_end]
                                               in self.setup_loop_targets):

                    if (if_end > start):
                        return
                    else:
                        # We still have the case in 2.7 that the next instruction
                        # is a jump to a SETUP_LOOP target.
                        next_offset = target + op_size(self.code[target],
                                                       self.opc)
                        next_op = self.code[next_offset]
                        if self.op_name(next_op) == 'JUMP_FORWARD':
                            jump_target = self.get_target(next_offset, next_op)
                            if jump_target in self.setup_loops:
                                self.structs.append({
                                    'type': 'while-loop',
                                    'start': jump_if_offset,
                                    'end': jump_target
                                })
                                self.fixed_jumps[jump_if_offset] = jump_target
                                return

                end = self.restrict_to_parent(if_end, parent)

                if_then_maybe = None

                if 2.2 <= self.version <= 2.6:
                    # Take the JUMP_IF target. In an "if/then", it will be
                    # a POP_TOP instruction and the instruction before it
                    # will be a JUMP_FORWARD to just after the POP_TOP.
                    # For example:
                    # Good:
                    # 3  JUMP_IF_FALSE        33  'to 39'
                    # ..
                    # 36  JUMP_FORWARD          1  'to 40'
                    # 39  POP_TOP
                    # 40 ...
                    # example:

                    # BAD (is an "and"):
                    # 28  JUMP_IF_FALSE         4  'to 35'
                    # ...
                    # 32  JUMP_ABSOLUTE        40  'to 40' # should be 36 or there should
                    #                                      # be a COME_FROM at the pop top
                    #                                      # before 40 to 35
                    # 35  POP_TOP
                    # 36 ...
                    # 39  POP_TOP
                    # 39_0  COME_FROM 3
                    # 40 ...

                    if self.opname_for_offset(jump_if_offset).startswith(
                            'JUMP_IF'):
                        jump_if_target = code[jump_if_offset + 1]
                        if self.opname_for_offset(jump_if_target +
                                                  jump_if_offset +
                                                  3) == 'POP_TOP':
                            jump_inst = jump_if_target + jump_if_offset
                            jump_offset = code[jump_inst + 1]
                            jump_op = self.opname_for_offset(jump_inst)
                            if (jump_op == 'JUMP_FORWARD'
                                    and jump_offset == 1):
                                self.structs.append({
                                    'type': 'if-then',
                                    'start': start - 3,
                                    'end': pre_rtarget
                                })

                                self.thens[start] = end
                            elif jump_op == 'JUMP_ABSOLUTE':
                                if_then_maybe = {
                                    'type': 'if-then',
                                    'start': start - 3,
                                    'end': pre_rtarget
                                }

                elif self.version == 2.7:
                    self.structs.append({
                        'type': 'if-then',
                        'start': start - 3,
                        'end': pre_rtarget
                    })

                # FIXME: this is yet another case where we need dominators.
                if pre_rtarget not in self.linestartoffsets or self.version < 2.7:
                    self.not_continue.add(pre_rtarget)

                if rtarget < end:
                    # We have an "else" block  of some kind.
                    # Is it associated with "if_then_maybe" seen above?
                    # These will be linked in this funny way:

                    # 198  JUMP_IF_FALSE        18  'to 219'
                    # 201  POP_TOP
                    # ...
                    # 216  JUMP_ABSOLUTE       256  'to 256'
                    # 219  POP_TOP
                    # ...
                    # 252  JUMP_FORWARD          1  'to 256'
                    # 255  POP_TOP
                    # 256
                    if if_then_maybe and jump_op == 'JUMP_ABSOLUTE':
                        jump_target = self.get_target(jump_inst,
                                                      code[jump_inst])
                        if self.opname_for_offset(end) == 'JUMP_FORWARD':
                            end_target = self.get_target(end, code[end])
                            if jump_target == end_target:
                                self.structs.append(if_then_maybe)
                                self.thens[start] = end

                    self.structs.append({
                        'type': 'else',
                        'start': rtarget,
                        'end': end
                    })
            elif code_pre_rtarget == self.opc.RETURN_VALUE:
                if self.version == 2.7 or pre_rtarget not in self.ignore_if:
                    self.structs.append({
                        'type': 'if-then',
                        'start': start,
                        'end': rtarget
                    })
                    self.thens[start] = rtarget
                    if self.version == 2.7 or code[pre_rtarget +
                                                   1] != self.opc.JUMP_FORWARD:
                        self.fixed_jumps[offset] = rtarget
                        self.return_end_ifs.add(pre_rtarget)

        elif op in self.pop_jump_if_or_pop:
            target = self.get_target(offset, op)
            self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
    def build_statement_indices(self):
        """
        Work out which bytecode offsets begin a statement.

        Two attributes are (re)built:

        self.stmts -- set of offsets treated as statement starts.
        self.next_stmt -- list indexed by offset; each entry holds the
            first retained statement offset beyond that index, or
            len(self.code) once no statement follows.
        """
        code = self.code
        start = 0
        end = len(code)

        # Two-op patterns: PJIF or PJIT directly followed by JUMP_FORWARD
        # or JUMP_ABSOLUTE.
        stmt_opcode_seqs = frozenset(
            (cond_jump, plain_jump)
            for cond_jump in (self.opc.PJIF, self.opc.PJIT)
            for plain_jump in (self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE))

        # Seed the statement set with every single-opcode match.
        prelim = self.all_instr(start, end, self.stmt_opcodes)
        self.stmts = stmts = set(prelim)

        # Offsets found through the patterns above; kept apart because
        # the JUMP_ABSOLUTE filter further down must leave them alone.
        pass_stmts = set()
        for opcode_seq in stmt_opcode_seqs:
            scan_limit = end - (len(opcode_seq) + 1)
            for cursor in self.op_range(start, scan_limit):
                for expected_op in opcode_seq:
                    if expected_op != code[cursor]:
                        break
                    cursor += op_size(code[cursor], self.opc)
                else:
                    # Whole pattern matched; cursor is now past it, so
                    # self.prev[cursor] is the op preceding that position.
                    seq_tail = self.prev[cursor]
                    stmts.add(seq_tail)
                    pass_stmts.add(seq_tail)

        # Only re-sort when the pattern scan actually added something.
        stmt_list = sorted(stmts) if pass_stmts else prelim

        slist = []
        self.next_stmt = slist
        last_stmt = -1
        filled_to = 0
        for offset in stmt_list:
            op = code[offset]
            discard = False

            if op == self.opc.JUMP_ABSOLUTE and offset not in pass_stmts:
                # A forward jump, or a jump sharing its line with the
                # previously kept statement, does not start a statement.
                if (self.get_target(offset) > offset
                        or self.lines[last_stmt].l_no
                        == self.lines[offset].l_no):
                    discard = True
                else:
                    # Walk back over a run of JUMP_ABSOLUTEs; landing on
                    # LIST_APPEND means this is a list comprehension.
                    back = self.prev[offset]
                    while code[back] == self.opc.JUMP_ABSOLUTE:
                        back = self.prev[back]
                    discard = (self.version >= 2.3
                               and self.opname_for_offset(back)
                               == 'LIST_APPEND')
            elif op == self.opc.POP_TOP:
                # The POP_TOP in:
                #   ROT_TWO, POP_TOP,
                #   RETURN_xxx, POP_TOP (in 2.6-), or
                #   JUMP_IF_{FALSE,TRUE}, POP_TOP  (in 2.6-)
                # completes the preceding instruction instead of opening
                # a new statement.
                before = code[self.prev[offset]]
                if before == self.opc.ROT_TWO:
                    discard = True
                elif self.version < 2.7:
                    discard = before in (self.opc.JUMP_IF_FALSE,
                                         self.opc.JUMP_IF_TRUE,
                                         self.opc.RETURN_VALUE)
            elif op in self.designator_ops:
                # Skip back over consecutive designator ops; if they hang
                # off a FOR_ITER they are not a statement.
                back = self.prev[offset]
                while code[back] in self.designator_ops:
                    back = self.prev[back]
                discard = (self.version > 2.1
                           and code[back] == self.opc.FOR_ITER)

            if discard:
                stmts.remove(offset)
                continue

            last_stmt = offset
            # Every index from the previous kept statement up to (but not
            # including) this one maps to this statement.
            slist.extend([offset] * (offset - filled_to))
            filled_to = offset

        # Remaining indices have no following statement: map them to the
        # end of the code.
        slist.extend([end] * (end - len(slist)))
Exemple #6
0
    def build_statement_indices(self):
        """
        Locate statement boundaries in self.code.

        Sets self.stmts to the set of offsets regarded as statement
        starts, and self.next_stmt to a 'list-map' which, indexed by an
        op offset, gives the offset of the next retained statement
        (len(self.code) after the last one).
        """
        code = self.code
        start = 0
        codelen = len(code)
        end = codelen

        # First pass: offsets whose opcode alone marks a statement.
        prelim = self.all_instr(start, end, self.statement_opcodes)
        self.stmts = stmts = set(prelim)

        # Second pass: multi-opcode patterns. Matches are also recorded
        # in pass_stmts so the JUMP_ABSOLUTE filtering below skips them.
        pass_stmts = set()
        for pattern in self.statement_opcode_sequences:
            scan_end = end - (len(pattern) + 1)
            for pos in self.op_range(start, scan_end):
                for wanted in pattern:
                    if wanted != code[pos]:
                        break
                    pos += op_size(code[pos], self.opc)
                else:
                    # pos has moved past the matched pattern; step back
                    # one op through the prev_op map.
                    matched = self.prev_op[pos]
                    stmts.add(matched)
                    pass_stmts.add(matched)

        # Sorting is only needed if the second pass added offsets.
        if pass_stmts:
            ordered_offsets = sorted(stmts)
        else:
            ordered_offsets = prelim

        slist = []
        self.next_stmt = slist
        last_stmt_offset = -1
        covered = 0
        for stmt_offset in ordered_offsets:
            op = code[stmt_offset]
            rejected = False

            if (op == self.opc.JUMP_ABSOLUTE
                    and stmt_offset not in pass_stmts):
                target = self.get_target(stmt_offset)
                same_line = (self.lines[last_stmt_offset].l_no
                             == self.lines[stmt_offset].l_no) \
                    if not target > stmt_offset else True
                if same_line:
                    # Forward jump, or a jump on the same line as the
                    # previous statement: not a statement.
                    rejected = True
                else:
                    # Rewind ops until a non-JUMP_ABSOLUTE one is found;
                    # LIST_APPEND there means a list comprehension,
                    # which is not a statement either.
                    earlier = self.prev_op[stmt_offset]
                    while code[earlier] == self.opc.JUMP_ABSOLUTE:
                        earlier = self.prev_op[earlier]
                    rejected = code[earlier] == self.opc.LIST_APPEND
            elif (op == self.opc.POP_TOP
                  and code[self.prev_op[stmt_offset]] == self.opc.ROT_TWO):
                # ROT_TWO + POP_TOP is excluded.
                rejected = True
            elif op in self.designator_ops:
                # Designators that trace back to a FOR_ITER are excluded.
                earlier = self.prev_op[stmt_offset]
                while code[earlier] in self.designator_ops:
                    earlier = self.prev_op[earlier]
                rejected = code[earlier] == self.opc.FOR_ITER

            if rejected:
                stmts.remove(stmt_offset)
                continue

            # Indices since the previous kept statement all point at
            # this statement's offset.
            slist.extend([stmt_offset] * (stmt_offset - covered))
            last_stmt_offset = stmt_offset
            covered = stmt_offset

        # Fill the tail of the map for the last statement.
        slist.extend([codelen] * (codelen - len(slist)))