def op_range(self, start, end):
    """
    Generate the offsets of successive opcodes in self.code.

    Begins at `start` and keeps yielding while the current offset is
    below `end`.  Each step advances by the size that op_size() reports
    for the opcode at the current offset, so argument bytes are never
    yielded.
    """
    offset = start
    while offset < end:
        yield offset
        # Step over this instruction (opcode plus any argument bytes).
        offset += op_size(self.code[offset], self.opc)
def build_prev_op(self):
    """
    Build the 'list-map' used to step back to the previous instruction.

    The list starts with a single 0 and then, for every instruction
    yielded by op_range(), gains one copy of that instruction's offset
    per byte the instruction occupies.  The same list object is stored
    under both self.prev and self.prev_op.
    """
    bytecode = self.code
    # 2.x scanners read `prev`, 3.x scanners read `prev_op`; both names
    # are bound to the same list until that is sorted out.
    back_map = [0]
    self.prev = self.prev_op = back_map
    for offset in self.op_range(0, len(bytecode)):
        width = op_size(bytecode[offset], self.opc)
        back_map.extend([offset] * width)
def detect_control_flow(self, offset, targets):
    """
    Detect structures and their boundaries to fix optimized jumps.

    Python 3.0 is more like Python 2.6 than it is Python 3.x,
    so we have a special routine here.

    offset:  offset into self.code of the instruction to classify.
    targets: indexed by offset; each entry is iterated to get the
             offsets of instructions associated with that offset
             (presumably the instructions that jump there -- confirm
             against the caller).  Only consulted when
             self.version >= 3.5.

    Nothing is returned.  Results are recorded on the scanner object:
    entries are appended to self.structs and self.loops, and
    self.fixed_jumps, self.setup_loop_targets, self.setup_loops,
    self.ignore_if, self.not_continue and self.return_end_ifs are
    updated as the instruction kind requires.
    """
    code = self.code
    op = code[offset]

    # Detect parent structure: start from the first recorded struct ...
    parent = self.structs[0]
    start = parent['start']
    end = parent['end']

    # ... and pick the inner-most struct that contains our offset.
    for struct in self.structs:
        current_start = struct['start']
        current_end = struct['end']
        if ((current_start <= offset < current_end) and
                (current_start >= start and current_end <= end)):
            start = current_start
            end = current_end
            parent = struct

    if op == self.opc.SETUP_LOOP:
        # We categorize loop types: 'for', 'while', 'while 1' with
        # possibly suffixes '-loop' and '-else'.
        # Try to find the jump_back instruction of the loop.
        # It could be a return instruction.

        # NOTE(review): the "+ 3" here and below looks like the size of an
        # instruction with a two-byte argument -- confirm.
        start = offset + 3
        target = self.get_target(offset)
        end = self.restrict_to_parent(target, parent)
        self.setup_loop_targets[offset] = target
        self.setup_loops[target] = offset

        if target != end:
            self.fixed_jumps[offset] = end

        (line_no, next_line_byte) = self.lines[offset]
        jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                    next_line_byte, False)

        if jump_back:
            jump_forward_offset = jump_back + 3
        else:
            jump_forward_offset = None

        # Offset two instructions before `end`.
        return_val_offset1 = self.prev[self.prev[end]]

        # A candidate jump-back that is followed by a forward jump, in a
        # loop whose body ends in RETURN_VALUE (optionally followed by
        # POP_BLOCK), is discarded.
        if (jump_back and jump_back != self.prev_op[end] and
                self.is_jump_forward(jump_forward_offset)):
            if (code[self.prev_op[end]] == self.opc.RETURN_VALUE or
                    (code[self.prev_op[end]] == self.opc.POP_BLOCK and
                     code[return_val_offset1] == self.opc.RETURN_VALUE)):
                jump_back = None
        if not jump_back:
            # loop suite ends in return
            jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
            if not jump_back:
                return

            jump_back += 2
            if_offset = None
            if code[self.prev_op[next_line_byte]] not in JUMP_TF:
                if_offset = self.prev[next_line_byte]
            if if_offset:
                loop_type = 'while'
                self.ignore_if.add(if_offset)
            else:
                loop_type = 'for'
            target = next_line_byte
            end = jump_back + 3
        else:
            if self.get_target(jump_back) >= next_line_byte:
                jump_back = self.last_instr(start, end,
                                            self.opc.JUMP_ABSOLUTE, start,
                                            False)
            if end > jump_back + 4 and self.is_jump_forward(end):
                if self.is_jump_forward(jump_back + 4):
                    if self.get_target(jump_back + 4) == self.get_target(end):
                        self.fixed_jumps[offset] = jump_back + 4
                        end = jump_back + 4
            elif target < offset:
                self.fixed_jumps[offset] = jump_back + 4
                end = jump_back + 4

            target = self.get_target(jump_back)

            if code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER):
                loop_type = 'for'
            else:
                loop_type = 'while'
                # Instruction just before the start of the next line.
                test = self.prev_op[next_line_byte]

                if test == offset:
                    # Nothing between SETUP_LOOP and the next line.
                    loop_type = 'while 1'
                elif self.code[test] in opc.hasjabs + opc.hasjrel:
                    self.ignore_if.add(test)
                    test_target = self.get_target(test)
                    if test_target > (jump_back + 3):
                        jump_back = test_target
            self.not_continue.add(jump_back)
        self.loops.append(target)
        self.structs.append({
            'type': loop_type + '-loop',
            'start': target,
            'end': jump_back
        })
        # Anything between the jump-back and the loop end is the "else".
        if jump_back + 3 != end:
            self.structs.append({
                'type': loop_type + '-else',
                'start': jump_back + 3,
                'end': end
            })
    elif op in JUMP_TF:
        start = offset + op_size(op, self.opc)
        target = self.get_target(offset)
        rtarget = self.restrict_to_parent(target, parent)
        prev_op = self.prev_op

        # Do not let jump to go out of parent struct bounds
        if target != rtarget and parent['type'] == 'and/or':
            self.fixed_jumps[offset] = rtarget
            return

        # Does this jump to right after another conditional jump that is
        # not myself?  If so, it's part of a larger conditional.
        # rocky: if we have a conditional jump to the next instruction, then
        # possibly I am "skipping over" a "pass" or null statement.
        if ((code[prev_op[target]] in self.pop_jump_if_pop) and
                (target > offset) and prev_op[target] != offset):
            self.fixed_jumps[offset] = prev_op[target]
            self.structs.append({
                'type': 'and/or',
                'start': start,
                'end': prev_op[target]
            })
            return

        # The op offset just before the target jump offset is important
        # in making a determination of what we have. Save that.
        pre_rtarget = prev_op[rtarget]

        # Is it an "and" inside an "if" or "while" block
        if op == opc.JUMP_IF_FALSE:
            # Search for another JUMP_IF_FALSE targeting the same op,
            # in current statement, starting from current offset, and filter
            # everything inside inner 'or' jumps and midline ifs
            match = self.rem_or(start, self.next_stmt[offset],
                                opc.JUMP_IF_FALSE, target)

            # If we still have any offsets in set, start working on it
            if match:
                is_jump_forward = self.is_jump_forward(pre_rtarget)
                if (is_jump_forward and pre_rtarget not in self.stmts and
                        self.restrict_to_parent(
                            self.get_target(pre_rtarget), parent) == rtarget):
                    # In the first two cases below the jump is left
                    # untouched; only the final "else" rewrites it.
                    if (code[prev_op[pre_rtarget]] == self.opc.JUMP_ABSOLUTE and
                            self.remove_mid_line_ifs([offset]) and
                            target == self.get_target(
                                prev_op[pre_rtarget]) and
                            (prev_op[pre_rtarget] not in self.stmts or
                             self.get_target(prev_op[pre_rtarget]) >
                             prev_op[pre_rtarget]) and
                            1 == len(
                                self.remove_mid_line_ifs(
                                    self.rem_or(start, prev_op[pre_rtarget],
                                                JUMP_TF, target)))):
                        pass
                    elif (code[prev_op[pre_rtarget]] == self.opc.RETURN_VALUE and
                          self.remove_mid_line_ifs([offset]) and
                          1 == (len(
                              set(
                                  self.remove_mid_line_ifs(
                                      self.rem_or(start,
                                                  prev_op[pre_rtarget],
                                                  JUMP_TF, target))) | set(
                                      self.remove_mid_line_ifs(
                                          self.rem_or(
                                              start, prev_op[pre_rtarget],
                                              (opc.JUMP_IF_FALSE,
                                               opc.JUMP_IF_TRUE,
                                               opc.JUMP_ABSOLUTE
                                               ), pre_rtarget, True)))))):
                        pass
                    else:
                        # Redirect to the first JUMP_IF_FALSE with the same
                        # target that is immediately followed by the next
                        # line, provided every earlier one shared our
                        # target; otherwise use the last match.
                        fix = None
                        jump_ifs = self.all_instr(start,
                                                  self.next_stmt[offset],
                                                  opc.JUMP_IF_FALSE)
                        last_jump_good = True
                        for j in jump_ifs:
                            if target == self.get_target(j):
                                if self.lines[
                                        j].next == j + 3 and last_jump_good:
                                    fix = j
                                    break
                            else:
                                last_jump_good = False
                        self.fixed_jumps[offset] = fix or match[-1]
                        return
                else:
                    self.fixed_jumps[offset] = match[-1]
                    return
        # op == JUMP_IF_TRUE
        else:
            next = self.next_stmt[offset]
            if prev_op[next] == offset:
                pass
            elif self.is_jump_forward(next) and target == self.get_target(
                    next):
                if code[prev_op[next]] == opc.JUMP_IF_FALSE:
                    if (code[next] == self.opc.JUMP_FORWARD or
                            target != rtarget or
                            code[prev_op[pre_rtarget]] not in
                            (self.opc.JUMP_ABSOLUTE, self.opc.RETURN_VALUE)):
                        self.fixed_jumps[offset] = prev_op[next]
                        return
            elif (code[next] == self.opc.JUMP_ABSOLUTE and
                  self.is_jump_forward(target) and
                  self.get_target(target) == self.get_target(next)):
                self.fixed_jumps[offset] = prev_op[next]
                return

        # Don't add a struct for a while test, it's already taken care of
        if offset in self.ignore_if:
            return

        if (code[pre_rtarget] == self.opc.JUMP_ABSOLUTE and
                pre_rtarget in self.stmts and
                pre_rtarget != offset and
                prev_op[pre_rtarget] != offset and
                not (code[rtarget] == self.opc.JUMP_ABSOLUTE and
                     code[rtarget + 3] == self.opc.POP_BLOCK and
                     code[prev_op[pre_rtarget]] != self.opc.JUMP_ABSOLUTE)):
            rtarget = pre_rtarget

        # Does the "jump if" jump beyond a jump op?
        # That is, we have something like:
        #  JUMP_IF_FALSE HERE
        #  ...
        # JUMP_FORWARD
        # HERE:
        #
        # If so, this can be block inside an "if" statement
        # or a conditional assignment like:
        #   x = 1 if x else 2
        #
        # There are other contexts we may need to consider
        # like whether the target is "END_FINALLY"
        # or if the condition jump is to a forward location
        if self.is_jump_forward(pre_rtarget):
            if_end = self.get_target(pre_rtarget)

            # If the jump target is back, we are looping
            if (if_end < pre_rtarget and
                    (code[prev_op[if_end]] == self.opc.SETUP_LOOP)):
                if (if_end > start):
                    return

            end = self.restrict_to_parent(if_end, parent)

            self.structs.append({
                'type': 'if-then',
                'start': start,
                'end': pre_rtarget
            })
            self.not_continue.add(pre_rtarget)

            # Disabled: recording of the matching 'else' struct.
            # if rtarget < end and (
            #         code[rtarget] not in (self.opc.END_FINALLY,
            #                               self.opc.JUMP_ABSOLUTE) and
            #         code[prev_op[pre_rtarget]] not in (self.opc.POP_EXCEPT,
            #                                            self.opc.END_FINALLY)):
            #     self.structs.append({'type': 'else',
            #                          'start': rtarget,
            #                          'end': end})
            #     self.else_start[rtarget] = end
        elif self.is_jump_back(pre_rtarget):
            if_end = rtarget
            self.structs.append({
                'type': 'if-then',
                'start': start,
                'end': pre_rtarget
            })
            self.not_continue.add(pre_rtarget)
        elif code[pre_rtarget] in (self.opc.RETURN_VALUE, self.opc.BREAK_LOOP):
            self.structs.append({
                'type': 'if-then',
                'start': start,
                'end': rtarget
            })
            # It is important to distinguish if this return is inside some
            # sort of except block return
            jump_prev = prev_op[offset]
            if self.is_pypy and code[jump_prev] == self.opc.COMPARE_OP:
                if self.opc.cmp_op[code[jump_prev + 1]] == 'exception match':
                    return
            if self.version >= 3.5:
                # Python 3.5 may remove as dead code a JUMP
                # instruction after a RETURN_VALUE. So we check
                # based on seeing SETUP_EXCEPT various places.
                if code[rtarget] == self.opc.SETUP_EXCEPT:
                    return
                # Check that next instruction after pops and jump is
                # not from SETUP_EXCEPT
                next_op = rtarget
                if code[next_op] == self.opc.POP_BLOCK:
                    next_op += op_size(self.code[next_op], self.opc)
                if code[next_op] == self.opc.JUMP_ABSOLUTE:
                    next_op += op_size(self.code[next_op], self.opc)
                if next_op in targets:
                    for try_op in targets[next_op]:
                        come_from_op = code[try_op]
                        if come_from_op == self.opc.SETUP_EXCEPT:
                            return
                        pass
                    pass
            if code[pre_rtarget] == self.opc.RETURN_VALUE:
                self.return_end_ifs.add(pre_rtarget)
            else:
                self.fixed_jumps[offset] = rtarget
                self.not_continue.add(pre_rtarget)

    elif op == self.opc.SETUP_EXCEPT:
        # Clamp the handler target to the enclosing struct.
        target = self.get_target(offset)
        end = self.restrict_to_parent(target, parent)
        self.fixed_jumps[offset] = end
    elif op == self.opc.SETUP_FINALLY:
        # Same treatment as SETUP_EXCEPT.
        target = self.get_target(offset)
        end = self.restrict_to_parent(target, parent)
        self.fixed_jumps[offset] = end
    elif op in self.jump_if_pop:
        target = self.get_target(offset)
        if target > offset:
            unop_target = self.last_instr(offset, target,
                                          self.opc.JUMP_FORWARD, target)
            if unop_target and code[unop_target + 3] != self.opc.ROT_TWO:
                self.fixed_jumps[offset] = unop_target
            else:
                self.fixed_jumps[offset] = self.restrict_to_parent(
                    target, parent)
                pass
            pass
    elif self.version >= 3.5:
        # 3.5+ has Jump optimization which too often causes RETURN_VALUE to get
        # misclassified as RETURN_END_IF. Handle that here.
        # In RETURN_VALUE, JUMP_ABSOLUTE, RETURN_VALUE is never RETURN_END_IF
        if op == self.opc.RETURN_VALUE:
            if (offset + 1 < len(code) and
                    code[offset + 1] == self.opc.JUMP_ABSOLUTE and
                    offset in self.return_end_ifs):
                self.return_end_ifs.remove(offset)
                pass
            pass
        elif op == self.opc.JUMP_FORWARD:
            # If we have:
            #   JUMP_FORWARD x, [non-jump, insns], RETURN_VALUE, x:
            # then RETURN_VALUE is not RETURN_END_IF
            rtarget = self.get_target(offset)
            rtarget_prev = self.prev[rtarget]
            if (code[rtarget_prev] == self.opc.RETURN_VALUE and
                    rtarget_prev in self.return_end_ifs):
                # Walk backwards from the RETURN_VALUE to this jump; give
                # up if any other jump lies in between.
                i = rtarget_prev
                while i != offset:
                    if code[i] in [opc.JUMP_FORWARD, opc.JUMP_ABSOLUTE]:
                        return
                    i = self.prev[i]
                self.return_end_ifs.remove(rtarget_prev)
            pass
    return
def detect_control_flow(self, offset, op):
    """
    Detect type of block structures and their boundaries to fix optimized
    jumps in python2.3+

    offset: offset into self.code of the instruction to classify.
    op:     the opcode found at that offset.

    Nothing is returned.  Results are recorded on the scanner object:
    entries are appended to self.structs and self.loops, and
    self.fixed_jumps, self.setup_loop_targets, self.setup_loops,
    self.ignore_if, self.not_continue, self.thens, self.load_asserts and
    self.return_end_ifs are updated as the instruction kind requires.
    """
    code = self.code

    # Detect parent structure: start from the first recorded struct ...
    parent = self.structs[0]
    start = parent['start']
    end = parent['end']

    # ... and pick the inner-most struct that contains our offset.
    for struct in self.structs:
        current_start = struct['start']
        current_end = struct['end']
        if ((current_start <= offset < current_end) and
                (current_start >= start and current_end <= end)):
            start = current_start
            end = current_end
            parent = struct

    if op == self.opc.SETUP_LOOP:
        # We categorize loop types: 'for', 'while', 'while 1' with
        # possibly suffixes '-loop' and '-else'
        # Try to find the jump_back instruction of the loop.
        # It could be a return instruction.

        # NOTE(review): the "+ 3" here and below looks like the size of an
        # instruction with a two-byte argument -- confirm.
        start = offset + 3
        target = self.get_target(offset, op)
        end = self.restrict_to_parent(target, parent)
        self.setup_loop_targets[offset] = target
        self.setup_loops[target] = offset

        if target != end:
            self.fixed_jumps[offset] = end

        (line_no, next_line_byte) = self.lines[offset]
        jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE,
                                    next_line_byte, False)

        if jump_back:
            # Account for the fact that < 2.7 has an explicit
            # POP_TOP instruction in the equivalent POP_JUMP_IF
            # construct
            if self.version < 2.7:
                jump_forward_offset = jump_back + 4
                return_val_offset1 = self.prev[self.prev[self.prev[end]]]
                # Is jump back really "back"?
                jump_target = self.get_target(jump_back, code[jump_back])
                if (jump_target > jump_back or code[jump_back + 3] in [
                        self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE
                ]):
                    jump_back = None
                    pass
            else:
                jump_forward_offset = jump_back + 3
                return_val_offset1 = self.prev[self.prev[end]]

        # A candidate jump-back that is followed by a forward jump, in a
        # loop whose body ends in RETURN_VALUE (optionally followed by
        # POP_BLOCK), is discarded.
        if (jump_back and jump_back != self.prev[end] and
                code[jump_forward_offset] in self.jump_forward):
            if (code[self.prev[end]] == self.opc.RETURN_VALUE or
                    (code[self.prev[end]] == self.opc.POP_BLOCK and
                     code[return_val_offset1] == self.opc.RETURN_VALUE)):
                jump_back = None

        if not jump_back:
            # loop suite ends in return
            # scanner26 of wbiti had:
            # jump_back = self.last_instr(start, end, self.opc.JUMP_ABSOLUTE, start, False)
            jump_back = self.last_instr(start, end, self.opc.RETURN_VALUE)
            if not jump_back:
                return
            jump_back += 1

            if_offset = None
            if self.version < 2.7:
                # Look for JUMP_IF POP_TOP ...
                if (code[self.prev[next_line_byte]] == self.opc.POP_TOP and
                        (code[self.prev[self.prev[next_line_byte]]] in
                         self.pop_jump_if)):
                    if_offset = self.prev[self.prev[next_line_byte]]
            elif code[self.prev[next_line_byte]] in self.pop_jump_if:
                # Look for POP_JUMP_IF ...
                if_offset = self.prev[next_line_byte]
            if if_offset:
                loop_type = 'while'
                self.ignore_if.add(if_offset)
                if self.version < 2.7 and (code[self.prev[jump_back]] ==
                                           self.opc.RETURN_VALUE):
                    self.ignore_if.add(self.prev[jump_back])
                    pass
                pass
            else:
                loop_type = 'for'
            target = next_line_byte
            end = jump_back + 3
        else:
            if self.get_target(jump_back) >= next_line_byte:
                jump_back = self.last_instr(start, end,
                                            self.opc.JUMP_ABSOLUTE, start,
                                            False)
            if end > jump_back + 4 and code[end] in self.jump_forward:
                if code[jump_back + 4] in self.jump_forward:
                    if self.get_target(jump_back + 4) == self.get_target(end):
                        self.fixed_jumps[offset] = jump_back + 4
                        end = jump_back + 4
            elif target < offset:
                self.fixed_jumps[offset] = jump_back + 4
                end = jump_back + 4

            target = self.get_target(jump_back, self.opc.JUMP_ABSOLUTE)

            if (self.version > 2.1 and
                    code[target] in (self.opc.FOR_ITER, self.opc.GET_ITER)):
                loop_type = 'for'
            else:
                loop_type = 'while'
                # Locate the loop test: the instruction before the next
                # line, skipping the explicit POP_TOP present before 2.7.
                if (self.version < 2.7 and
                        self.code[self.prev[next_line_byte]] == self.opc.POP_TOP):
                    test = self.prev[self.prev[next_line_byte]]
                else:
                    test = self.prev[next_line_byte]

                if test == offset:
                    # Nothing between SETUP_LOOP and the next line.
                    loop_type = 'while 1'
                elif self.code[test] in self.opc.JUMP_OPs:
                    self.ignore_if.add(test)
                    test_target = self.get_target(test)
                    if test_target > (jump_back + 3):
                        jump_back = test_target
            self.not_continue.add(jump_back)

        self.loops.append(target)
        self.structs.append({
            'type': loop_type + '-loop',
            'start': target,
            'end': jump_back
        })
        # Anything between the jump-back and the loop end is the "else".
        if jump_back + 3 != end:
            self.structs.append({
                'type': loop_type + '-else',
                'start': jump_back + 3,
                'end': end
            })
    elif op == self.opc.SETUP_EXCEPT:
        start = offset + op_size(op, self.opc)
        target = self.get_target(offset, op)
        end = self.restrict_to_parent(target, parent)
        if target != end:
            self.fixed_jumps[offset] = end
            # print target, end, parent
        # Add the try block
        self.structs.append({
            'type': 'try',
            'start': start - 3,
            'end': end - 4
        })
        # Now isolate the except and else blocks
        end_else = start_else = self.get_target(self.prev[end])

        # Scan forward for the END_FINALLY that closes this try,
        # stepping over nested SETUP_EXCEPT / END_FINALLY pairs.
        end_finally_offset = end
        setup_except_nest = 0
        while end_finally_offset < len(self.code):
            if self.code[end_finally_offset] == self.opc.END_FINALLY:
                if setup_except_nest == 0:
                    break
                else:
                    setup_except_nest -= 1
            elif self.code[end_finally_offset] == self.opc.SETUP_EXCEPT:
                setup_except_nest += 1
            end_finally_offset += op_size(code[end_finally_offset], self.opc)
            pass

        # Add the except blocks
        i = end
        while i < len(self.code) and i < end_finally_offset:
            jmp = self.next_except_jump(i)
            if jmp is None:  # check
                i = self.next_stmt[i]
                continue
            if self.code[jmp] == self.opc.RETURN_VALUE:
                self.structs.append({
                    'type': 'except',
                    'start': i,
                    'end': jmp + 1
                })
                i = jmp + 1
            else:
                target = self.get_target(jmp)
                if target != start_else:
                    end_else = self.get_target(jmp)
                if self.code[jmp] == self.opc.JUMP_FORWARD:
                    if self.version <= 2.6:
                        self.fixed_jumps[jmp] = target
                    else:
                        self.fixed_jumps[jmp] = -1
                self.structs.append({
                    'type': 'except',
                    'start': i,
                    'end': jmp
                })
                i = jmp + 3

        # Add the try-else block
        if end_else != start_else:
            r_end_else = self.restrict_to_parent(end_else, parent)
            # May be able to drop the 2.7 test.
            if self.version == 2.7:
                self.structs.append({
                    'type': 'try-else',
                    'start': i + 1,
                    'end': r_end_else
                })
                self.fixed_jumps[i] = r_end_else
        else:
            self.fixed_jumps[i] = i + 1

    elif op in self.pop_jump_if:
        target = self.get_target(offset, op)
        rtarget = self.restrict_to_parent(target, parent)

        # Do not let jump to go out of parent struct bounds
        if target != rtarget and parent['type'] == 'and/or':
            self.fixed_jumps[offset] = rtarget
            return

        jump_if_offset = offset

        start = offset + 3
        pre = self.prev

        # Does this jump to right after another conditional jump that is
        # not myself?  If so, it's part of a larger conditional.
        # rocky: if we have a conditional jump to the next instruction, then
        # possibly I am "skipping over" a "pass" or null statement.
        if self.version < 2.7:
            op_testset = set([
                self.opc.POP_TOP, self.opc.JUMP_IF_TRUE,
                self.opc.JUMP_IF_FALSE
            ])
        else:
            op_testset = self.pop_jump_if_or_pop | self.pop_jump_if

        if (code[pre[target]] in op_testset and (target > offset)):
            self.fixed_jumps[offset] = pre[target]
            self.structs.append({
                'type': 'and/or',
                'start': start,
                'end': pre[target]
            })
            return

        # The op offset just before the target jump offset is important
        # in making a determination of what we have. Save that.
        pre_rtarget = pre[rtarget]

        # Is it an "and" inside an "if" or "while" block
        if op == self.opc.PJIF:

            # Search for other POP_JUMP_IF_FALSE targeting the same op,
            # in current statement, starting from current offset, and filter
            # everything inside inner 'or' jumps and midline ifs
            match = self.rem_or(start, self.next_stmt[offset],
                                self.opc.PJIF, target)

            # If we still have any offsets in set, start working on it
            if match:
                if code[pre_rtarget] in self.jump_forward \
                        and pre_rtarget not in self.stmts \
                        and self.restrict_to_parent(self.get_target(pre_rtarget), parent) == rtarget:
                    # In the first two cases below the jump is left
                    # untouched; only the final "else" rewrites it.
                    if code[pre[pre_rtarget]] == self.opc.JUMP_ABSOLUTE \
                            and self.remove_mid_line_ifs([offset]) \
                            and target == self.get_target(pre[pre_rtarget]) \
                            and (pre[pre_rtarget] not in self.stmts or self.get_target(pre[pre_rtarget]) > pre[pre_rtarget])\
                            and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre_rtarget], self.pop_jump_if, target))):
                        pass
                    elif code[pre[pre_rtarget]] == self.opc.RETURN_VALUE \
                            and self.remove_mid_line_ifs([offset]) \
                            and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre_rtarget], self.pop_jump_if, target)))
                                          | set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre_rtarget],
                                                (self.opc.PJIF, self.opc.PJIT, self.opc.JUMP_ABSOLUTE), pre_rtarget, True))))):
                        pass
                    else:
                        # Redirect to the first PJIF with the same target
                        # that is immediately followed by the next line,
                        # provided every earlier one shared our target;
                        # otherwise use the last match.
                        fix = None
                        jump_ifs = self.all_instr(start,
                                                  self.next_stmt[offset],
                                                  self.opc.PJIF)
                        last_jump_good = True
                        for j in jump_ifs:
                            if target == self.get_target(j):
                                if self.lines[
                                        j].next == j + 3 and last_jump_good:
                                    fix = j
                                    break
                            else:
                                last_jump_good = False
                        self.fixed_jumps[offset] = fix or match[-1]
                        return
                else:
                    if (self.version < 2.7 and
                            parent['type'] in ('root', 'for-loop', 'if-then',
                                               'else', 'try')):
                        self.fixed_jumps[offset] = rtarget
                    else:
                        # note test for < 2.7 might be superfluous although
                        # informative; for 2.7 a different branch is taken
                        # and the below code is handled
                        # under: elif op in self.pop_jump_if_or_pop
                        # below
                        self.fixed_jumps[offset] = match[-1]
                    return
        else:  # op != self.opc.PJIT
            # Offset of the instruction following the jump (past the
            # explicit POP_TOP before 2.7); used to look up load_asserts.
            if self.version < 2.7 and code[offset + 3] == self.opc.POP_TOP:
                assert_offset = offset + 4
            else:
                assert_offset = offset + 3
            if (assert_offset) in self.load_asserts:
                if code[pre_rtarget] == self.opc.RAISE_VARARGS:
                    return
                self.load_asserts.remove(assert_offset)

            next = self.next_stmt[offset]
            if pre[next] == offset:
                pass
            elif code[
                    next] in self.jump_forward and target == self.get_target(
                        next):
                if code[pre[next]] == self.opc.PJIF:
                    if code[next] == self.opc.JUMP_FORWARD or target != rtarget or code[
                            pre[pre_rtarget]] not in (
                                self.opc.JUMP_ABSOLUTE,
                                self.opc.RETURN_VALUE):
                        self.fixed_jumps[offset] = pre[next]
                        return
            elif code[next] == self.opc.JUMP_ABSOLUTE and code[
                    target] in self.jump_forward:
                next_target = self.get_target(next)
                if self.get_target(target) == next_target:
                    self.fixed_jumps[offset] = pre[next]
                    return
                elif code[
                        next_target] in self.jump_forward and self.get_target(
                            next_target) == self.get_target(target):
                    self.fixed_jumps[offset] = pre[next]
                    return

        # don't add a struct for a while test, it's already taken care of
        if offset in self.ignore_if:
            return

        if self.version == 2.7:
            if code[pre_rtarget] == self.opc.JUMP_ABSOLUTE and pre_rtarget in self.stmts \
                    and pre_rtarget != offset and pre[pre_rtarget] != offset:
                if code[rtarget] == self.opc.JUMP_ABSOLUTE and code[
                        rtarget + 3] == self.opc.POP_BLOCK:
                    if code[pre[pre_rtarget]] != self.opc.JUMP_ABSOLUTE:
                        pass
                    elif self.get_target(pre[pre_rtarget]) != target:
                        pass
                    else:
                        rtarget = pre_rtarget
                else:
                    rtarget = pre_rtarget

        # Does the "jump if" jump beyond a jump op?
        # That is, we have something like:
        #  POP_JUMP_IF_FALSE HERE
        #  ...
        # JUMP_FORWARD
        # HERE:
        #
        # If so, this can be block inside an "if" statement
        # or a conditional assignment like:
        #   x = 1 if x else 2
        #
        # There are other contexts we may need to consider
        # like whether the target is "END_FINALLY"
        # or if the condition jump is to a forward location
        code_pre_rtarget = code[pre_rtarget]

        if code_pre_rtarget in self.jump_forward:
            if_end = self.get_target(pre_rtarget)

            # Is this a loop and not an "if" statement?
            if (if_end < pre_rtarget) and (pre[if_end] in self.setup_loop_targets):

                if (if_end > start):
                    return
                else:
                    # We still have the case in 2.7 that the next instruction
                    # is a jump to a SETUP_LOOP target.
                    next_offset = target + op_size(self.code[target], self.opc)
                    next_op = self.code[next_offset]
                    if self.op_name(next_op) == 'JUMP_FORWARD':
                        jump_target = self.get_target(next_offset, next_op)
                        if jump_target in self.setup_loops:
                            self.structs.append({
                                'type': 'while-loop',
                                'start': jump_if_offset,
                                'end': jump_target
                            })
                            self.fixed_jumps[jump_if_offset] = jump_target
                            return

            end = self.restrict_to_parent(if_end, parent)

            if_then_maybe = None

            if 2.2 <= self.version <= 2.6:
                # Take the JUMP_IF target. In an "if/then", it will be
                # a POP_TOP instruction and the instruction before it
                # will be a JUMP_FORWARD to just after the POP_TOP.
                # For example:
                # Good:
                # 3  JUMP_IF_FALSE        33  'to 39'
                # ..
                # 36  JUMP_FORWARD          1  'to 40'
                # 39  POP_TOP
                # 40 ...
                # example:
                # BAD (is an "and"):
                # 28  JUMP_IF_FALSE         4  'to 35'
                # ...
                # 32  JUMP_ABSOLUTE        40  'to 40' # should be 36 or there should
                #                                      # be a COME_FROM at the pop top
                #                                      # before 40 to 35
                # 35  POP_TOP
                # 36 ...
                # 39  POP_TOP
                # 39_0  COME_FROM 3
                # 40 ...
                if self.opname_for_offset(jump_if_offset).startswith(
                        'JUMP_IF'):
                    # Raw argument byte of the JUMP_IF instruction.
                    jump_if_target = code[jump_if_offset + 1]
                    if self.opname_for_offset(jump_if_target + jump_if_offset + 3) == 'POP_TOP':
                        jump_inst = jump_if_target + jump_if_offset
                        jump_offset = code[jump_inst + 1]
                        jump_op = self.opname_for_offset(jump_inst)
                        if (jump_op == 'JUMP_FORWARD' and jump_offset == 1):
                            self.structs.append({
                                'type': 'if-then',
                                'start': start - 3,
                                'end': pre_rtarget
                            })

                            self.thens[start] = end
                        elif jump_op == 'JUMP_ABSOLUTE':
                            # Undecided for now; resolved further down
                            # once the "else" part has been examined.
                            if_then_maybe = {
                                'type': 'if-then',
                                'start': start - 3,
                                'end': pre_rtarget
                            }
            elif self.version == 2.7:
                self.structs.append({
                    'type': 'if-then',
                    'start': start - 3,
                    'end': pre_rtarget
                })

            # FIXME: this is yet another case where we need dominators.
            if pre_rtarget not in self.linestartoffsets or self.version < 2.7:
                self.not_continue.add(pre_rtarget)

            if rtarget < end:
                # We have an "else" block of some kind.
                # Is it associated with "if_then_maybe" seen above?
                # These will be linked in this funny way:

                # 198  JUMP_IF_FALSE        18  'to 219'
                # 201  POP_TOP
                # ...
                # 216  JUMP_ABSOLUTE       256  'to 256'
                # 219  POP_TOP
                # ...
                # 252  JUMP_FORWARD          1  'to 256'
                # 255  POP_TOP
                # 256
                if if_then_maybe and jump_op == 'JUMP_ABSOLUTE':
                    jump_target = self.get_target(jump_inst, code[jump_inst])
                    if self.opname_for_offset(end) == 'JUMP_FORWARD':
                        end_target = self.get_target(end, code[end])
                        if jump_target == end_target:
                            self.structs.append(if_then_maybe)
                            self.thens[start] = end

                self.structs.append({
                    'type': 'else',
                    'start': rtarget,
                    'end': end
                })
        elif code_pre_rtarget == self.opc.RETURN_VALUE:
            if self.version == 2.7 or pre_rtarget not in self.ignore_if:
                self.structs.append({
                    'type': 'if-then',
                    'start': start,
                    'end': rtarget
                })
                self.thens[start] = rtarget
                if self.version == 2.7 or code[pre_rtarget + 1] != self.opc.JUMP_FORWARD:
                    self.fixed_jumps[offset] = rtarget
                    self.return_end_ifs.add(pre_rtarget)

    elif op in self.pop_jump_if_or_pop:
        # Clamp the jump target to the enclosing struct.
        target = self.get_target(offset, op)
        self.fixed_jumps[offset] = self.restrict_to_parent(target, parent)
def build_statement_indices(self):
    """
    Work out which offsets start a statement and build self.next_stmt.

    self.stmts ends up as the set of offsets considered statement
    starts.  self.next_stmt is a 'list-map': indexing it with an
    instruction offset gives the offset where the following statement
    begins (len(self.code) after the last statement).
    """
    code = self.code
    start = 0
    end = len(code)

    # A conditional jump immediately followed by an unconditional jump
    # also counts as a statement boundary.
    cond_jump_pairs = frozenset(
        (cond_op, jump_op)
        for cond_op in (self.opc.PJIF, self.opc.PJIT)
        for jump_op in (self.opc.JUMP_FORWARD, self.opc.JUMP_ABSOLUTE))

    # Offsets of the plain statement opcodes seed the result.
    prelim = self.all_instr(start, end, self.stmt_opcodes)
    stmts = self.stmts = set(prelim)

    pass_stmts = set()
    for pair in cond_jump_pairs:
        for first in self.op_range(start, end - (len(pair) + 1)):
            cursor = first
            for wanted in pair:
                if wanted != code[cursor]:
                    break
                cursor += op_size(code[cursor], self.opc)
            else:
                # Whole pair matched: record the offset of its last op.
                tail = self.prev[cursor]
                stmts.add(tail)
                pass_stmts.add(tail)

    # Only re-sort when the sequence scan added something.
    candidates = sorted(stmts) if pass_stmts else prelim

    next_stmt_map = self.next_stmt = []
    last_stmt = -1
    filled_to = 0
    for offset in candidates:
        op = code[offset]
        drop = False
        if op == self.opc.JUMP_ABSOLUTE and offset not in pass_stmts:
            # A forward jump, or one on the same line as the previous
            # statement, is not a statement of its own.
            target = self.get_target(offset)
            if (target > offset or
                    self.lines[last_stmt].l_no == self.lines[offset].l_no):
                drop = True
            else:
                # Skip back over any run of JUMP_ABSOLUTEs; landing on
                # LIST_APPEND means this is a list comprehension.
                j = self.prev[offset]
                while code[j] == self.opc.JUMP_ABSOLUTE:
                    j = self.prev[j]
                drop = (self.version >= 2.3 and
                        self.opname_for_offset(j) == 'LIST_APPEND')
        elif op == self.opc.POP_TOP:
            # The POP_TOP in:
            #   ROT_TWO, POP_TOP,
            #   RETURN_xxx, POP_TOP (in 2.6-), or
            #   JUMP_IF_{FALSE,TRUE}, POP_TOP (in 2.6-)
            # belongs to the previous instruction rather than starting
            # a new statement.
            before_op = code[self.prev[offset]]
            drop = (before_op == self.opc.ROT_TWO or
                    self.version < 2.7 and
                    before_op in (self.opc.JUMP_IF_FALSE,
                                  self.opc.JUMP_IF_TRUE,
                                  self.opc.RETURN_VALUE))
        elif op in self.designator_ops:
            # Designators that directly follow FOR_ITER are the loop
            # variable, not a statement.
            j = self.prev[offset]
            while code[j] in self.designator_ops:
                j = self.prev[j]
            drop = self.version > 2.1 and code[j] == self.opc.FOR_ITER

        if drop:
            stmts.remove(offset)
            continue

        last_stmt = offset
        # Every offset since the previous statement maps to this one.
        next_stmt_map.extend([offset] * (offset - filled_to))
        filled_to = offset

    # Offsets in the final statement map to the end of the code.
    next_stmt_map.extend([end] * (end - len(next_stmt_map)))
def build_statement_indices(self):
    """
    Work out which offsets start a statement and build self.next_stmt.

    self.stmts ends up as the set of offsets considered statement
    starts.  self.next_stmt is a 'list-map': indexing it with an
    instruction offset gives the offset where the following statement
    begins (len(self.code) after the last statement).
    """
    code = self.code
    start = 0
    codelen = len(code)

    # Seed with the offsets of the plain statement opcodes.
    prelim = self.all_instr(start, codelen, self.statement_opcodes)
    stmts = self.stmts = set(prelim)

    # Add the statements that are recognised as opcode sequences.
    pass_stmts = set()
    for sequence in self.statement_opcode_sequences:
        for first in self.op_range(start, codelen - (len(sequence) + 1)):
            cursor = first
            for wanted in sequence:
                if wanted != code[cursor]:
                    break
                cursor += op_size(code[cursor], self.opc)
            else:
                # Whole sequence matched: record the offset of its last op.
                tail = self.prev_op[cursor]
                stmts.add(tail)
                pass_stmts.add(tail)

    # Only re-sort when the sequence scan added something.
    candidates = sorted(stmts) if pass_stmts else prelim

    next_stmt_map = self.next_stmt = []
    last_stmt_offset = -1
    filled_to = 0
    for stmt_offset in candidates:
        op = code[stmt_offset]
        drop = False
        if op == self.opc.JUMP_ABSOLUTE and stmt_offset not in pass_stmts:
            # Absolute jumps are examined, but 'pass' statements found
            # by the sequence scan are never dropped.  A forward jump,
            # or one on the same line as the previous statement, is not
            # a statement.
            target = self.get_target(stmt_offset)
            if (target > stmt_offset or
                    self.lines[last_stmt_offset].l_no ==
                    self.lines[stmt_offset].l_no):
                drop = True
            else:
                # Rewind over any run of JUMP_ABSOLUTEs; landing on
                # LIST_APPEND means this is a list comprehension.
                j = self.prev_op[stmt_offset]
                while code[j] == self.opc.JUMP_ABSOLUTE:
                    j = self.prev_op[j]
                drop = code[j] == self.opc.LIST_APPEND
        elif (op == self.opc.POP_TOP and
              code[self.prev_op[stmt_offset]] == self.opc.ROT_TWO):
            # ROT_TWO + POP_TOP is not a statement.
            drop = True
        elif op in self.designator_ops:
            # Designators that directly follow FOR_ITER are excluded.
            j = self.prev_op[stmt_offset]
            while code[j] in self.designator_ops:
                j = self.prev_op[j]
            drop = code[j] == self.opc.FOR_ITER

        if drop:
            stmts.remove(stmt_offset)
            continue

        # Every offset since the previous statement maps to this one.
        next_stmt_map.extend([stmt_offset] * (stmt_offset - filled_to))
        last_stmt_offset = stmt_offset
        filled_to = stmt_offset

    # Offsets in the final statement map to the end of the code.
    next_stmt_map.extend([codelen] * (codelen - len(next_stmt_map)))