def findlabels(code, opc):
    """Detect all offsets in a byte code which are jump targets.

    Parameters:
        code: either a sized, indexable bytecode sequence, or an object
            that exposes such a sequence as its ``co_code`` attribute.
        opc: opcode description providing ``HAVE_ARGUMENT``, ``JREL_OPS``
            and ``JABS_OPS``.

    Returns:
        The list of distinct jump-target offsets, in the order in which
        they are first encountered.
    """
    try:
        n = len(code)
    except TypeError:
        # No len(): fall back to the bytecode stored on the object.
        code = code.co_code
        n = len(code)

    labels = []
    # Mirror of ``labels`` used only for constant-time duplicate checks.
    seen = set()

    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    offset = 0
    while offset < n:
        op = code2num(code, offset)
        offset += 1
        if op < opc.HAVE_ARGUMENT:
            continue

        # Operand is combined from two bytes: low byte + high byte * 256.
        arg = code2num(code, offset) + code2num(code, offset + 1) * 256
        offset += 2

        if op in opc.JREL_OPS:
            # Relative jumps are measured from the end of this instruction.
            label = offset + arg
        elif op in opc.JABS_OPS:
            label = arg
        else:
            continue

        if label >= 0 and label not in seen:
            seen.add(label)
            labels.append(label)
    return labels
def unpack_opargs_bytecode(code, opc):
    """Yield an ``(offset, opcode, argument)`` triple per instruction.

    *code* may be a sized, indexable bytecode sequence or an object that
    carries one in ``co_code``.  ``argument`` is ``None`` for opcodes that
    ``op_has_argument`` reports as taking no operand.  The value produced
    for an ``EXTENDED_ARG`` opcode is passed through ``extended_arg_val``
    and OR-ed into the operand of the following instruction.
    """
    try:
        end = len(code)
    except TypeError:
        code = code.co_code
        end = len(code)

    carried = 0
    pos = 0
    while pos < end:
        start = pos
        op = code2num(code, pos)
        pos += 1

        if not op_has_argument(op, opc):
            yield (start, op, None)
            continue

        # NOTE(review): only one operand byte is read here although the
        # position advances by two below -- confirm this is intended.
        arg = code2num(code, pos) | carried
        if op == opc.EXTENDED_ARG:
            carried = extended_arg_val(opc, arg)
        else:
            carried = 0
        pos += 2
        yield (start, op, arg)
def get_instructions_bytes(bytecode, opc, varnames=None, names=None,
                           constants=None, cells=None, linestarts=None,
                           line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    Parameters:
        bytecode: sized, indexable bytecode sequence to decode.
        opc: opcode description; supplies ``findlabels``, ``python_version``,
            ``opname``, ``cmp_op``, ``EXTENDED_ARG`` and the ``*_OPS``
            opcode collections consulted below.
        varnames, names, constants, cells: optional lookup tables handed to
            ``_get_name_info`` / ``_get_const_info`` to resolve operands.
        linestarts: optional mapping from bytecode offset to line number.
        line_offset: amount added to every line number found in
            ``linestarts``.

    Yields:
        One ``Instruction`` per opcode, in bytecode order.
    """
    # A frozenset makes the per-instruction jump-target test O(1).
    labels = frozenset(opc.findlabels(bytecode, opc))

    # FIXME: We really need to distinguish 3.6.0a1 from 3.6.a3.
    # See below FIXME
    python_36 = opc.python_version >= 3.6

    starts_line = None
    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    n = len(bytecode)
    i = 0
    extended_arg_count = 0
    extended_arg = 0
    extended_arg_size = op_size(opc.EXTENDED_ARG, opc)
    while i < n:
        op = code2num(bytecode, i)

        offset = i
        if linestarts is not None:
            starts_line = linestarts.get(i)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = i in labels

        i += 1
        arg = None
        argval = None
        argrepr = ''
        has_arg = op_has_argument(op, opc)
        optype = None
        if has_arg:
            if python_36:
                # One operand byte; EXTENDED_ARG shifts it up for the
                # next instruction.
                arg = code2num(bytecode, i) | extended_arg
                extended_arg = (arg << 8) if op == opc.EXTENDED_ARG else 0
                # FIXME: Python 3.6.0a1 is 2, for 3.6.a3 we have 1
                i += 1
            else:
                # Two operand bytes: low byte + high byte * 256.
                arg = (code2num(bytecode, i)
                       + code2num(bytecode, i + 1) * 256
                       + extended_arg)
                i += 2
                extended_arg = arg * 65536 if op == opc.EXTENDED_ARG else 0

            # Set argval to the dereferenced value of the argument when
            # available, and argrepr to the string representation of argval.
            # disassemble_bytes needs the string repr of the
            # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in opc.CONST_OPS:
                argval, argrepr = _get_const_info(arg, constants)
                optype = 'const'
            elif op in opc.NAME_OPS:
                argval, argrepr = _get_name_info(arg, names)
                optype = 'name'
            elif op in opc.JREL_OPS:
                # ``i`` already points past this instruction's operand.
                argval = i + arg
                argrepr = "to " + repr(argval)
                optype = 'jrel'
            elif op in opc.JABS_OPS:
                argval = arg
                argrepr = "to " + repr(argval)
                optype = 'jabs'
            elif op in opc.LOCAL_OPS:
                argval, argrepr = _get_name_info(arg, varnames)
                optype = 'local'
            elif op in opc.COMPARE_OPS:
                argval = opc.cmp_op[arg]
                argrepr = argval
                optype = 'compare'
            elif op in opc.FREE_OPS:
                argval, argrepr = _get_name_info(arg, cells)
                optype = 'free'
            elif op in opc.NARGS_OPS:
                optype = 'nargs'
                if not python_36:
                    # The two raw operand bytes just consumed.
                    argrepr = ("%d positional, %d keyword pair"
                               % (code2num(bytecode, i - 2),
                                  code2num(bytecode, i - 1)))
            # This has to come after hasnargs. Some are in both?
            elif op in opc.VARGS_OPS:
                optype = 'vargs'

            # An opcode-specific formatter, when present, overrides argrepr.
            if (hasattr(opc, 'opcode_arg_fmt')
                    and opc.opname[op] in opc.opcode_arg_fmt):
                argrepr = opc.opcode_arg_fmt[opc.opname[op]](arg)
        elif python_36:
            # In the 3.6+ layout even argument-less opcodes advance one
            # more byte.
            i += 1

        opname = opc.opname[op]
        inst_size = op_size(op, opc) + (extended_arg_count * extended_arg_size)
        yield Instruction(opname, op, optype, inst_size, arg, argval, argrepr,
                          has_arg, offset, starts_line, is_jump_target,
                          extended_arg_count != 0)
        # Count consecutive EXTENDED_ARG prefixes so the next instruction's
        # size and "has extended arg" flag account for them.
        extended_arg_count = (extended_arg_count + 1
                              if op == opc.EXTENDED_ARG else 0)
def get_instructions_bytes(bytecode, opc, varnames=None, names=None,
                           constants=None, cells=None, linestarts=None,
                           line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    Parameters:
        bytecode: sized, indexable bytecode sequence to decode.
        opc: opcode description; supplies ``findlabels``, ``python_version``,
            ``opname``, ``cmp_op``, ``EXTENDED_ARG`` and the ``*_OPS``
            opcode collections consulted below.
        varnames, names, constants, cells: optional lookup tables handed to
            ``_get_name_info`` / ``_get_const_info`` to resolve operands.
        linestarts: optional mapping from bytecode offset to line number.
        line_offset: amount added to every line number found in
            ``linestarts``.

    Yields:
        One ``Instruction`` per opcode, in bytecode order.
    """
    # A frozenset makes the per-instruction jump-target test O(1).
    labels = frozenset(opc.findlabels(bytecode, opc))

    # FIXME: We really need to distinguish 3.6.0a1 from 3.6.a3.
    # See below FIXME
    python_36 = opc.python_version >= 3.6

    starts_line = None
    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    n = len(bytecode)
    i = 0
    extended_arg_count = 0
    extended_arg = 0
    extended_arg_size = op_size(opc.EXTENDED_ARG, opc)
    while i < n:
        op = code2num(bytecode, i)

        offset = i
        if linestarts is not None:
            starts_line = linestarts.get(i)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = i in labels

        i += 1
        arg = None
        argval = None
        argrepr = ''
        has_arg = op_has_argument(op, opc)
        optype = None
        if has_arg:
            if python_36:
                # One operand byte; EXTENDED_ARG shifts it up for the
                # next instruction.
                arg = code2num(bytecode, i) | extended_arg
                extended_arg = (arg << 8) if op == opc.EXTENDED_ARG else 0
                # FIXME: Python 3.6.0a1 is 2, for 3.6.a3 we have 1
                i += 1
            else:
                # Two operand bytes: low byte + high byte * 256.
                arg = (code2num(bytecode, i)
                       + code2num(bytecode, i + 1) * 256
                       + extended_arg)
                i += 2
                extended_arg = arg * 65536 if op == opc.EXTENDED_ARG else 0

            # Set argval to the dereferenced value of the argument when
            # available, and argrepr to the string representation of argval.
            # disassemble_bytes needs the string repr of the
            # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in opc.CONST_OPS:
                argval, argrepr = _get_const_info(arg, constants)
                optype = 'const'
            elif op in opc.NAME_OPS:
                argval, argrepr = _get_name_info(arg, names)
                optype = 'name'
            elif op in opc.JREL_OPS:
                # ``i`` already points past this instruction's operand.
                argval = i + arg
                argrepr = "to " + repr(argval)
                optype = 'jrel'
            elif op in opc.JABS_OPS:
                argval = arg
                argrepr = "to " + repr(argval)
                optype = 'jabs'
            elif op in opc.LOCAL_OPS:
                argval, argrepr = _get_name_info(arg, varnames)
                optype = 'local'
            elif op in opc.COMPARE_OPS:
                argval = opc.cmp_op[arg]
                argrepr = argval
                optype = 'compare'
            elif op in opc.FREE_OPS:
                argval, argrepr = _get_name_info(arg, cells)
                optype = 'free'
            elif op in opc.NARGS_OPS:
                optype = 'nargs'
                if not python_36:
                    # The two raw operand bytes just consumed.
                    argrepr = ("%d positional, %d keyword pair"
                               % (code2num(bytecode, i - 2),
                                  code2num(bytecode, i - 1)))
            # This has to come after hasnargs. Some are in both?
            elif op in opc.VARGS_OPS:
                optype = 'vargs'

            # An opcode-specific formatter, when present, overrides argrepr.
            if (hasattr(opc, 'opcode_arg_fmt')
                    and opc.opname[op] in opc.opcode_arg_fmt):
                argrepr = opc.opcode_arg_fmt[opc.opname[op]](arg)
        elif python_36:
            # In the 3.6+ layout even argument-less opcodes advance one
            # more byte.
            i += 1

        opname = opc.opname[op]
        inst_size = op_size(op, opc) + (extended_arg_count * extended_arg_size)
        yield Instruction(opname, op, optype, inst_size, arg, argval, argrepr,
                          has_arg, offset, starts_line, is_jump_target,
                          extended_arg_count != 0)
        # Count consecutive EXTENDED_ARG prefixes so the next instruction's
        # size and "has extended arg" flag account for them.
        extended_arg_count = (extended_arg_count + 1
                              if op == opc.EXTENDED_ARG else 0)