def create_code(asm, label, backpatch): # print('label: ', asm.label) # print('backpatch: ', asm.backpatch_inst) bcode = [] # print(asm.code.instructions) offset = 0 extended_value = 0 offset2label = {label[j]: j for j in label} for i, inst in enumerate(asm.code.instructions): bcode.append(inst.opcode) if offset in offset2label: if is_int(offset2label[offset]): inst.line_no = int(offset2label[offset]) asm.code.co_lnotab[offset] = inst.line_no inst.offset = offset offset += xdis.op_size(inst.opcode, asm.opc) if xdis.op_has_argument(inst.opcode, asm.opc): if inst in backpatch: target = inst.arg try: if inst.opcode in asm.opc.JREL_OPS: inst.arg = label[target] - offset else: inst.arg = label[target] pass pass except KeyError: err("Label %s not found" % target, inst, i) elif is_int(inst.arg): if inst.opcode == asm.opc.EXTENDED_ARG: extended_value += inst.arg if asm.opc.version >= 3.6: extended_value <<= 8 else: extended_value <<= 16 pass elif inst.arg.startswith('(') and inst.arg.endswith(')'): operand = inst.arg[1:-1] if inst.opcode in asm.opc.COMPARE_OPS: if operand in cmp_op: inst.arg = cmp_op.index(operand) else: err("Can't handle compare operand %s" % inst.arg, inst, i) pass elif inst.opcode in asm.opc.CONST_OPS: operand = ast.literal_eval(operand) update_code_field('co_consts', operand, inst, asm.code) elif inst.opcode in asm.opc.LOCAL_OPS: update_code_field('co_varnames', operand, inst, asm.code) elif inst.opcode in asm.opc.NAME_OPS: update_code_field('co_names', operand, inst, asm.code) elif inst.opcode in asm.opc.FREE_OPS: if operand in asm.code.co_cellvars: inst.arg = asm.code.co_cellvars.index(operand) else: update_code_field('co_freevars', operand, inst, asm.code) else: # from trepan.api import debug; debug() err("Can't handle operand %s" % inst.arg, inst, i) else: # from trepan.api import debug; debug() err( "Don't understand operand %s expecting int or (..)" % inst.arg, inst, i) if asm.opc.version < 3.6: if inst.opcode == asm.opc.EXTENDED_ARG: arg_tup = xdis.util.num2code(inst.arg) else: arg_tup = xdis.util.num2code(inst.arg - extended_value) extended_value = 0 bcode += arg_tup # 3.6 else: if inst.opcode == asm.opc.EXTENDED_ARG: bcode.append(inst.arg) else: bcode.append(inst.arg - extended_value) extended_value = 0 elif asm.opc.version >= 3.6: bcode.append(0) if asm.opc.version >= 3.0: co_code = bytearray() for j in bcode: co_code.append(j) asm.code.co_code = bytes(co_code) else: asm.code.co_code = ''.join([chr(j) for j in bcode]) # Stamp might be added here code = asm.code.freeze() # asm.print_instructions() # print (*args) # co = self.Code(*args) return code
def asm_file(path): offset = 0 methods = {} method_name = None asm = None backpatch_inst = set([]) label = {} lines = open(path).readlines() i = 0 bytecode_seen = False while i < len(lines): line = lines[i] i += 1 if line.startswith('#'): if line.startswith('# Python bytecode '): python_version = line[len('# Python bytecode '):].strip( ).split()[0] asm = Assembler(python_version) asm.code_init() bytecode_seen = True elif line.startswith('# Timestamp in code: '): text = line[len('# Timestamp in code: '):].strip() time_str = text.split()[0] if is_int(time_str): asm.timestamp = int(time_str) elif line.startswith('# Method Name: '): if method_name: co = create_code(asm, label, backpatch_inst) asm.update_lists(co, label, backpatch_inst) label = {} backpatch_inst = set([]) methods[method_name] = co offset = 0 asm.code_init() asm.code.co_name = line[len('# Method Name: '):].strip() method_name = asm.code.co_name elif line.startswith('# Filename: '): asm.code.co_filename = line[len('# Filename: '):].strip() elif line.startswith('# First Line: '): s = line[len('# First Line: '):].strip() first_lineno = int(s) asm.code.co_firstlineno = first_lineno elif line.startswith('# Argument count: '): argc = line[len('# Argument count: '):].strip().split()[0] asm.code.co_argcount = ast.literal_eval(argc) elif line.startswith('# Number of locals: '): l_str = line[len('# Number of locals: '):].strip() asm.code.co_nlocals = int(l_str) elif line.startswith("# Source code size mod 2**32: "): l_str = line[len("# Source code size mod 2**32: " ):-len(' bytes')].strip() asm.size = int(l_str) elif line.startswith('# Stack size: '): l_str = line[len('# Stack size: '):].strip() asm.code.co_stacksize = int(l_str) elif line.startswith('# Flags: '): flags = line[len('# Flags: '):].strip().split()[0] asm.code.co_flags = ast.literal_eval(flags) elif line.startswith('# Constants:'): count = 0 while i < len(lines): line = lines[i] i += 1 match = re.match('^#\s+(\d+): (.+)$', line) if match: index = int(match.group(1)) assert index == count expr = match.group(2) match = re.match( '<code[2,3]? object (\S+) at (0x[0-f]+)', expr) if match: name = match.group(1) m2 = re.match("^<(.+)>$", name) if m2: name = "%s_%s" % (m2.group(1), match.group(2)) if name in methods: asm.code.co_consts.append(methods[name]) else: print( "line %d (%s, %s): can't find method %s" % (i, asm.code.co_filename, method_name, name)) asm.code.co_consts.append("**bogus %s**" % name) else: asm.code.co_consts.append(ast.literal_eval(expr)) count += 1 else: i -= 1 break pass pass elif line.startswith('# Cell variables:'): i = update_code_tuple_field('co_cellvars', asm.code, lines, i) elif line.startswith('# Free variables:'): i = update_code_tuple_field('co_freevars', asm.code, lines, i) elif line.startswith('# Names:'): i = update_code_tuple_field('co_names', asm.code, lines, i) elif line.startswith('# Varnames:'): line = lines[i] asm.code.co_varnames = line[1:].strip().split(', ') i += 1 else: if not line.strip(): continue match = re.match('^([^\s]+):$', line) if match: label[match.group(1)] = offset continue match = re.match('^\s*([\d]+):\s*$', line) if match: line_no = int(match.group(1)) asm.code.co_lnotab[offset] = line_no continue # Opcode section assert bytecode_seen, 'File needs to start out with: # Python bytecode <version>' fields = line.strip().split() line_no = None l = len(fields) if l > 1: if fields[0] == '>>': fields = fields[1:] l -= 1 if is_lineno(fields[0]) and is_int(fields[1]): line_no = int(fields[0][:-1]) opname, operand = get_opname_operand(fields[2:]) elif is_lineno(fields[0]): line_no = int(fields[0][:-1]) fields = fields[1:] if fields[0] == '>>': fields = fields[1:] if is_int(fields[0]): fields = fields[1:] opname, operand = get_opname_operand(fields) elif is_int(fields[0]): opname, operand = get_opname_operand(fields[1:]) else: opname, operand = get_opname_operand(fields) else: opname, _ = get_opname_operand(fields) if opname in asm.opc.opname: inst = Instruction() inst.opname = opname.replace('+', '_') inst.opcode = asm.opc.opmap[inst.opname] if xdis.op_has_argument(inst.opcode, asm.opc): inst.arg = operand else: inst.arg = None inst.line_no = line_no asm.code.instructions.append(inst) if inst.opcode in asm.opc.JUMP_OPS: if not is_int(operand): backpatch_inst.add(inst) offset += xdis.op_size(inst.opcode, asm.opc) else: raise RuntimeError("Illegal opname %s in:\n%s" % (opname, line)) pass pass # print(asm.code.co_lnotab) if asm: co = create_code(asm, label, backpatch_inst) asm.update_lists(co, label, backpatch_inst) asm.code_list.reverse() asm.status = 'finished' return asm
def asm_file(path): offset = 0 methods = {} method_name = None asm = None backpatch_inst = set([]) label = {} lines = open(path).readlines() i = 0 bytecode_seen = False while i < len(lines): line = lines[i] i += 1 if line.startswith("#"): match = re.match("^# (Pypy )?Python bytecode ", line) if match: if match.group(1): is_pypy = len(pypy_str) pypy_str = match.group(1) else: is_pypy = False pypy_str = "" python_version = (line[len("# Python bytecode " + pypy_str):].strip().split()[0]) asm = Assembler(python_version, is_pypy) if python_version >= 3.8: TypeError( "Creating Python version %s not supported yet. Feel free to fix and put in a PR.\n" % python_version) asm.code_init(python_version) bytecode_seen = True elif line.startswith("# Timestamp in code: "): text = line[len("# Timestamp in code: "):].strip() time_str = text.split()[0] if is_int(time_str): asm.timestamp = int(time_str) elif line.startswith("# Method Name: "): if method_name: co = create_code(asm, label, backpatch_inst) asm.update_lists(co, label, backpatch_inst) label = {} backpatch_inst = set([]) methods[method_name] = co offset = 0 asm.code_init(python_version) asm.code.co_name = line[len("# Method Name: "):].strip() method_name = asm.code.co_name elif line.startswith("# SipHash: "): siphash = line[len("# ShipHash: "):].strip().split()[0] asm.siphash = ast.literal_eval(siphash) if asm.siphash != 0: raise TypeError( "SIP hashes not supported yet. Feel free to fix and in a PR.\n" ) elif line.startswith("# Filename: "): asm.code.co_filename = line[len("# Filename: "):].strip() elif line.startswith("# First Line: "): s = line[len("# First Line: "):].strip() first_lineno = int(s) asm.code.co_firstlineno = first_lineno elif line.startswith("# Argument count: "): argc = line[len("# Argument count: "):].strip().split()[0] elif line.startswith("# Position-only argument count: "): argc = (line[len("# Position-only argument count: "):].strip(). split()[0]) asm.code.co_posonlyargcount = ast.literal_eval(argc) elif line.startswith("# Keyword-only argument count: "): argc = line[len("# Keyword-only argument count: "):].strip( ).split()[0] asm.code.co_kwonlyargcount = ast.literal_eval(argc) elif line.startswith("# Number of locals: "): l_str = line[len("# Number of locals: "):].strip() asm.code.co_nlocals = int(l_str) elif line.startswith("# Source code size mod 2**32: "): l_str = line[len("# Source code size mod 2**32: " ):-len(" bytes")].strip() asm.size = int(l_str) elif line.startswith("# Stack size: "): l_str = line[len("# Stack size: "):].strip() asm.code.co_stacksize = int(l_str) elif line.startswith("# Flags: "): flags = line[len("# Flags: "):].strip().split()[0] asm.code.co_flags = ast.literal_eval(flags) elif line.startswith("# Constants:"): count = 0 while i < len(lines): line = lines[i] i += 1 match = re.match("^#\s+(\d+): (.+)$", line) if match: index = int(match.group(1)) assert index == count, ( "Constant index {%d} found on line {%d}" "doesn't match expected constant index {%d}." % (index, i, count)) expr = match.group(2) match = re.match( "<(?:Code\d+ )?code object (\S+) at (0x[0-f]+)", expr) if match: name = match.group(1) m2 = re.match("^<(.+)>$", name) if m2: name = "%s_%s" % (m2.group(1), match.group(2)) if name in methods: asm.code.co_consts.append(methods[name]) else: print( "line %d (%s, %s): can't find method %s" % (i, asm.code.co_filename, method_name, name)) asm.code.co_consts.append("**bogus %s**" % name) else: asm.code.co_consts.append(ast.literal_eval(expr)) count += 1 else: i -= 1 break pass pass elif line.startswith("# Cell variables:"): i = update_code_tuple_field("co_cellvars", asm.code, lines, i) elif line.startswith("# Free variables:"): i = update_code_tuple_field("co_freevars", asm.code, lines, i) elif line.startswith("# Names:"): i = update_code_tuple_field("co_names", asm.code, lines, i) elif line.startswith("# Varnames:"): line = lines[i] asm.code.co_varnames = line[1:].strip().split(", ") i += 1 elif line.startswith("# Positional arguments:"): line = lines[i] args = line[1:].strip().split(", ") asm.code.co_argcount = len(args) i += 1 else: if not line.strip(): continue match = re.match("^([^\s]+):$", line) if match: label[match.group(1)] = offset continue match = re.match("^\s*([\d]+):\s*$", line) if match: line_no = int(match.group(1)) asm.code.co_lnotab[offset] = line_no continue # Opcode section assert ( bytecode_seen ), "File needs to start out with: # Python bytecode <version>" fields = line.strip().split() line_no = None l = len(fields) if l > 1: if fields[0] == ">>": fields = fields[1:] l -= 1 if is_lineno(fields[0]) and is_int(fields[1]): line_no = int(fields[0][:-1]) opname, operand = get_opname_operand(fields[2:]) elif is_lineno(fields[0]): line_no = int(fields[0][:-1]) fields = fields[1:] if fields[0] == ">>": fields = fields[1:] if is_int(fields[0]): fields = fields[1:] opname, operand = get_opname_operand(fields) elif is_int(fields[0]): opname, operand = get_opname_operand(fields[1:]) else: opname, operand = get_opname_operand(fields) else: opname, _ = get_opname_operand(fields) if opname in asm.opc.opname: inst = Instruction() inst.opname = opname.replace("+", "_") inst.opcode = asm.opc.opmap[inst.opname] if xdis.op_has_argument(inst.opcode, asm.opc): inst.arg = operand else: inst.arg = None inst.line_no = line_no asm.code.instructions.append(inst) if inst.opcode in asm.opc.JUMP_OPS: if not is_int(operand): backpatch_inst.add(inst) offset += xdis.op_size(inst.opcode, asm.opc) else: raise RuntimeError("Illegal opname %s in:\n%s" % (opname, line)) pass pass # print(asm.code.co_lnotab) if asm: co = create_code(asm, label, backpatch_inst) asm.update_lists(co, label, backpatch_inst) asm.code_list.reverse() asm.status = "finished" return asm
def parse_byte_and_args(self, byte_code, replay=False): """ Parse 1 - 3 bytes of bytecode into an instruction and optionally arguments. Argument replay is used to handle breakpoints. """ f = self.frame f_code = f.f_code co_code = f_code.co_code extended_arg = 0 # Note: There is never more than one argument. # The list size is used to indicate whether an argument # exists or not. # FIMXE: remove and use int_arg as a indicator of whether # the argument exists. arguments = [] int_arg = None while True: if f.fallthrough: if not replay: f.f_lasti = next_offset(byte_code, self.opc, f.f_lasti) else: # Jump instructions must set this False. f.fallthrough = True offset = f.f_lasti line_number = self.frame.linestarts.get(offset, None) if line_number is not None: f.f_lineno = line_number if not replay: byte_code = byteint(co_code[offset]) byte_name = self.opc.opname[byte_code] arg_offset = offset + 1 arg = None if op_has_argument(byte_code, self.opc): if self.version >= 3.6: int_arg = code2num(co_code, arg_offset) | extended_arg # Note: Python 3.6.0a1 is 2, for 3.6.a3 and beyond we have 1 arg_offset += 1 if byte_code == self.opc.EXTENDED_ARG: extended_arg = int_arg << 8 continue else: extended_arg = 0 else: int_arg = (code2num(co_code, arg_offset) + code2num(co_code, arg_offset + 1) * 256 + extended_arg) arg_offset += 2 if byte_code == self.opc.EXTENDED_ARG: extended_arg = int_arg * 65536 continue else: extended_arg = 0 if byte_code in self.opc.CONST_OPS: arg = f_code.co_consts[int_arg] elif byte_code in self.opc.FREE_OPS: if int_arg < len(f_code.co_cellvars): arg = f_code.co_cellvars[int_arg] else: var_idx = int_arg - len(f.f_code.co_cellvars) arg = f_code.co_freevars[var_idx] elif byte_code in self.opc.NAME_OPS: arg = f_code.co_names[int_arg] elif byte_code in self.opc.JREL_OPS: # Many relative jumps are conditional, # so setting f.fallthrough is wrong. arg = arg_offset + int_arg elif byte_code in self.opc.JABS_OPS: # We probably could set fallthough, since many (all?) # of these are unconditional, but we'll make the jump do # the work of setting. arg = int_arg elif byte_code in self.opc.LOCAL_OPS: arg = f_code.co_varnames[int_arg] else: arg = int_arg arguments = [arg] break return byte_name, byte_code, int_arg, arguments, offset, line_number
def create_code(asm, label, backpatch): # print('label: ', asm.label) # print('backpatch: ', asm.backpatch_inst) bcode = [] # print(asm.code.instructions) offset = 0 extended_value = 0 offset2label = {label[j]: j for j in label} for i, inst in enumerate(asm.code.instructions): bcode.append(inst.opcode) if offset in offset2label: if is_int(offset2label[offset]): inst.line_no = int(offset2label[offset]) asm.code.co_lnotab[offset] = inst.line_no inst.offset = offset offset += xdis.op_size(inst.opcode, asm.opc) if xdis.op_has_argument(inst.opcode, asm.opc): if inst in backpatch: target = inst.arg match = re.match(r"^(L\d+)(?: \(to \d+\))?$", target) if match: target = match.group(1) try: if inst.opcode in asm.opc.JREL_OPS: inst.arg = label[target] - offset else: inst.arg = label[target] pass pass except KeyError: err(f"Label {target} not found.\nI know about {backpatch}", inst, i) elif is_int(inst.arg): if inst.opcode == asm.opc.EXTENDED_ARG: extended_value += inst.arg if asm.opc.version >= 3.6: extended_value <<= 8 else: extended_value <<= 16 pass elif inst.arg.startswith("(") and inst.arg.endswith(")"): operand = inst.arg[1:-1] if inst.opcode in asm.opc.COMPARE_OPS: if operand in cmp_op: inst.arg = cmp_op.index(operand) else: err("Can't handle compare operand %s" % inst.arg, inst, i) pass elif inst.opcode in asm.opc.CONST_OPS: if not (operand.startswith("<Code") or operand.startswith("<code")): operand = ast.literal_eval(operand) update_code_field("co_consts", operand, inst, asm.code) elif inst.opcode in asm.opc.LOCAL_OPS: update_code_field("co_varnames", operand, inst, asm.code) elif inst.opcode in asm.opc.NAME_OPS: update_code_field("co_names", operand, inst, asm.code) elif inst.opcode in asm.opc.FREE_OPS: if operand in asm.code.co_cellvars: inst.arg = asm.code.co_cellvars.index(operand) else: update_code_field("co_freevars", operand, inst, asm.code) else: # from trepan.api import debug; debug() err("Can't handle operand %s" % inst.arg, inst, i) else: # from trepan.api import debug; debug() err( "Don't understand operand %s expecting int or (..)" % inst.arg, inst, i, ) if asm.opc.version_tuple < (3, 6): if inst.opcode == asm.opc.EXTENDED_ARG: arg_tup = xdis.util.num2code(inst.arg) else: arg_tup = xdis.util.num2code(inst.arg - extended_value) extended_value = 0 bcode += arg_tup # 3.6 else: if inst.opcode == asm.opc.EXTENDED_ARG: bcode.append(inst.arg) else: bcode.append(inst.arg - extended_value) extended_value = 0 elif asm.opc.version_tuple >= (3, 6): bcode.append(0) if asm.opc.version_tuple >= (3, 0): co_code = bytearray() for j in bcode: co_code.append(j % 255) asm.code.co_code = bytes(co_code) else: asm.code.co_code = "".join([chr(j) for j in bcode]) # Stamp might be added here if asm.python_version[:2] == PYTHON_VERSION_TRIPLE[:2]: code = asm.code.to_native() else: code = asm.code.freeze() # asm.print_instructions() # print (*args) # co = self.Code(*args) return code