def disassemble(self, co, classname=None):
    """
    Disassemble a code object, returning a list of 'Token'.

    The main part of this procedure is modelled after dis.disassemble().

    Besides producing the token list, this method (re)initialises several
    attributes on ``self``: ``code`` (byte array of the instructions),
    ``linestarts``, ``prev`` (offset -> offset of the previous instruction),
    ``names``, ``toChange`` and ``lines`` (one ``linetuple`` per byte offset).

    co        -- the code object to disassemble.
    classname -- optional name of the enclosing class; when given, private
                 names mangled as ``_Class__name`` are turned back into
                 ``__name``.

    Returns a ``(tokens, customize)`` pair, where ``customize`` maps the
    argument-suffixed opcode names (e.g. ``CALL_FUNCTION_2``) to their
    argument.

    Raises NotImplementedError when an EXTENDED_ARG opcode is encountered.
    """
    rv = []
    customize = {}
    Token = self.Token  # shortcut
    self.code = array("B", co.co_code)
    n = len(self.code)
    # (offset, line number) pairs as produced by dis.findlinestarts()
    self.linestarts = list(dis.findlinestarts(co))
    self.prev = [0]
    # change jump struct
    self.restructRelativeJump()

    # class and names
    if classname:
        classname = "_" + classname.lstrip("_") + "__"

        def unmangle(name):
            # "_Class__attr" -> "__attr"; names ending in "__" are left alone.
            if name.startswith(classname) and name[-2:] != "__":
                return name[len(classname) - 2:]
            return name

        free = [unmangle(name) for name in (co.co_cellvars + co.co_freevars)]
        names = [unmangle(name) for name in co.co_names]
        varnames = [unmangle(name) for name in co.co_varnames]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames
    self.names = names

    # Offsets of the instructions to remove.
    toDel = []
    # Offsets of the instructions to change (filled in by helpers,
    # presumably getOpcodeToDel -- confirm against its definition).
    self.toChange = []
    for i in self.op_range(0, n):
        ret = self.getOpcodeToDel(i)
        if ret is not None:
            toDel += ret

    if toDel:
        # NOTE(review): ugly, to be reviewed / rethought (doing it all in
        # one pass would be nicer, but is tricky).
        delta = 0  # number of bytes already removed in front of ``x``
        for x in sorted(set(toDel)):
            pos = x - delta
            # Instructions with an argument occupy 3 bytes, others 1.
            size = 3 if self.code[pos] >= dis.HAVE_ARGUMENT else 1
            for _ in range(size):
                self.code.pop(pos)
                self.restructCode(pos)
            delta += size

    # Map every byte offset to the offset of the previous instruction.
    n = len(self.code)
    for i in self.op_range(0, n):
        self.prev.append(i)
        if self.code[i] >= HAVE_ARGUMENT:
            self.prev.extend((i, i))

    # self.lines[j] == (line number of byte j, offset where the next line
    # starts, or the code length for the last line).
    linestarts = self.linestarts
    self.lines = []
    linetuple = namedtuple("linetuple", ["l_no", "next"])
    linestartoffsets = {a for (a, _) in linestarts}
    j = 0
    prev_line_no = linestarts[0][1]
    for (start_byte, line_no) in linestarts[1:]:
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        prev_line_no = line_no
    while j < n:
        self.lines.append(linetuple(prev_line_no, n))
        j += 1

    # cf maps a target offset to the list of offsets that jump to it.
    cf = self.find_jump_targets(self.code)

    # Mark PRINT_ITEM / PRINT_NEWLINE that continue a print statement
    # started on the same source line.
    # NOTE(review): self.next_stmt is not set in this method; presumably
    # find_jump_targets() populates it -- confirm.
    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < n - 1:
        if self.lines[last_stmt].next > i:
            if self.code[last_stmt] == PRINT_ITEM:
                if self.code[i] == PRINT_ITEM:
                    replace[i] = "PRINT_ITEM_CONT"
                elif self.code[i] == PRINT_NEWLINE:
                    replace[i] = "PRINT_NEWLINE_CONT"
        last_stmt = i
        i = self.next_stmt[i]

    # Mark IMPORT_NAME instructions that follow another IMPORT_NAME on the
    # same source line ("import a, b").
    imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
    if len(imports) > 1:
        last_import = imports[0]
        for i in imports[1:]:
            if self.lines[last_import].next > i:
                if self.code[last_import] == IMPORT_NAME == self.code[i]:
                    replace[i] = "IMPORT_NAME_CONT"
            last_import = i

    for offset in self.op_range(0, n):
        op = self.code[offset]
        op_name = dis.opname[op]
        oparg = None
        pattr = None

        if offset in cf:
            # One COME_FROM pseudo-token per instruction jumping here.
            for k, source in enumerate(cf[offset]):
                rv.append(Token("COME_FROM", None, repr(source),
                                offset="%s_%d" % (offset, k)))

        if op >= HAVE_ARGUMENT:
            if op == dis.EXTENDED_ARG:
                # The original raised a string here, which is itself a
                # TypeError on Python >= 2.6; fail with a proper exception.
                raise NotImplementedError("EXTENDED_ARG is not supported")
            oparg = self.get_argument(offset)
            if op in dis.hasconst:
                const = co.co_consts[oparg]
                if isinstance(const, types.CodeType):
                    oparg = const
                    if const.co_name == "<lambda>":
                        assert op_name == "LOAD_CONST"
                        op_name = "LOAD_LAMBDA"
                    elif const.co_name == "<genexpr>":
                        op_name = "LOAD_GENEXPR"
                    elif const.co_name == "<dictcomp>":
                        op_name = "LOAD_DICTCOMP"
                    elif const.co_name == "<setcomp>":
                        op_name = "LOAD_SETCOMP"
                    # verify uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    pattr = "<code_object " + const.co_name + ">"
                else:
                    pattr = const
            elif op in dis.hasname:
                pattr = names[oparg]
            elif op in dis.hasjrel:
                # Relative jumps are measured from the next instruction.
                pattr = repr(offset + 3 + oparg)
            elif op in dis.hasjabs:
                pattr = repr(oparg)
            elif op in dis.haslocal:
                pattr = varnames[oparg]
            elif op in dis.hascompare:
                pattr = dis.cmp_op[oparg]
            elif op in dis.hasfree:
                pattr = free[oparg]

        if offset in self.toChange:
            if self.code[offset] == JA and self.code[oparg] == WITH_CLEANUP:
                op_name = "SETUP_WITH"
                cf[oparg] = cf.get(oparg, []) + [offset]

        if op in (
            BUILD_LIST,
            BUILD_TUPLE,
            BUILD_SLICE,
            UNPACK_SEQUENCE,
            MAKE_FUNCTION,
            CALL_FUNCTION,
            MAKE_CLOSURE,
            CALL_FUNCTION_VAR,
            CALL_FUNCTION_KW,
            CALL_FUNCTION_VAR_KW,
            DUP_TOPX,
        ):
            # CE - Hack for >= 2.5
            #      Now all values loaded via LOAD_CLOSURE are packed into
            #      a tuple before calling MAKE_CLOSURE.
            # Look the previous instruction up through self.prev rather
            # than assuming it sits exactly 3 bytes back: "offset - 3" can
            # land on an argument byte or wrap to a negative index.
            if op == BUILD_TUPLE and self.code[self.prev[offset]] == LOAD_CLOSURE:
                continue
            op_name = "%s_%d" % (op_name, oparg)
            if op != BUILD_SLICE:
                customize[op_name] = oparg
        elif op == JA:
            target = self.get_target(offset)
            if target < offset:
                if (
                    offset in self.stmts
                    and self.code[offset + 3] not in (END_FINALLY, POP_BLOCK)
                    and offset not in self.not_continue
                ):
                    op_name = "CONTINUE"
                else:
                    op_name = "JUMP_BACK"
        elif op == LOAD_GLOBAL:
            try:
                if pattr == "AssertionError" and rv and rv[-1] == "JUMP_IF_TRUE":
                    op_name = "LOAD_ASSERT"
            except AttributeError:
                # Deliberate best effort kept from the original; presumably
                # the Token/str comparison can raise here -- confirm.
                pass
        elif op == RETURN_VALUE:
            if offset in self.return_end_ifs:
                op_name = "RETURN_END_IF"

        rv.append(Token(replace.get(offset, op_name), oparg, pattr, offset,
                        linestart=offset in linestartoffsets))

    if self.showasm:
        out = self.out  # shortcut
        # Python 2 print-chevron syntax: one token per line, then a blank line.
        for t in rv:
            print >> out, t
        print >> out
    return rv, customize
def disassemble(self, co, classname=None):
    """
    Disassemble a code object, returning a list of 'Token'.

    The main part of this procedure is modelled after dis.disassemble().

    Besides producing the token list, this method (re)initialises several
    attributes on ``self``: ``code`` (byte array of the instructions, cut
    after the last RETURN_VALUE / END_FINALLY), ``linestarts``, ``prev``
    (offset -> offset of the previous instruction), ``names``, ``toChange``,
    ``lines`` (one ``linetuple`` per byte offset) and ``load_asserts``.

    co        -- the code object to disassemble.
    classname -- optional name of the enclosing class; when given, private
                 names mangled as ``_Class__name`` are turned back into
                 ``__name``.

    Returns a ``(tokens, customize)`` pair, where ``customize`` maps the
    argument-suffixed opcode names (e.g. ``CALL_FUNCTION_2``) to their
    argument.

    Raises NotImplementedError when an EXTENDED_ARG opcode is encountered.
    """
    rv = []
    customize = {}
    Token = self.Token  # shortcut

    self.code = array('B', co.co_code)
    # Drop anything after the last RETURN_VALUE / END_FINALLY. Start from
    # the full length so that code without such an opcode is kept whole
    # instead of failing on an unbound ``n``.
    n = len(self.code)
    for i in self.op_range(0, len(self.code)):
        if self.code[i] in (RETURN_VALUE, END_FINALLY):
            n = i + 1
    self.code = array('B', co.co_code[:n])

    # (offset, line number) pairs as produced by dis.findlinestarts()
    self.linestarts = list(dis.findlinestarts(co))
    self.prev = [0]

    # class and names
    if classname:
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            # "_Class__attr" -> "__attr"; names ending in "__" are left alone.
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name

        free = [unmangle(name) for name in (co.co_cellvars + co.co_freevars)]
        names = [unmangle(name) for name in co.co_names]
        varnames = [unmangle(name) for name in co.co_varnames]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames
    self.names = names

    # list of instruction to remove/add or change to match with bytecode 2.7
    self.toChange = []
    self.restructBytecode()
    codelen = len(self.code)

    # Map every byte offset to the offset of the previous instruction.
    for i in self.op_range(0, codelen):
        self.prev.append(i)
        if self.op_hasArgument(self.code[i]):
            self.prev.extend((i, i))

    # self.lines[j] == (line number of byte j, offset where the next line
    # starts, or the code length for the last line).
    linestarts = self.linestarts
    self.lines = []
    linetuple = namedtuple('linetuple', ['l_no', 'next'])
    linestartoffsets = {a for (a, _) in linestarts}
    j = 0
    prev_line_no = linestarts[0][1]
    for (start_byte, line_no) in linestarts[1:]:
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        prev_line_no = line_no
    while j < codelen:
        self.lines.append(linetuple(prev_line_no, codelen))
        j += 1

    # Offsets of "LOAD_GLOBAL AssertionError" directly following a PJIT
    # instruction; these are emitted as LOAD_ASSERT below.
    self.load_asserts = set()
    for i in self.op_range(0, codelen):
        if (self.code[i] == PJIT
                and i + 3 < codelen
                and self.code[i + 3] == LOAD_GLOBAL
                and names[self.get_argument(i + 3)] == 'AssertionError'):
            self.load_asserts.add(i + 3)

    # cf maps a target offset to the list of offsets that jump to it.
    cf = self.find_jump_targets(self.code)

    # Mark PRINT_ITEM / PRINT_NEWLINE that continue a print statement
    # started on the same source line.
    # NOTE(review): self.next_stmt is not set in this method; presumably
    # find_jump_targets() populates it -- confirm.
    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < codelen - 1:
        if self.lines[last_stmt].next > i:
            if self.code[last_stmt] == PRINT_ITEM:
                if self.code[i] == PRINT_ITEM:
                    replace[i] = 'PRINT_ITEM_CONT'
                elif self.code[i] == PRINT_NEWLINE:
                    replace[i] = 'PRINT_NEWLINE_CONT'
        last_stmt = i
        i = self.next_stmt[i]

    # Mark IMPORT_NAME instructions that follow another IMPORT_NAME on the
    # same source line ("import a, b").
    imports = self.all_instr(0, codelen,
                             (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
    if len(imports) > 1:
        last_import = imports[0]
        for i in imports[1:]:
            if self.lines[last_import].next > i:
                if self.code[last_import] == IMPORT_NAME == self.code[i]:
                    replace[i] = 'IMPORT_NAME_CONT'
            last_import = i

    for offset in self.op_range(0, codelen):
        op = self.code[offset]
        op_name = opname[op]
        oparg = None
        pattr = None

        if offset in cf:
            # One COME_FROM pseudo-token per instruction jumping here.
            for k, source in enumerate(cf[offset]):
                rv.append(Token('COME_FROM', None, repr(source),
                                offset="%s_%d" % (offset, k)))

        if self.op_hasArgument(op):
            if op == EXTENDED_ARG:
                # Not supported; the accumulation code that used to follow
                # the raise was unreachable and has been dropped.
                raise NotImplementedError("EXTENDED_ARG is not supported")
            oparg = self.get_argument(offset)
            if op in hasconst:
                const = co.co_consts[oparg]
                if isinstance(const, types.CodeType):
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert op_name == 'LOAD_CONST'
                        op_name = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        op_name = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        op_name = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        op_name = 'LOAD_SETCOMP'
                    # verify uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in hasname:
                pattr = names[oparg]
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction.
                pattr = repr(offset + 3 + oparg)
            elif op in hasjabs:
                pattr = repr(oparg)
            elif op in haslocal:
                pattr = varnames[oparg]
            elif op in hascompare:
                pattr = cmp_op[oparg]
            elif op in hasfree:
                pattr = free[oparg]

        if offset in self.toChange:
            if self.code[offset] == JA and self.code[oparg] == WITH_CLEANUP:
                op_name = 'SETUP_WITH'
                cf[oparg] = cf.get(oparg, []) + [offset]

        if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE,
                  UNPACK_SEQUENCE,
                  MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                  CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                  CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS):
            # CE - Hack for >= 2.5
            #      Now all values loaded via LOAD_CLOSURE are packed into
            #      a tuple before calling MAKE_CLOSURE.
            if op == BUILD_TUPLE and \
                    self.code[self.prev[offset]] == LOAD_CLOSURE:
                continue
            op_name = '%s_%d' % (op_name, oparg)
            if op != BUILD_SLICE:
                customize[op_name] = oparg
        elif op == JA:
            target = self.get_target(offset)
            if target < offset:
                if offset in self.stmts \
                        and self.code[offset + 3] not in (END_FINALLY, POP_BLOCK) \
                        and offset not in self.not_continue:
                    op_name = 'CONTINUE'
                else:
                    op_name = 'JUMP_BACK'
        elif op == LOAD_GLOBAL:
            if offset in self.load_asserts:
                op_name = 'LOAD_ASSERT'
        elif op == RETURN_VALUE:
            if offset in self.return_end_ifs:
                op_name = 'RETURN_END_IF'

        rv.append(Token(replace.get(offset, op_name), oparg, pattr, offset,
                        linestart=offset in linestartoffsets))

    if self.showasm:
        out = self.out  # shortcut
        # Python 2 print-chevron syntax: one token per line, then a blank line.
        for t in rv:
            print >>out, t
        print >>out
    return rv, customize
def disassemble(self, co, classname=None):
    """
    Disassemble a code object, returning a list of 'Token'.

    The main part of this procedure is modelled after dis.disassemble().

    Besides producing the token list, this method (re)initialises several
    attributes on ``self``: ``code`` (byte array of the instructions, cut
    after the last RETURN_VALUE / END_FINALLY), ``linestarts``, ``prev``
    (offset -> offset of the previous instruction), ``names``, ``toChange``,
    ``lines`` (one ``linetuple`` per byte offset) and ``load_asserts``.

    co        -- the code object to disassemble.
    classname -- optional name of the enclosing class; when given, private
                 names mangled as ``_Class__name`` are turned back into
                 ``__name``.

    Returns a ``(tokens, customize)`` pair, where ``customize`` maps the
    argument-suffixed opcode names (e.g. ``CALL_FUNCTION_2``) to their
    argument.

    Raises NotImplementedError when an EXTENDED_ARG opcode is encountered.
    """
    rv = []
    customize = {}
    Token = self.Token  # shortcut

    self.code = array('B', co.co_code)
    # Drop anything after the last RETURN_VALUE / END_FINALLY. Start from
    # the full length so that code without such an opcode is kept whole
    # instead of failing on an unbound ``n``.
    n = len(self.code)
    for i in self.op_range(0, len(self.code)):
        if self.code[i] in (RETURN_VALUE, END_FINALLY):
            n = i + 1
    self.code = array('B', co.co_code[:n])

    # (offset, line number) pairs as produced by dis.findlinestarts()
    self.linestarts = list(dis.findlinestarts(co))
    self.prev = [0]

    # class and names
    if classname:
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            # "_Class__attr" -> "__attr"; names ending in "__" are left alone.
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name

        free = [unmangle(name) for name in (co.co_cellvars + co.co_freevars)]
        names = [unmangle(name) for name in co.co_names]
        varnames = [unmangle(name) for name in co.co_varnames]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames
    self.names = names

    # list of instruction to remove/add or change to match with bytecode 2.7
    self.toChange = []
    self.restructBytecode()
    codelen = len(self.code)

    # Map every byte offset to the offset of the previous instruction.
    for i in self.op_range(0, codelen):
        self.prev.append(i)
        if self.op_hasArgument(self.code[i]):
            self.prev.extend((i, i))

    # self.lines[j] == (line number of byte j, offset where the next line
    # starts, or the code length for the last line).
    linestarts = self.linestarts
    self.lines = []
    linetuple = namedtuple('linetuple', ['l_no', 'next'])
    linestartoffsets = {a for (a, _) in linestarts}
    j = 0
    prev_line_no = linestarts[0][1]
    for (start_byte, line_no) in linestarts[1:]:
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        prev_line_no = line_no
    while j < codelen:
        self.lines.append(linetuple(prev_line_no, codelen))
        j += 1

    # Offsets of "LOAD_GLOBAL AssertionError" directly following a PJIT
    # instruction; these are emitted as LOAD_ASSERT below.
    self.load_asserts = set()
    for i in self.op_range(0, codelen):
        if (self.code[i] == PJIT
                and i + 3 < codelen
                and self.code[i + 3] == LOAD_GLOBAL
                and names[self.get_argument(i + 3)] == 'AssertionError'):
            self.load_asserts.add(i + 3)

    # cf maps a target offset to the list of offsets that jump to it.
    cf = self.find_jump_targets(self.code)

    # Mark PRINT_ITEM / PRINT_NEWLINE that continue a print statement
    # started on the same source line.
    # NOTE(review): self.next_stmt is not set in this method; presumably
    # find_jump_targets() populates it -- confirm.
    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < codelen - 1:
        if self.lines[last_stmt].next > i:
            if self.code[last_stmt] == PRINT_ITEM:
                if self.code[i] == PRINT_ITEM:
                    replace[i] = 'PRINT_ITEM_CONT'
                elif self.code[i] == PRINT_NEWLINE:
                    replace[i] = 'PRINT_NEWLINE_CONT'
        last_stmt = i
        i = self.next_stmt[i]

    # Mark IMPORT_NAME instructions that follow another IMPORT_NAME on the
    # same source line ("import a, b").
    imports = self.all_instr(0, codelen,
                             (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
    if len(imports) > 1:
        last_import = imports[0]
        for i in imports[1:]:
            if self.lines[last_import].next > i:
                if self.code[last_import] == IMPORT_NAME == self.code[i]:
                    replace[i] = 'IMPORT_NAME_CONT'
            last_import = i

    for offset in self.op_range(0, codelen):
        op = self.code[offset]
        op_name = opname[op]
        oparg = None
        pattr = None

        if offset in cf:
            # One COME_FROM pseudo-token per instruction jumping here.
            for k, source in enumerate(cf[offset]):
                rv.append(Token('COME_FROM', None, repr(source),
                                offset="%s_%d" % (offset, k)))

        if self.op_hasArgument(op):
            if op == EXTENDED_ARG:
                # Not supported; the accumulation code that used to follow
                # the raise was unreachable and has been dropped.
                raise NotImplementedError("EXTENDED_ARG is not supported")
            oparg = self.get_argument(offset)
            if op in hasconst:
                const = co.co_consts[oparg]
                if isinstance(const, types.CodeType):
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert op_name == 'LOAD_CONST'
                        op_name = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        op_name = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        op_name = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        op_name = 'LOAD_SETCOMP'
                    # verify uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in hasname:
                pattr = names[oparg]
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction.
                pattr = repr(offset + 3 + oparg)
            elif op in hasjabs:
                pattr = repr(oparg)
            elif op in haslocal:
                pattr = varnames[oparg]
            elif op in hascompare:
                pattr = cmp_op[oparg]
            elif op in hasfree:
                pattr = free[oparg]

        if offset in self.toChange:
            if self.code[offset] == JA and self.code[oparg] == WITH_CLEANUP:
                op_name = 'SETUP_WITH'
                cf[oparg] = cf.get(oparg, []) + [offset]

        if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE,
                  UNPACK_SEQUENCE,
                  MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                  CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                  CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS):
            # CE - Hack for >= 2.5
            #      Now all values loaded via LOAD_CLOSURE are packed into
            #      a tuple before calling MAKE_CLOSURE.
            if op == BUILD_TUPLE and \
                    self.code[self.prev[offset]] == LOAD_CLOSURE:
                continue
            op_name = '%s_%d' % (op_name, oparg)
            if op != BUILD_SLICE:
                customize[op_name] = oparg
        elif op == JA:
            target = self.get_target(offset)
            if target < offset:
                if offset in self.stmts \
                        and self.code[offset + 3] not in (END_FINALLY, POP_BLOCK) \
                        and offset not in self.not_continue:
                    op_name = 'CONTINUE'
                else:
                    op_name = 'JUMP_BACK'
        elif op == LOAD_GLOBAL:
            if offset in self.load_asserts:
                op_name = 'LOAD_ASSERT'
        elif op == RETURN_VALUE:
            if offset in self.return_end_ifs:
                op_name = 'RETURN_END_IF'

        rv.append(Token(replace.get(offset, op_name), oparg, pattr, offset,
                        linestart=offset in linestartoffsets))

    if self.showasm:
        out = self.out  # shortcut
        # Python 2 print-chevron syntax: one token per line, then a blank line.
        for t in rv:
            print >> out, t
        print >> out
    return rv, customize