def uncompyle(version, co, out=None, showasm=False, showast=False,
              timestamp=None, showgrammar=False, code_objects={}):
    """
    disassembles and deparses a given code block 'co'
    """
    assert iscode(co)

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s (decompiled from Python %s)' %
          (version, PYTHON_VERSION), file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)
    if timestamp:
        print('# Compiled at: %s' %
              datetime.datetime.fromtimestamp(timestamp), file=real_out)

    try:
        pysource.deparse_code(version, co, out, showasm, showast, showgrammar,
                              code_objects=code_objects)
    except pysource.SourceWalkerError as e:
        # deparsing failed
        if real_out != out:
            print(e, file=real_out)
        raise
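# Example usage (a sketch, not part of the original module): decompile a small
# code object produced by the running interpreter. It assumes the module-level
# names used above (sys, PYTHON_VERSION, pysource) are in scope and that the
# running interpreter's version is one the scanners support.
if __name__ == '__main__':
    example_co = compile("x = 1\nprint(x)\n", "<example>", "exec")
    uncompyle(PYTHON_VERSION, example_co, out=sys.stdout)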
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    assert iscode(co)
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
def deparse_code(version, co, out=StringIO(), showasm=False, showast=False,
                 showgrammar=False):
    assert iscode(co)

    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    debug_parser['reduce'] = showgrammar

    # Build AST from disassembly.
    # deparsed = pysource.FragmentsWalker(out, scanner, showast=showast)
    deparsed = FragmentsWalker(version, scanner, showast=showast,
                               debug_parser=debug_parser)

    deparsed.ast = deparsed.build_ast(tokens, customize)
    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens  # save memory

    # Convert a leading '__doc__ = "..."' assignment into a docstring.
    deparsed.mod_globs = pysource.find_globals(deparsed.ast, set())

    # Just when you think we've forgotten about what we
    # were supposed to do: generate source from the AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text))
    deparsed.fixup_parents(deparsed.ast, None)

    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)

    if deparsed.ERROR:
        raise deparsed.ERROR

    return deparsed
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    assert iscode(co)
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    # For heavy grammar debugging:
    # parser_debug = {'rules': True, 'transition': True, 'reduce': True}
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
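# Example usage (a sketch, not part of the original module): parse a freshly
# compiled code object into a parse tree. The version is derived from the
# running interpreter, which is assumed to be one the scanners support.
if __name__ == '__main__':
    import sys
    example_co = compile("a = 1\nb = a + 2\n", "<example>", "exec")
    version = float('%d.%d' % sys.version_info[:2])
    tree = python_parser(version, example_co, showasm=True)
    print(tree)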
def listcomprehension_walk3(self, node, iter_index, code_index=-5):
    """List comprehensions the way they are done in Python3.
    They're more like other comprehensions, e.g. set comprehensions.
    See if we can combine code.
    """
    p = self.prec
    self.prec = 27

    code = node[code_index].attr
    assert iscode(code)  # or Code3
    code = Code(code, self.scanner, self.currentclass)
    # assert isinstance(code, Code)

    ast = self.build_ast(code._tokens, code._customize)
    self.customize(code._customize)
    ast = ast[0][0][0][0][0]

    n = ast[iter_index]
    assert n == 'list_iter'

    # find innermost node
    while n == 'list_iter':  # list_iter
        n = n[0]  # recurse one step
        if n == 'list_for':
            designator = n[2]
            n = n[3]
        elif n == 'list_if':  # FIXME: just a guess
            designator = n[1]
            n = n[2]
        elif n == 'list_ifnot':  # FIXME: just a guess
            designator = n[1]
            n = n[2]

    assert n == 'lc_body', ast

    self.preorder(n[0])
    self.write(' for ')
    start = len(self.f.getvalue())
    self.preorder(designator)
    self.set_pos_info(designator, start, len(self.f.getvalue()))

    self.write(' in ')
    start = len(self.f.getvalue())
    node[-3].parent = node
    self.preorder(node[-3])
    self.set_pos_info(node[-3], start, len(self.f.getvalue()))
    # self.preorder(ast[iter_index])
    self.prec = p
def listcomprehension_walk3(self, node, iter_index, code_index=-5):
    """List comprehensions the way they are done in Python3.
    They're more like other comprehensions, e.g. set comprehensions.
    See if we can combine code.
    """
    p = self.prec
    self.prec = 27

    code = node[code_index].attr
    assert iscode(code)  # or Code3
    code = Code(code, self.scanner, self.currentclass)

    ast = self.build_ast(code._tokens, code._customize)
    self.customize(code._customize)
    ast = ast[0][0][0][0][0]

    n = ast[iter_index]
    assert n == 'list_iter'

    # find innermost node
    while n == 'list_iter':
        n = n[0]  # recurse one step
        if n == 'list_for':
            designator = n[2]
            n = n[3]
        elif n == 'list_if':  # FIXME: just a guess
            designator = n[1]
            n = n[2]
        elif n == 'list_if_not':  # FIXME: just a guess
            designator = n[1]
            n = n[2]

    assert n == 'lc_body', ast

    self.preorder(n[0])
    self.write(' for ')
    start = len(self.f.getvalue())
    self.preorder(designator)
    self.set_pos_info(designator, start, len(self.f.getvalue()))

    self.write(' in ')
    start = len(self.f.getvalue())
    node[-3].parent = node
    self.preorder(node[-3])
    self.set_pos_info(node[-3], start, len(self.f.getvalue()))
    # self.preorder(ast[iter_index])
    self.prec = p
def deparse_code(version, co, out=StringIO(), showasm=False, showast=False,
                 showgrammar=False):
    assert iscode(co)

    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    if showgrammar:
        debug_parser['reduce'] = showgrammar
        debug_parser['errorstack'] = True

    # Build AST from disassembly.
    # deparsed = pysource.FragmentsWalker(out, scanner, showast=showast)
    deparsed = FragmentsWalker(version, scanner, showast=showast,
                               debug_parser=debug_parser)

    deparsed.ast = deparsed.build_ast(tokens, customize)
    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens  # save memory

    # Convert a leading '__doc__ = "..."' assignment into a docstring.
    deparsed.mod_globs = pysource.find_globals(deparsed.ast, set())

    # Just when you think we've forgotten about what we
    # were supposed to do: generate source from the AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text))
    deparsed.fixup_parents(deparsed.ast, None)

    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)

    if deparsed.ERROR:
        raise deparsed.ERROR

    return deparsed
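# Example usage (a sketch, not part of the original module): deparse a code
# object and inspect the text the fragments walker records. Assumes the
# bytecode was produced by the running interpreter and that its version is
# one the scanners support.
if __name__ == '__main__':
    import sys
    example_co = compile("x = [i * i for i in range(3)]\n", "<example>", "exec")
    version = float('%d.%d' % sys.version_info[:2])
    walker = deparse_code(version, example_co)
    print(walker.text)  # the reconstructed source text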
def disco(version, co, out=None):
    """
    disassembles a given code block 'co' and prints its tokens
    """
    assert iscode(co)

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)

    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)

    for t in tokens:
        print(t, file=real_out)
    print(file=real_out)
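# Example usage (a sketch, not part of the original module): print the token
# stream for a small code object compiled by the running interpreter, which is
# assumed to be a version the scanners support.
if __name__ == '__main__':
    import sys
    example_co = compile("for i in range(3):\n    print(i)\n", "<example>", "exec")
    disco(float('%d.%d' % sys.version_info[:2]), example_co, out=sys.stdout)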
def comprehension_walk(self, node, iter_index, code_index=-5):
    p = self.prec
    self.prec = 27

    if hasattr(node[code_index], 'attr'):
        code = node[code_index].attr
    elif hasattr(node[1][1], 'attr'):
        code = node[1][1].attr
    else:
        assert False
    assert iscode(code)

    code = Code(code, self.scanner, self.currentclass)
    ast = self.build_ast(code._tokens, code._customize)
    self.customize(code._customize)
    ast = ast[0][0][0]

    n = ast[iter_index]
    assert n == 'comp_iter'

    # find innermost node
    while n == 'comp_iter':
        n = n[0]  # recurse one step
        if n == 'comp_for':
            n = n[3]
        elif n == 'comp_if':
            n = n[2]
        elif n == 'comp_ifnot':
            n = n[2]

    assert n == 'comp_body', ast

    self.preorder(n[0])
    self.write(' for ')
    start = len(self.f.getvalue())
    designator = ast[iter_index - 1]
    self.preorder(designator)
    self.set_pos_info(designator, start, len(self.f.getvalue()))

    self.write(' in ')
    start = len(self.f.getvalue())
    node[-3].parent = node
    self.preorder(node[-3])
    self.set_pos_info(node[-3], start, len(self.f.getvalue()))

    start = len(self.f.getvalue())
    self.preorder(ast[iter_index])
    self.set_pos_info(ast[iter_index], start, len(self.f.getvalue()))
    self.prec = p
def comprehension_walk(self, node, iter_index):
    p = self.prec
    self.prec = 27

    code = node[-5].attr
    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)
    # assert isinstance(code, Code)

    ast = self.build_ast(code._tokens, code._customize)
    self.customize(code._customize)
    ast = ast[0][0][0]

    n = ast[iter_index]
    assert n == 'comp_iter'

    # find innermost node
    while n == 'comp_iter':
        n = n[0]  # recurse one step
        if n == 'comp_for':
            n = n[3]
        elif n == 'comp_if':
            n = n[2]
        elif n == 'comp_ifnot':
            n = n[2]

    assert n == 'comp_body', ast

    self.preorder(n[0])
    self.write(' for ')
    start = len(self.f.getvalue())
    designator = ast[iter_index - 1]
    self.preorder(designator)
    self.set_pos_info(designator, start, len(self.f.getvalue()))

    self.write(' in ')
    start = len(self.f.getvalue())
    node[-3].parent = node
    self.preorder(node[-3])
    self.set_pos_info(node[-3], start, len(self.f.getvalue()))

    start = len(self.f.getvalue())
    self.preorder(ast[iter_index])
    self.set_pos_info(ast[iter_index], start, len(self.f.getvalue()))
    self.prec = p
def disassemble(self, co, classname=None, code_objects={}):
    """
    Disassemble a code object, returning a list of 'Token'.

    The main part of this procedure is modelled after dis.disassemble().
    """
    # import dis; dis.disassemble(co)  # DEBUG

    rv = []
    customize = {}
    Token = self.Token  # shortcut

    self.code = array('B', co.co_code)
    for i in self.op_range(0, len(self.code)):
        if self.code[i] in (RETURN_VALUE, END_FINALLY):
            n = i + 1
    self.code = array('B', co.co_code[:n])

    self.prev = [0]
    # mapping addresses of instruction & argument
    for i in self.op_range(0, n):
        op = self.code[i]
        self.prev.append(i)
        if op >= HAVE_ARGUMENT:
            self.prev.append(i)
            self.prev.append(i)

    self.lines = []
    linetuple = namedtuple('linetuple', ['l_no', 'next'])
    j = 0

    # linestarts is a list of (offset, line number) pairs.
    # Turn that into a hash (dict) that we can index by offset.
    linestarts = list(dis.findlinestarts(co))
    linestartoffsets = {}
    for offset, lineno in linestarts:
        linestartoffsets[offset] = lineno

    (prev_start_byte, prev_line_no) = linestarts[0]
    for (start_byte, line_no) in linestarts[1:]:
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        prev_line_no = start_byte
    while j < n:
        self.lines.append(linetuple(prev_line_no, n))
        j += 1

    # self.lines contains (block, addrLastInstr)
    if classname:
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name

        free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
        names = [ unmangle(name) for name in co.co_names ]
        varnames = [ unmangle(name) for name in co.co_varnames ]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames

    self.load_asserts = set()
    for i in self.op_range(0, n):
        if self.code[i] == PJIT and self.code[i+3] == LOAD_GLOBAL:
            if names[self.get_argument(i+3)] == 'AssertionError':
                self.load_asserts.add(i+3)

    cf = self.find_jump_targets(self.code)
    # contains (code, [addrRefToCode])

    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < n-1:
        if self.lines[last_stmt].next > i:
            if self.code[last_stmt] == PRINT_ITEM:
                if self.code[i] == PRINT_ITEM:
                    replace[i] = 'PRINT_ITEM_CONT'
                elif self.code[i] == PRINT_NEWLINE:
                    replace[i] = 'PRINT_NEWLINE_CONT'
        last_stmt = i
        i = self.next_stmt[i]

    imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
    if len(imports) > 1:
        last_import = imports[0]
        for i in imports[1:]:
            if self.lines[last_import].next > i:
                if self.code[last_import] == IMPORT_NAME == self.code[i]:
                    replace[i] = 'IMPORT_NAME_CONT'
            last_import = i

    extended_arg = 0
    for offset in self.op_range(0, n):
        if offset in cf:
            k = 0
            for j in cf[offset]:
                rv.append(Token('COME_FROM', None, repr(j),
                                offset="%s_%d" % (offset, k)))
                k += 1

        op = self.code[offset]
        op_name = opname[op]

        oparg = None; pattr = None
        if op >= HAVE_ARGUMENT:
            oparg = self.get_argument(offset) + extended_arg
            extended_arg = 0
            if op == EXTENDED_ARG:
                extended_arg = oparg * scan.L65536
                continue
            if op in hasconst:
                const = co.co_consts[oparg]
                if iscode(const):
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert op_name == 'LOAD_CONST'
                        op_name = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        op_name = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        op_name = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        op_name = 'LOAD_SETCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    #     (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in hasname:
                pattr = names[oparg]
            elif op in hasjrel:
                pattr = repr(offset + 3 + oparg)
            elif op in hasjabs:
                pattr = repr(oparg)
            elif op in haslocal:
                pattr = varnames[oparg]
            elif op in hascompare:
                pattr = cmp_op[oparg]
            elif op in hasfree:
                pattr = free[oparg]

        if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
                  UNPACK_SEQUENCE, MAKE_FUNCTION, CALL_FUNCTION,
                  MAKE_CLOSURE, CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                  CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS):
            # CE - Hack for >= 2.5
            # Now all values loaded via LOAD_CLOSURE are packed into
            # a tuple before calling MAKE_CLOSURE.
            if op == BUILD_TUPLE and \
               self.code[self.prev[offset]] == LOAD_CLOSURE:
                continue
            else:
                op_name = '%s_%d' % (op_name, oparg)
                if op != BUILD_SLICE:
                    customize[op_name] = oparg
        elif op == JA:
            target = self.get_target(offset)
            if target < offset:
                if offset in self.stmts \
                   and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                   and offset not in self.not_continue:
                    op_name = 'CONTINUE'
                else:
                    op_name = 'JUMP_BACK'
        elif op == LOAD_GLOBAL:
            if offset in self.load_asserts:
                op_name = 'LOAD_ASSERT'
        elif op == RETURN_VALUE:
            if offset in self.return_end_ifs:
                op_name = 'RETURN_END_IF'

        if offset in linestartoffsets:
            linestart = linestartoffsets[offset]
        else:
            linestart = None

        if offset not in replace:
            rv.append(Token(op_name, oparg, pattr, offset, linestart))
        else:
            rv.append(Token(replace[offset], oparg, pattr, offset, linestart))

    return rv, customize
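# Example usage (a sketch, not part of the original module): drive a scanner
# directly to get the token stream and the per-opcode customizations that the
# parser later turns into grammar rules. Assumes the bytecode was produced by
# the running interpreter and that its version is one the scanners support.
if __name__ == '__main__':
    import sys
    from uncompyle6.scanner import get_scanner
    example_co = compile("assert 1 + 1 == 2\n", "<example>", "exec")
    scanner = get_scanner(float('%d.%d' % sys.version_info[:2]))
    tokens, customize = scanner.disassemble(example_co)
    for t in tokens:
        print(t)
    print(customize)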
def make_function(self, node, isLambda, nested=1, code_index=-2):
    """Dump function definition, doc string, and function body."""

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if self.version < 3.0:
            # If a formal parameter is a tuple, the parameter name
            # starts with a dot (eg. '.1', '.2').
            if name.startswith('.'):
                # replace the name with the tuple-string
                name = self.get_tuple_parameter(ast, name)
                pass
            pass

        if default:
            if self.showast:
                print()
                print('--', name)
                print(default)
                print('--')
                pass
            result = '%s=' % name
            old_last_finish = self.last_finish
            self.last_finish = len(result)
            value = self.traverse(default, indent='')
            self.last_finish = old_last_finish
            result += value
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # node[-1] == MAKE_FUNCTION_n
    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        defparams = node[:args_node.attr[0]]
        pos_args, kw_args, annotate_args = args_node.attr
    else:
        defparams = node[:args_node.attr]
        kw_args, annotate_args = (0, 0)
        pos_args = args_node.attr
        pass

    code = node[code_index].attr
    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add default values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse(); defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    # build parameters
    params = [build_param(ast, name, default) for
              name, default in zip_longest(paramnames, defparams, fillvalue=None)]
    params.reverse()  # back to correct order

    if 4 & code.co_flags:  # flag 2 -> variable number of args
        params.append('*%s' % code.co_varnames[argc])
        argc += 1
    if 8 & code.co_flags:  # flag 3 -> keyword args
        params.append('**%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    indent = self.indent
    if isLambda:
        self.write("lambda ", ", ".join(params))
    else:
        self.write("(", ", ".join(params))
        # self.println(indent, '#flags:\t', int(code.co_flags))

    if kw_args > 0:
        if argc > 0:
            self.write(", *, ")
        else:
            self.write("*, ")
        for n in node:
            if n == 'pos_arg':
                continue
            self.preorder(n)
            break
        pass

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if len(code.co_consts) > 0 and code.co_consts[0] is not None and not isLambda:  # ugly
        # docstring exists, dump it
        self.print_docstring(indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    rn = ('None' in code.co_names) and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, isLambda=isLambda,
                    returnNone=rn)
    code._tokens = None; code._customize = None  # save memory
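# Quick illustration (not part of the original module) of the co_flags bits
# tested above: 0x04 (CO_VARARGS) marks a *args parameter and 0x08
# (CO_VARKEYWORDS) marks a **kwargs parameter. _flags_demo is just a
# throwaway function defined for this check.
if __name__ == '__main__':
    def _flags_demo(a, *args, **kwargs):
        pass
    assert _flags_demo.__code__.co_flags & 4   # has *args
    assert _flags_demo.__code__.co_flags & 8   # has **kwargs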
def disassemble_generic(self, co, classname=None, code_objects={}):
    """
    Convert code object <co> into a sequence of tokens.

    The below is based on (an older version?) of Python dis.disassemble_bytes().
    """
    # Container for tokens
    tokens = []
    customize = {}
    self.code = code = array('B', co.co_code)
    codelen = len(code)
    self.build_lines_data(co)
    self.build_prev_op()
    self.code_objects = code_objects

    # self.lines contains (block, addrLastInstr)
    if classname:
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name

        free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
        names = [ unmangle(name) for name in co.co_names ]
        varnames = [ unmangle(name) for name in co.co_varnames ]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames
        pass

    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
    # assertions
    self.load_asserts = set()
    for i in self.op_range(0, codelen):
        if self.code[i] == POP_JUMP_IF_TRUE and self.code[i+3] == LOAD_GLOBAL:
            if names[self.get_argument(i+3)] == 'AssertionError':
                self.load_asserts.add(i+3)

    # Get jump targets
    # Format: {target offset: [jump offsets]}
    jump_targets = self.find_jump_targets()
    # contains (code, [addrRefToCode])

    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}

    imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
    if len(imports) > 1:
        last_import = imports[0]
        for i in imports[1:]:
            if self.lines[last_import].next > i:
                if self.code[last_import] == IMPORT_NAME == self.code[i]:
                    replace[i] = 'IMPORT_NAME_CONT'
            last_import = i

    # Initialize extended arg at 0. When an EXTENDED_ARG op is encountered,
    # its value is preserved for the next cycle and added to the argument
    # of the following op.
    extended_arg = 0
    for offset in self.op_range(0, codelen):
        # Add jump target tokens
        if offset in jump_targets:
            jump_idx = 0
            for jump_offset in jump_targets[offset]:
                tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                    offset='%s_%s' % (offset, jump_idx)))
                jump_idx += 1
                pass
            pass

        op = code[offset]
        op_name = op3.opname[op]

        oparg = None; pattr = None
        if op >= op3.HAVE_ARGUMENT:
            oparg = self.get_argument(offset) + extended_arg
            extended_arg = 0
            if op == op3.EXTENDED_ARG:
                extended_arg = oparg * scan.L65536
                continue
            if op in op3.hasconst:
                const = co.co_consts[oparg]
                if not PYTHON3 and isinstance(const, str):
                    if const in code_objects:
                        const = code_objects[const]
                if iscode(const):
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert op_name == 'LOAD_CONST'
                        op_name = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        op_name = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        op_name = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        op_name = 'LOAD_SETCOMP'
                    elif const.co_name == '<listcomp>':
                        op_name = 'LOAD_LISTCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    #     (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in op3.hasname:
                pattr = names[oparg]
            elif op in op3.hasjrel:
                pattr = repr(offset + 3 + oparg)
            elif op in op3.hasjabs:
                pattr = repr(oparg)
            elif op in op3.haslocal:
                pattr = varnames[oparg]
            elif op in op3.hascompare:
                pattr = op3.cmp_op[oparg]
            elif op in op3.hasfree:
                pattr = free[oparg]

        if op_name in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
                       'UNPACK_SEQUENCE', 'MAKE_FUNCTION', 'CALL_FUNCTION',
                       'MAKE_CLOSURE', 'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW',
                       'CALL_FUNCTION_VAR_KW', 'RAISE_VARARGS'):
            # CALL_FUNCTION OP renaming is done as a custom rule in parse3
            if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                               'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
                op_name = '%s_%d' % (op_name, oparg)
                if op_name != 'BUILD_SLICE':
                    customize[op_name] = oparg
        elif op_name == 'JUMP_ABSOLUTE':
            target = self.get_target(offset)
            if target < offset:
                if (offset in self.stmts
                    and self.code[offset+3] not in (END_FINALLY, POP_BLOCK)
                    and offset not in self.not_continue):
                    op_name = 'CONTINUE'
                else:
                    op_name = 'JUMP_BACK'
        elif op_name == 'LOAD_GLOBAL':
            if offset in self.load_asserts:
                op_name = 'LOAD_ASSERT'
        elif op_name == 'RETURN_VALUE':
            if offset in self.return_end_ifs:
                op_name = 'RETURN_END_IF'

        if offset in self.linestarts:
            linestart = self.linestarts[offset]
        else:
            linestart = None

        if offset not in replace:
            tokens.append(Token(op_name, oparg, pattr, offset, linestart))
        else:
            tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
        pass
    return tokens, customize
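# Illustration (not part of the original module) of the EXTENDED_ARG handling
# above for pre-3.6 bytecode with 16-bit arguments: an EXTENDED_ARG opcode
# contributes its argument times 65536 (scan.L65536) to the argument of the
# opcode that follows it.
if __name__ == '__main__':
    extended = 2    # argument carried by a preceding EXTENDED_ARG
    following = 5   # 16-bit argument of the next opcode
    assert extended * 65536 + following == 131077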
def disassemble_built_in(self, co, classname=None, code_objects={}):
    # Container for tokens
    tokens = []
    customize = {}
    self.code = array('B', co.co_code)
    self.build_lines_data(co)
    self.build_prev_op()

    # Get jump targets
    # Format: {target offset: [jump offsets]}
    jump_targets = self.find_jump_targets()

    bytecode = dis.Bytecode(co)

    # self.lines contains (block, addrLastInstr)
    if classname:
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name

        # free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
        # names = [ unmangle(name) for name in co.co_names ]
        # varnames = [ unmangle(name) for name in co.co_varnames ]
    else:
        # free = co.co_cellvars + co.co_freevars
        # names = co.co_names
        # varnames = co.co_varnames
        pass

    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
    # assertions
    self.load_asserts = set()
    bs = list(bytecode)
    n = len(bs)
    for i in range(n):
        inst = bs[i]
        if inst.opname == 'POP_JUMP_IF_TRUE' and i + 1 < n:
            next_inst = bs[i + 1]
            if (next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                self.load_asserts.add(next_inst.offset)

    for inst in bytecode:
        if inst.offset in jump_targets:
            jump_idx = 0
            for jump_offset in jump_targets[inst.offset]:
                tokens.append(
                    Token('COME_FROM', None, repr(jump_offset),
                          offset='%s_%s' % (inst.offset, jump_idx)))
                jump_idx += 1
                pass
            pass

        pattr = inst.argrepr
        opname = inst.opname

        if opname in ['LOAD_CONST']:
            const = inst.argval
            if iscode(const):
                if const.co_name == '<lambda>':
                    opname = 'LOAD_LAMBDA'
                elif const.co_name == '<genexpr>':
                    opname = 'LOAD_GENEXPR'
                elif const.co_name == '<dictcomp>':
                    opname = 'LOAD_DICTCOMP'
                elif const.co_name == '<setcomp>':
                    opname = 'LOAD_SETCOMP'
                elif const.co_name == '<listcomp>':
                    opname = 'LOAD_LISTCOMP'
                # verify() uses 'pattr' for comparison, since 'attr'
                # now holds Code(const) and thus can not be used
                # for comparison (todo: think about changing this)
                # pattr = 'code_object @ 0x%x %s->%s' %\
                #     (id(const), const.co_filename, const.co_name)
                pattr = '<code_object ' + const.co_name + '>'
            else:
                pattr = const
                pass
        elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
                        'UNPACK_SEQUENCE', 'MAKE_FUNCTION', 'MAKE_CLOSURE',
                        'DUP_TOPX', 'RAISE_VARARGS'):
            # if opname == 'BUILD_TUPLE' and \
            #     self.code[self.prev[offset]] == LOAD_CLOSURE:
            #     continue
            # else:
            #     op_name = '%s_%d' % (op_name, oparg)
            #     if opname != BUILD_SLICE:
            #         customize[op_name] = oparg
            opname = '%s_%d' % (opname, inst.argval)
            if inst.opname != 'BUILD_SLICE':
                customize[opname] = inst.argval
        elif opname == 'JUMP_ABSOLUTE':
            pattr = inst.argval
            target = self.get_target(inst.offset)
            if target < inst.offset:
                if (inst.offset in self.stmts
                    and self.code[inst.offset + 3] not in (END_FINALLY, POP_BLOCK)
                    and inst.offset not in self.not_continue):
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
        elif inst.offset in self.load_asserts:
            opname = 'LOAD_ASSERT'

        tokens.append(
            Token(
                type_=opname,
                attr=inst.argval,
                pattr=pattr,
                offset=inst.offset,
                linestart=inst.starts_line,
                ))
        pass
    return tokens, {}
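# Illustration (not part of the original module) of the dis.Bytecode
# instruction fields the loop above relies on: opname, argval, argrepr,
# offset, and starts_line (dis.Bytecode is available in Python 3.4+).
if __name__ == '__main__':
    import dis
    example_co = compile("x = 1\n", "<example>", "exec")
    for inst in dis.Bytecode(example_co):
        print(inst.offset, inst.opname, inst.argval, inst.argrepr,
              inst.starts_line)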
def disassemble(self, co, classname=None, code_objects={}):
    """
    Disassemble a code object, returning a list of 'Token'.

    The main part of this procedure is modelled after dis.disassemble().
    """
    rv = []
    customize = {}
    Token = self.Token  # shortcut

    self.code = array('B', co.co_code)
    for i in self.op_range(0, len(self.code)):
        if self.code[i] in (RETURN_VALUE, END_FINALLY):
            n = i + 1
    self.code = array('B', co.co_code[:n])

    self.prev = [0]
    # mapping addresses of instruction & argument
    for i in self.op_range(0, n):
        op = self.code[i]
        self.prev.append(i)
        if op >= HAVE_ARGUMENT:
            self.prev.append(i)
            self.prev.append(i)

    self.lines = []
    linetuple = namedtuple('linetuple', ['l_no', 'next'])
    j = 0

    # linestarts is a list of (offset, line number) pairs.
    # Turn that into a hash (dict) that we can index by offset.
    linestarts = list(dis.findlinestarts(co))
    linestartoffsets = {}
    for offset, lineno in linestarts:
        linestartoffsets[offset] = lineno

    (prev_start_byte, prev_line_no) = linestarts[0]
    for (start_byte, line_no) in linestarts[1:]:
        while j < start_byte:
            self.lines.append(linetuple(prev_line_no, start_byte))
            j += 1
        prev_line_no = start_byte
    while j < n:
        self.lines.append(linetuple(prev_line_no, n))
        j += 1

    # self.lines contains (block, addrLastInstr)
    if classname:
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name

        free = [unmangle(name) for name in (co.co_cellvars + co.co_freevars)]
        names = [unmangle(name) for name in co.co_names]
        varnames = [unmangle(name) for name in co.co_varnames]
    else:
        free = co.co_cellvars + co.co_freevars
        names = co.co_names
        varnames = co.co_varnames

    self.load_asserts = set()
    for i in self.op_range(0, n):
        if self.code[i] == PJIT and self.code[i + 3] == LOAD_GLOBAL:
            if names[self.get_argument(i + 3)] == 'AssertionError':
                self.load_asserts.add(i + 3)

    cf = self.find_jump_targets(self.code)
    # contains (code, [addrRefToCode])

    last_stmt = self.next_stmt[0]
    i = self.next_stmt[last_stmt]
    replace = {}
    while i < n - 1:
        if self.lines[last_stmt].next > i:
            if self.code[last_stmt] == PRINT_ITEM:
                if self.code[i] == PRINT_ITEM:
                    replace[i] = 'PRINT_ITEM_CONT'
                elif self.code[i] == PRINT_NEWLINE:
                    replace[i] = 'PRINT_NEWLINE_CONT'
        last_stmt = i
        i = self.next_stmt[i]

    imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
    if len(imports) > 1:
        last_import = imports[0]
        for i in imports[1:]:
            if self.lines[last_import].next > i:
                if self.code[last_import] == IMPORT_NAME == self.code[i]:
                    replace[i] = 'IMPORT_NAME_CONT'
            last_import = i

    extended_arg = 0
    for offset in self.op_range(0, n):
        if offset in cf:
            k = 0
            for j in cf[offset]:
                rv.append(
                    Token('COME_FROM', None, repr(j),
                          offset="%s_%d" % (offset, k)))
                k += 1

        op = self.code[offset]
        op_name = opname[op]

        oparg = None
        pattr = None
        if op >= HAVE_ARGUMENT:
            oparg = self.get_argument(offset) + extended_arg
            extended_arg = 0
            if op == EXTENDED_ARG:
                extended_arg = oparg * scan.L65536
                continue
            if op in hasconst:
                const = co.co_consts[oparg]
                if iscode(const):
                    oparg = const
                    if const.co_name == '<lambda>':
                        assert op_name == 'LOAD_CONST'
                        op_name = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        op_name = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        op_name = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        op_name = 'LOAD_SETCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    #     (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif op in hasname:
                pattr = names[oparg]
            elif op in hasjrel:
                pattr = repr(offset + 3 + oparg)
            elif op in hasjabs:
                pattr = repr(oparg)
            elif op in haslocal:
                pattr = varnames[oparg]
            elif op in hascompare:
                pattr = cmp_op[oparg]
            elif op in hasfree:
                pattr = free[oparg]

        if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
                  UNPACK_SEQUENCE, MAKE_FUNCTION, CALL_FUNCTION,
                  MAKE_CLOSURE, CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                  CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS):
            # CE - Hack for >= 2.5
            # Now all values loaded via LOAD_CLOSURE are packed into
            # a tuple before calling MAKE_CLOSURE.
            if op == BUILD_TUPLE and \
               self.code[self.prev[offset]] == LOAD_CLOSURE:
                continue
            else:
                op_name = '%s_%d' % (op_name, oparg)
                if op != BUILD_SLICE:
                    customize[op_name] = oparg
        elif op == JA:
            target = self.get_target(offset)
            if target < offset:
                if offset in self.stmts \
                   and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                   and offset not in self.not_continue:
                    op_name = 'CONTINUE'
                else:
                    op_name = 'JUMP_BACK'
        elif op == LOAD_GLOBAL:
            if offset in self.load_asserts:
                op_name = 'LOAD_ASSERT'
        elif op == RETURN_VALUE:
            if offset in self.return_end_ifs:
                op_name = 'RETURN_END_IF'

        if offset in linestartoffsets:
            linestart = linestartoffsets[offset]
        else:
            linestart = None

        if offset not in replace:
            rv.append(Token(op_name, oparg, pattr, offset, linestart))
        else:
            rv.append(
                Token(replace[offset], oparg, pattr, offset, linestart))

    return rv, customize
def disassemble_built_in(self, co, classname=None, code_objects={}):
    # Container for tokens
    tokens = []
    customize = {}
    self.code = array('B', co.co_code)
    self.build_lines_data(co)
    self.build_prev_op()

    # Get jump targets
    # Format: {target offset: [jump offsets]}
    jump_targets = self.find_jump_targets()

    bytecode = dis.Bytecode(co)

    # self.lines contains (block, addrLastInstr)
    if classname:
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name

        # free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
        # names = [ unmangle(name) for name in co.co_names ]
        # varnames = [ unmangle(name) for name in co.co_varnames ]
    else:
        # free = co.co_cellvars + co.co_freevars
        # names = co.co_names
        # varnames = co.co_varnames
        pass

    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
    # assertions
    self.load_asserts = set()
    bs = list(bytecode)
    n = len(bs)
    for i in range(n):
        inst = bs[i]
        if inst.opname == 'POP_JUMP_IF_TRUE' and i + 1 < n:
            next_inst = bs[i + 1]
            if (next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                self.load_asserts.add(next_inst.offset)

    for inst in bytecode:
        if inst.offset in jump_targets:
            jump_idx = 0
            for jump_offset in jump_targets[inst.offset]:
                tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                    offset='%s_%s' % (inst.offset, jump_idx)))
                jump_idx += 1
                pass
            pass

        pattr = inst.argrepr
        opname = inst.opname

        if opname in ['LOAD_CONST']:
            const = inst.argval
            if iscode(const):
                if const.co_name == '<lambda>':
                    opname = 'LOAD_LAMBDA'
                elif const.co_name == '<genexpr>':
                    opname = 'LOAD_GENEXPR'
                elif const.co_name == '<dictcomp>':
                    opname = 'LOAD_DICTCOMP'
                elif const.co_name == '<setcomp>':
                    opname = 'LOAD_SETCOMP'
                elif const.co_name == '<listcomp>':
                    opname = 'LOAD_LISTCOMP'
                # verify() uses 'pattr' for comparison, since 'attr'
                # now holds Code(const) and thus can not be used
                # for comparison (todo: think about changing this)
                # pattr = 'code_object @ 0x%x %s->%s' %\
                #     (id(const), const.co_filename, const.co_name)
                pattr = '<code_object ' + const.co_name + '>'
            else:
                pattr = const
                pass
        elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
                        'UNPACK_SEQUENCE', 'MAKE_FUNCTION', 'MAKE_CLOSURE',
                        'DUP_TOPX', 'RAISE_VARARGS'):
            # if opname == 'BUILD_TUPLE' and \
            #     self.code[self.prev[offset]] == LOAD_CLOSURE:
            #     continue
            # else:
            #     op_name = '%s_%d' % (op_name, oparg)
            #     if opname != BUILD_SLICE:
            #         customize[op_name] = oparg
            opname = '%s_%d' % (opname, inst.argval)
            if inst.opname != 'BUILD_SLICE':
                customize[opname] = inst.argval
        elif opname == 'JUMP_ABSOLUTE':
            pattr = inst.argval
            target = self.get_target(inst.offset)
            if target < inst.offset:
                if (inst.offset in self.stmts
                    and self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK)
                    and inst.offset not in self.not_continue):
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
        elif inst.offset in self.load_asserts:
            opname = 'LOAD_ASSERT'

        tokens.append(
            Token(
                type_=opname,
                attr=inst.argval,
                pattr=pattr,
                offset=inst.offset,
                linestart=inst.starts_line,
                )
            )
        pass
    return tokens, {}
def disassemble(self, co, classname=None, code_objects={}):
    # import dis; dis.disassemble(co)  # DEBUG

    # Container for tokens
    tokens = []
    customize = {}
    self.code = array('B', co.co_code)
    self.build_lines_data(co)
    self.build_prev_op()

    bytecode = dis35.Bytecode(co)

    # self.lines contains (block, addrLastInstr)
    if classname:
        classname = '_' + classname.lstrip('_') + '__'

        def unmangle(name):
            if name.startswith(classname) and name[-2:] != '__':
                return name[len(classname) - 2:]
            return name
    else:
        pass

    # Scan for assertions. Later we will
    # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
    # assertions
    self.load_asserts = set()
    bs = list(bytecode)
    n = len(bs)
    for i in range(n):
        inst = bs[i]
        if inst.opname == 'POP_JUMP_IF_TRUE' and i + 1 < n:
            next_inst = bs[i + 1]
            if (next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                self.load_asserts.add(next_inst.offset)

    # Get jump targets
    # Format: {target offset: [jump offsets]}
    jump_targets = self.find_jump_targets()

    for inst in bytecode:
        if inst.offset in jump_targets:
            jump_idx = 0
            for jump_offset in jump_targets[inst.offset]:
                tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                    offset='%s_%s' % (inst.offset, jump_idx)))
                jump_idx += 1
                pass
            pass

        pattr = inst.argrepr
        opname = inst.opname

        if opname in ['LOAD_CONST']:
            const = inst.argval
            if iscode(const):
                if const.co_name == '<lambda>':
                    opname = 'LOAD_LAMBDA'
                elif const.co_name == '<genexpr>':
                    opname = 'LOAD_GENEXPR'
                elif const.co_name == '<dictcomp>':
                    opname = 'LOAD_DICTCOMP'
                elif const.co_name == '<setcomp>':
                    opname = 'LOAD_SETCOMP'
                elif const.co_name == '<listcomp>':
                    opname = 'LOAD_LISTCOMP'
                # verify() uses 'pattr' for comparison, since 'attr'
                # now holds Code(const) and thus can not be used
                # for comparison (todo: think about changing this)
                # pattr = 'code_object @ 0x%x %s->%s' %\
                #     (id(const), const.co_filename, const.co_name)
                pattr = '<code_object ' + const.co_name + '>'
            else:
                pattr = const
                pass
        elif opname == 'MAKE_FUNCTION':
            argc = inst.argval
            attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF)
            pos_args, name_pair_args, annotate_args = attr
            if name_pair_args > 0:
                opname = 'MAKE_FUNCTION_N%d' % name_pair_args
                pass
            if annotate_args > 0:
                opname = '%s_A_%d' % (opname, annotate_args)
                pass
            opname = '%s_%d' % (opname, pos_args)
            pattr = ("%d positional, %d keyword pair, %d annotated" %
                     (pos_args, name_pair_args, annotate_args))
            tokens.append(
                Token(
                    type_=opname,
                    attr=(pos_args, name_pair_args, annotate_args),
                    pattr=pattr,
                    offset=inst.offset,
                    linestart=inst.starts_line)
                )
            continue
        elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
                        'BUILD_MAP', 'UNPACK_SEQUENCE', 'MAKE_CLOSURE',
                        'RAISE_VARARGS'):
            pos_args = inst.argval
            if inst.opname != 'BUILD_SLICE':
                customize[opname] = pos_args
                pass
            opname = '%s_%d' % (opname, pos_args)
        elif opname == 'JUMP_ABSOLUTE':
            pattr = inst.argval
            target = self.get_target(inst.offset)
            if target < inst.offset:
                if (inst.offset in self.stmts
                    and self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK)
                    and inst.offset not in self.not_continue):
                    opname = 'CONTINUE'
                else:
                    opname = 'JUMP_BACK'
        elif inst.offset in self.load_asserts:
            opname = 'LOAD_ASSERT'

        tokens.append(
            Token(
                type_=opname,
                attr=inst.argval,
                pattr=pattr,
                offset=inst.offset,
                linestart=inst.starts_line,
                )
            )
        pass
    return tokens, {}
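# Worked example (not part of the original module) of the MAKE_FUNCTION argc
# decoding used above for pre-3.6 bytecode: the low byte counts positional
# defaults, the next byte counts keyword-only default pairs, and the remaining
# bits count annotations. The argc value here is made up for illustration.
if __name__ == '__main__':
    argc = 0x00020103                       # hypothetical oparg
    pos_args = argc & 0xFF                  # -> 3 positional defaults
    name_pair_args = (argc >> 8) & 0xFF     # -> 1 keyword-only default pair
    annotate_args = (argc >> 16) & 0x7FFF   # -> 2 annotations
    assert (pos_args, name_pair_args, annotate_args) == (3, 1, 2)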
def cmp_code_objects(version, code_obj1, code_obj2, name=''):
    """
    Compare two code-objects.

    This is the main part of this module.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(code_obj1), \
        "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(code_obj2), \
        "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new style classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?':
            name = '__main__'

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # Use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated.
        # If this compare fails, we use the old routine to
        # find out what exactly is not equal.
        # If this compare succeeds, simply return.
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith('co_')]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__:
            pass
        elif member == 'co_code':
            if version == 2.5:
                import uncompyle6.scanners.scanner25 as scan
                scanner = scan.Scanner25(version)
            elif version == 2.6:
                import uncompyle6.scanners.scanner26 as scan
                scanner = scan.Scanner26(version)
            elif version == 2.7:
                import uncompyle6.scanners.scanner27 as scan
                scanner = scan.Scanner27(version)
            elif version == 3.2:
                import uncompyle6.scanners.scanner32 as scan
                scanner = scan.Scanner32(version)
            elif version == 3.3:
                import uncompyle6.scanners.scanner33 as scan
                scanner = scan.Scanner33(version)
            elif version == 3.4:
                import uncompyle6.scanners.scanner34 as scan
                scanner = scan.Scanner34(version)
            global JUMP_OPs
            JUMP_OPs = list(scan.JUMP_OPs) + ['JUMP_BACK']

            # Use the changed Token class.
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # disassemble both code-objects
                tokens1, customize = scanner.disassemble(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.disassemble(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.type != 'COME_FROM']
            tokens2 = [t for t in tokens2 if t.type != 'COME_FROM']

            i1 = 0; i2 = 0
            offset_map = {}; check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    if len(tokens1) == len(tokens2) + 2 \
                          and tokens1[-1].type == 'RETURN_VALUE' \
                          and tokens1[-2].type == 'LOAD_CONST' \
                          and tokens1[-2].pattr is None \
                          and tokens1[-3].type == 'RETURN_VALUE':
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1],
                                           tokens2[idx2], tokens1, tokens2)

                if tokens1[i1].type != tokens2[i2].type:
                    if tokens1[i1].type == 'LOAD_CONST' == tokens2[i2].type:
                        i = 1
                        while tokens1[i1+i].type == 'LOAD_CONST':
                            i += 1
                        if tokens1[i1+i].type.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \
                              and i == int(tokens1[i1+i].type.split('_')[-1]):
                            t = tuple([ elem.pattr for elem in tokens1[i1:i1+i] ])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                                   tokens2[i2], tokens1, tokens2)
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif i == 2 and tokens1[i1+i].type == 'ROT_TWO' and \
                                tokens2[i2+1].type == 'UNPACK_SEQUENCE_2':
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1+i].type in BIN_OP_FUNCS:
                            f = BIN_OP_FUNCS[tokens1[i1+i].type]
                            if f(tokens1[i1].pattr, tokens1[i1+1].pattr) == tokens2[i2].pattr:
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].type == 'UNARY_NOT':
                        if tokens2[i2].type == 'POP_JUMP_IF_TRUE':
                            if tokens1[i1+1].type == 'POP_JUMP_IF_FALSE':
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].type == 'POP_JUMP_IF_FALSE':
                            if tokens1[i1+1].type == 'POP_JUMP_IF_TRUE':
                                i1 += 2
                                i2 += 1
                                continue
                    elif tokens1[i1].type in ('JUMP_FORWARD', 'JUMP_BACK') \
                          and tokens1[i1-1].type == 'RETURN_VALUE' \
                          and tokens2[i2-1].type in ('RETURN_VALUE', 'RETURN_END_IF') \
                          and int(tokens1[i1].offset) not in targets1:
                        i1 += 1
                        continue
                    elif tokens1[i1].type == 'JUMP_FORWARD' and tokens2[i2].type == 'JUMP_BACK' \
                          and tokens1[i1+1].type == 'JUMP_BACK' and tokens2[i2+1].type == 'JUMP_BACK' \
                          and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3:
                        if int(tokens1[i1].pattr) == int(tokens1[i1+1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                       tokens2[i2], tokens1, tokens2)
                elif tokens1[i1].type in JUMP_OPs and tokens1[i1].pattr != tokens2[i2].pattr:
                    dest1 = int(tokens1[i1].pattr)
                    dest2 = int(tokens2[i2].pattr)
                    if tokens1[i1].type == 'JUMP_BACK':
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                               tokens2[i2], tokens1, tokens2)
                    else:
                        # import pdb; pdb.set_trace()
                        if dest1 in check_jumps:
                            check_jumps[dest1].append((i1, i2, dest2))
                        else:
                            check_jumps[dest1] = [(i1, i2, dest2)]

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == 'co_consts':
            # partial optimization can make the co_consts look different,
            # so we'll just compare the code consts
            codes1 = ( c for c in code_obj1.co_consts if hasattr(c, 'co_consts') )
            codes2 = ( c for c in code_obj2.co_consts if hasattr(c, 'co_consts') )

            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, c1, c2, name=name)
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member,
                                     getattr(code_obj1, member),
                                     getattr(code_obj2, member))
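# Example usage (a sketch, not part of the original module): verify that two
# code objects compiled from the same source compare as equal. It assumes the
# running interpreter is one of the versions dispatched on above (2.5-2.7,
# 3.2-3.4); for other versions no scanner is selected and the call would fail.
if __name__ == '__main__':
    import sys
    src = "def f(x):\n    return x + 1\n"
    co1 = compile(src, "<example>", "exec")
    co2 = compile(src, "<example>", "exec")
    cmp_code_objects(float('%d.%d' % sys.version_info[:2]), co1, co2)
    print("code objects match")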