Esempio n. 1
0
def uncompyle(version, co, out=None, showasm=False, showast=False,
              timestamp=None, showgrammar=False, code_objects=None):
    """
    Disassemble and deparse the code object 'co'.

    A comment header (Python version, the embedded file name if there is
    one, and the compile time if 'timestamp' is given) is printed first.
    The header goes to 'out', or to sys.stdout when 'out' is falsy.
    The remaining arguments are forwarded to pysource.deparse_code().

    'code_objects' defaults to a fresh dictionary on every call; the
    former '{}' default was a single dictionary shared by all calls.

    A pysource.SourceWalkerError raised while deparsing is re-raised,
    after being printed to sys.stdout when 'out' was not usable.
    """

    assert iscode(co)

    if code_objects is None:
        code_objects = {}

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s (decompiled from Python %s)' % (version, PYTHON_VERSION),
          file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename,
              file=real_out)
    if timestamp:
        print('# Compiled at: %s' % datetime.datetime.fromtimestamp(timestamp),
              file=real_out)

    try:
        pysource.deparse_code(version, co, out, showasm, showast, showgrammar,
                              code_objects=code_objects)
    except pysource.SourceWalkerError as e:
        # deparsing failed; report on the fallback stream if one was chosen
        if real_out != out:
            print(e, file=real_out)
        raise
Esempio n. 2
0
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    """
    Scan code object 'co' for Python 'version' and parse the tokens.

    Each token is printed before parsing when 'showasm' is true.
    'out' is accepted but not used in this function.
    Returns the result of parse().
    """
    assert iscode(co)
    from uncompyle6.scanner import get_scanner

    tokens, customize = get_scanner(version).disassemble(co)
    if showasm:
        for token in tokens:
            print(token)

    return parse(get_python_parser(version, parser_debug), tokens, customize)
Esempio n. 3
0
def deparse_code(version,
                 co,
                 out=StringIO(),
                 showasm=False,
                 showast=False,
                 showgrammar=False):
    """
    Disassemble code object 'co' and deparse it with a FragmentsWalker.

    'showasm' prints every token, 'showast' is passed to the walker and
    'showgrammar' sets the parser's 'reduce' debug flag. 'out' is
    accepted but not used in this function.

    Returns the FragmentsWalker after source generation. If the walker
    recorded an error in its ERROR attribute, that error is raised.
    """

    assert iscode(co)
    scanner = get_scanner(version)

    # Disassemble once; the same token list feeds the listing and the AST.
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    # Work on a copy so the shared default debug settings stay untouched.
    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    debug_parser['reduce'] = showgrammar

    #  Build AST from disassembly.
    deparsed = FragmentsWalker(version,
                               scanner,
                               showast=showast,
                               debug_parser=debug_parser)

    deparsed.ast = deparsed.build_ast(tokens, customize)

    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens  # save memory

    deparsed.mod_globs = pysource.find_globals(deparsed.ast, set())

    # Just when you think we've forgotten about what we
    # were supposed to do: Generate source from AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text))
    deparsed.fixup_parents(deparsed.ast, None)

    # Names still left in mod_globs after source generation are reported.
    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)
    if deparsed.ERROR:
        raise deparsed.ERROR

    return deparsed
Esempio n. 4
0
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    """
    Scan code object 'co' for Python 'version' and parse the tokens.

    Each token is printed before parsing when 'showasm' is true.
    'parser_debug' is handed to get_python_parser() unchanged.
    'out' is accepted but not used in this function.
    Returns the result of parse().
    """
    assert iscode(co)
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    # For heavy grammar debugging, call this function with
    #   parser_debug={'rules': True, 'transition': True, 'reduce': True}
    # The argument is no longer overwritten here, so the caller's
    # setting is honoured.
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
Esempio n. 5
0
    def listcomprehension_walk3(self, node, iter_index, code_index=-5):
        """Write a list comprehension laid out the Python 3 way.

        The comprehension's code object is taken from node[code_index],
        parsed into its own tree, and the pieces are written in source
        order while text positions are recorded for the designator and
        for node[-3].
        """
        saved_prec = self.prec
        self.prec = 27

        code = node[code_index].attr
        assert iscode(code)
        # Or Code3
        code = Code(code, self.scanner, self.currentclass)

        ast = self.build_ast(code._tokens, code._customize)
        self.customize(code._customize)
        ast = ast[0][0][0][0][0]

        inner = ast[iter_index]
        assert inner == 'list_iter'

        # Descend through nested list_iter nodes until the body is reached.
        while inner == 'list_iter':
            inner = inner[0]
            if inner == 'list_for':
                designator, inner = inner[2], inner[3]
            elif inner == 'list_if' or inner == 'list_ifnot':
                # FIXME: just a guess
                designator, inner = inner[1], inner[2]
        assert inner == 'lc_body', ast

        self.preorder(inner[0])

        self.write(' for ')
        begin = len(self.f.getvalue())
        self.preorder(designator)
        self.set_pos_info(designator, begin, len(self.f.getvalue()))

        self.write(' in ')
        begin = len(self.f.getvalue())
        iterable = node[-3]
        iterable.parent = node
        self.preorder(iterable)
        self.set_pos_info(iterable, begin, len(self.f.getvalue()))

        self.prec = saved_prec
Esempio n. 6
0
    def listcomprehension_walk3(self, node, iter_index, code_index=-5):
        """Emit source text for a Python 3 style list comprehension.

        node[code_index] carries the comprehension's code object. It is
        scanned and parsed here; the body, the designator and node[-3]
        are then written in that order, with positions recorded for the
        last two.
        """
        outer_prec = self.prec
        self.prec = 27

        code = node[code_index].attr
        assert iscode(code)
        # Or Code3
        code = Code(code, self.scanner, self.currentclass)

        ast = self.build_ast(code._tokens, code._customize)
        self.customize(code._customize)
        ast = ast[0][0][0][0][0]

        cursor = ast[iter_index]
        assert cursor == 'list_iter'

        # Walk down to the innermost node.
        while cursor == 'list_iter':
            cursor = cursor[0]  # step down one level
            if cursor == 'list_for':
                designator = cursor[2]
                cursor = cursor[3]
                continue
            if cursor == 'list_if' or cursor == 'list_if_not':
                # FIXME: just a guess
                designator = cursor[1]
                cursor = cursor[2]
        assert cursor == 'lc_body', ast

        self.preorder(cursor[0])

        self.write(' for ')
        mark = len(self.f.getvalue())
        self.preorder(designator)
        self.set_pos_info(designator, mark, len(self.f.getvalue()))

        self.write(' in ')
        mark = len(self.f.getvalue())
        source_node = node[-3]
        source_node.parent = node
        self.preorder(source_node)
        self.set_pos_info(source_node, mark, len(self.f.getvalue()))

        self.prec = outer_prec
Esempio n. 7
0
def python_parser(version,
                  co,
                  out=sys.stdout,
                  showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    """
    Tokenize code object 'co' with the scanner for 'version' and parse it.

    With 'showasm' set, the tokens are printed one per line first.
    'out' is accepted but not used in this function.
    Returns the result of parse().
    """
    assert iscode(co)
    from uncompyle6.scanner import get_scanner

    token_list, custom_rules = get_scanner(version).disassemble(co)
    if showasm:
        for tok in token_list:
            print(tok)

    grammar_parser = get_python_parser(version, parser_debug)
    return parse(grammar_parser, token_list, custom_rules)
Esempio n. 8
0
def deparse_code(version, co, out=StringIO(), showasm=False, showast=False,
                 showgrammar=False):
    """
    Disassemble code object 'co' and deparse it with a FragmentsWalker.

    'showasm' prints every token and 'showast' is passed to the walker.
    A true 'showgrammar' turns on the parser's 'reduce' and 'errorstack'
    debug flags. 'out' is accepted but not used in this function.

    Returns the FragmentsWalker after source generation. If the walker
    recorded an error in its ERROR attribute, that error is raised.
    """

    assert iscode(co)
    scanner = get_scanner(version)

    # Disassemble once; the same token list feeds the listing and the AST.
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    # Work on a copy so the shared default debug settings stay untouched.
    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    if showgrammar:
        debug_parser['reduce'] = showgrammar
        debug_parser['errorstack'] = True

    #  Build AST from disassembly.
    deparsed = FragmentsWalker(version, scanner, showast=showast,
                               debug_parser=debug_parser)

    deparsed.ast = deparsed.build_ast(tokens, customize)

    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens # save memory

    deparsed.mod_globs = pysource.find_globals(deparsed.ast, set())

    # Just when you think we've forgotten about what we
    # were supposed to do: Generate source from AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text))
    deparsed.fixup_parents(deparsed.ast, None)

    # Names still left in mod_globs after source generation are reported.
    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)
    if deparsed.ERROR:
        raise deparsed.ERROR

    return deparsed
Esempio n. 9
0
def disco(version, co, out=None):
    """
    Disassemble the code object 'co' and print its tokens.

    A short header (the Python version and, if there is one, the
    embedded file name) comes first, followed by one token per line and
    a closing blank line. Everything is written to 'out', or to
    sys.stdout when 'out' is falsy.
    """

    assert iscode(co)

    # Resolve the destination once so every line goes to the same stream.
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)

    scanner = get_scanner(version)
    # The customization data returned with the tokens is not needed here.
    tokens, _ = scanner.disassemble(co)

    for t in tokens:
        print(t, file=real_out)
    print(file=real_out)
Esempio n. 10
0
    def comprehension_walk(self, node, iter_index, code_index=-5):
        """Write the source text of a comprehension.

        The comprehension's code object is read from node[code_index],
        or from node[1][1] when the former has no 'attr'. It is scanned
        and parsed, and the body, designator, node[-3] and the iteration
        node are written in that order; text positions are recorded for
        the last three.
        """
        p = self.prec
        self.prec = 27
        if hasattr(node[code_index], 'attr'):
            code = node[code_index].attr
        elif hasattr(node[1][1], 'attr'):
            code = node[1][1].attr
        else:
            assert False

        assert iscode(code)
        code = Code(code, self.scanner, self.currentclass)

        ast = self.build_ast(code._tokens, code._customize)
        self.customize(code._customize)
        ast = ast[0][0][0]

        n = ast[iter_index]
        assert n == 'comp_iter'
        # find innermost node
        while n == 'comp_iter':
            n = n[0] # recurse one step
            if n == 'comp_for':
                n = n[3]
            elif n == 'comp_if':
                n = n[2]
            elif n == 'comp_ifnot':
                n = n[2]
        assert n == 'comp_body', ast

        self.preorder(n[0])
        self.write(' for ')
        start = len(self.f.getvalue())
        designator = ast[iter_index-1]
        self.preorder(designator)
        self.set_pos_info(designator, start, len(self.f.getvalue()))
        self.write(' in ')
        start = len(self.f.getvalue())
        node[-3].parent = node
        self.preorder(node[-3])
        self.set_pos_info(node[-3], start, len(self.f.getvalue()))
        start = len(self.f.getvalue())
        self.preorder(ast[iter_index])
        # Record the position on the node itself; the integer index was
        # being passed here, unlike every other set_pos_info() call.
        self.set_pos_info(ast[iter_index], start, len(self.f.getvalue()))
        self.prec = p
Esempio n. 11
0
def disco(version, co, out=None):
    """
    Disassemble the code object 'co' and print its tokens.

    The output starts with a header giving the Python version and, when
    present, the embedded file name. The tokens follow, one per line,
    and a blank line ends the listing. The destination is 'out', or
    sys.stdout when 'out' is falsy.
    """

    assert iscode(co)

    # Pick the output stream once and use it for every line below.
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename,
              file=real_out)

    scanner = get_scanner(version)
    # Only the tokens are listed; the customization data is discarded.
    tokens, _ = scanner.disassemble(co)

    for t in tokens:
        print(t, file=real_out)
    print(file=real_out)
Esempio n. 12
0
def uncompyle(version,
              co,
              out=None,
              showasm=False,
              showast=False,
              timestamp=None,
              showgrammar=False,
              code_objects=None):
    """
    Disassemble and deparse the code object 'co'.

    A comment header (Python version, the embedded file name if there is
    one, and the compile time if 'timestamp' is given) is printed first.
    The header goes to 'out', or to sys.stdout when 'out' is falsy.
    The remaining arguments are forwarded to pysource.deparse_code().

    'code_objects' defaults to a fresh dictionary on every call; the
    former '{}' default was a single dictionary shared by all calls.

    A pysource.SourceWalkerError raised while deparsing is re-raised,
    after being printed to sys.stdout when 'out' was not usable.
    """

    assert iscode(co)

    if code_objects is None:
        code_objects = {}

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s (decompiled from Python %s)' %
          (version, PYTHON_VERSION),
          file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)
    if timestamp:
        print('# Compiled at: %s' % datetime.datetime.fromtimestamp(timestamp),
              file=real_out)

    try:
        pysource.deparse_code(version,
                              co,
                              out,
                              showasm,
                              showast,
                              showgrammar,
                              code_objects=code_objects)
    except pysource.SourceWalkerError as e:
        # deparsing failed; report on the fallback stream if one was chosen
        if real_out != out:
            print(e, file=real_out)
        raise
Esempio n. 13
0
    def comprehension_walk(self, node, iter_index):
        """Write the source text of a comprehension.

        The comprehension's code object is read from node[-5], scanned
        and parsed. The body, designator, node[-3] and the iteration
        node are then written in that order; text positions are
        recorded for the last three.
        """
        p = self.prec
        self.prec = 27
        code = node[-5].attr

        # Check the object just fetched; 'co' is not a name in this scope.
        assert iscode(code)
        code = Code(code, self.scanner, self.currentclass)

        ast = self.build_ast(code._tokens, code._customize)
        self.customize(code._customize)
        ast = ast[0][0][0]

        n = ast[iter_index]
        assert n == 'comp_iter'
        # find innermost node
        while n == 'comp_iter':
            n = n[0]  # recurse one step
            if n == 'comp_for':
                n = n[3]
            elif n == 'comp_if':
                n = n[2]
            elif n == 'comp_ifnot':
                n = n[2]
        assert n == 'comp_body', ast

        self.preorder(n[0])
        self.write(' for ')
        start = len(self.f.getvalue())
        designator = ast[iter_index - 1]
        self.preorder(designator)
        self.set_pos_info(designator, start, len(self.f.getvalue()))
        self.write(' in ')
        start = len(self.f.getvalue())
        node[-3].parent = node
        self.preorder(node[-3])
        self.set_pos_info(node[-3], start, len(self.f.getvalue()))
        start = len(self.f.getvalue())
        self.preorder(ast[iter_index])
        # Record the position on the node itself; the integer index was
        # being passed here, unlike every other set_pos_info() call.
        self.set_pos_info(ast[iter_index], start, len(self.f.getvalue()))
        self.prec = p
Esempio n. 14
0
    def disassemble(self, co, classname=None, code_objects={}):
        """
        Disassemble a code object, returning a list of 'Token'.
        The main part of this procedure is modelled after
        dis.disassemble().

        Returns a (tokens, customize) pair. 'customize' maps the
        argument-suffixed opcode names built here ('<opname>_<arg>') to
        their argument. When 'classname' is given, names that start with
        that class's private-name prefix are unmangled. 'code_objects'
        is accepted but not used by this method.

        Side effects: sets self.code, self.prev, self.lines and
        self.load_asserts.
        """

        rv = []
        customize = {}
        Token = self.Token # shortcut
        self.code = array('B', co.co_code)

        # Keep the bytecode only up to the last RETURN_VALUE / END_FINALLY.
        # NOTE(review): 'n' is never bound if neither opcode occurs.
        for i in self.op_range(0, len(self.code)):
            if self.code[i] in (RETURN_VALUE, END_FINALLY):
                n = i + 1
        self.code = array('B', co.co_code[:n])

        self.prev = [0]
        # mapping addresses of instruction & argument
        for i in self.op_range(0, n):
            op = self.code[i]
            self.prev.append(i)
            if op >= HAVE_ARGUMENT:
                self.prev.append(i)
                self.prev.append(i)

        self.lines = []
        linetuple = namedtuple('linetuple', ['l_no', 'next'])

        j = 0

        # linestarts is a list of (offset, line number) pairs.
        # Turn that into a dictionary that we can index by offset.
        linestarts = list(dis.findlinestarts(co))
        linestartoffsets = dict(linestarts)

        # For every byte offset record the line it belongs to and the
        # offset at which the following line starts.
        _, prev_line_no = linestarts[0]
        for (start_byte, line_no) in linestarts[1:]:
            while j < start_byte:
                self.lines.append(linetuple(prev_line_no, start_byte))
                j += 1
            # Carry the line number forward; the byte offset was being
            # stored in 'l_no' here.
            prev_line_no = line_no
        while j < n:
            self.lines.append(linetuple(prev_line_no, n))
            j += 1
        # self.lines contains (block,addrLastInstr)
        if classname:
            classname = '_' + classname.lstrip('_') + '__'

            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name

            free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            names = [ unmangle(name) for name in co.co_names ]
            varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames

        # Remember each LOAD_GLOBAL of AssertionError that directly follows
        # a PJIT opcode; those become LOAD_ASSERT tokens below.
        self.load_asserts = set()
        for i in self.op_range(0, n):
            if self.code[i] == PJIT and self.code[i+3] == LOAD_GLOBAL:
                if names[self.get_argument(i+3)] == 'AssertionError':
                    self.load_asserts.add(i+3)

        cf = self.find_jump_targets(self.code)
        # contains (code, [addrRefToCode])
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        # Offsets whose token name is replaced by a '..._CONT' variant.
        replace = {}
        while i < n-1:
            if self.lines[last_stmt].next > i:
                if self.code[last_stmt] == PRINT_ITEM:
                    if self.code[i] == PRINT_ITEM:
                        replace[i] = 'PRINT_ITEM_CONT'
                    elif self.code[i] == PRINT_NEWLINE:
                        replace[i] = 'PRINT_NEWLINE_CONT'
            last_stmt = i
            i = self.next_stmt[i]

        imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if self.code[last_import] == IMPORT_NAME == self.code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i

        extended_arg = 0
        for offset in self.op_range(0, n):
            if offset in cf:
                # One COME_FROM token per jump that targets this offset.
                for k, jump_source in enumerate(cf[offset]):
                    rv.append(Token('COME_FROM', None, repr(jump_source),
                                    offset="%s_%d" % (offset, k)))

            op = self.code[offset]
            op_name = opname[op]

            oparg = None
            pattr = None
            if op >= HAVE_ARGUMENT:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == EXTENDED_ARG:
                    # Carried over and added to the next opcode's argument.
                    extended_arg = oparg * scan.L65536
                    continue
                if op in hasconst:
                    const = co.co_consts[oparg]
                    if iscode(const):
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert op_name == 'LOAD_CONST'
                            op_name = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            op_name = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            op_name = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            op_name = 'LOAD_SETCOMP'
                        # verify() uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in hasname:
                    pattr = names[oparg]
                elif op in hasjrel:
                    pattr = repr(offset + 3 + oparg)
                elif op in hasjabs:
                    pattr = repr(oparg)
                elif op in haslocal:
                    pattr = varnames[oparg]
                elif op in hascompare:
                    pattr = cmp_op[oparg]
                elif op in hasfree:
                    pattr = free[oparg]

            if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
                      UNPACK_SEQUENCE,
                      MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                      CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                      CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS
                      ):
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if op == BUILD_TUPLE and \
                    self.code[self.prev[offset]] == LOAD_CLOSURE:
                    continue
                else:
                    op_name = '%s_%d' % (op_name, oparg)
                    if op != BUILD_SLICE:
                        customize[op_name] = oparg
            elif op == JA:
                target = self.get_target(offset)
                if target < offset:
                    if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                     and offset not in self.not_continue:
                        op_name = 'CONTINUE'
                    else:
                        op_name = 'JUMP_BACK'

            elif op == LOAD_GLOBAL:
                if offset in self.load_asserts:
                    op_name = 'LOAD_ASSERT'
            elif op == RETURN_VALUE:
                if offset in self.return_end_ifs:
                    op_name = 'RETURN_END_IF'

            if offset in linestartoffsets:
                linestart = linestartoffsets[offset]
            else:
                linestart = None

            if offset not in replace:
                rv.append(Token(op_name, oparg, pattr, offset, linestart))
            else:
                rv.append(Token(replace[offset], oparg, pattr, offset, linestart))
        return rv, customize
Esempio n. 15
0
    def make_function(self, node, isLambda, nested=1, code_index=-2):
        """Dump function definition, doc string, and function body.

        'node' is the tree node of the function-creating expression; its
        last child carries the argument counts and node[code_index]
        carries the function's code object. 'isLambda' selects lambda
        syntax ('lambda args: ') over the parenthesised parameter list
        written for a def. 'nested' is not used in this method.

        If parsing the function's code raises ParserError, the error
        text is written, stored in self.ERROR, and the method returns
        early.
        """

        def build_param(ast, name, default):
            """build parameters:
                - handle defaults
                - handle format tuple parameters

            Returns 'name' alone, or 'name=<default source>' when a
            default node is given.
            """
            if self.version < 3.0:
                # if formal parameter is a tuple, the parameter name
                # starts with a dot (eg. '.1', '.2')
                if name.startswith('.'):
                    # replace the name with the tuple-string
                    name = self.get_tuple_parameter(ast, name)
                    pass
                pass

            if default:
                if self.showast:
                    print()
                    print('--', name)
                    print(default)
                    print('--')
                    pass
                result = '%s=' % name
                # Temporarily move last_finish to the end of 'name=' while
                # the default is traversed, then restore it.
                old_last_finish = self.last_finish
                self.last_finish = len(result)
                value = self.traverse(default, indent='')
                self.last_finish = old_last_finish
                result += value
                if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                    result += 'None'
                return result
            else:
                return name

        # node[-1] == MAKE_FUNCTION_n

        args_node = node[-1]
        if isinstance(args_node.attr, tuple):
            # attr is (positional, keyword-only, annotation) counts
            defparams = node[:args_node.attr[0]]
            pos_args, kw_args, annotate_args  = args_node.attr
        else:
            # attr is a single count of positional defaults
            defparams = node[:args_node.attr]
            kw_args, annotate_args  = (0, 0)
            pos_args = args_node.attr
            pass

        code = node[code_index].attr

        assert iscode(code)
        code = Code(code, self.scanner, self.currentclass)

        # add defaults values to parameter names
        argc = code.co_argcount
        paramnames = list(code.co_varnames[:argc])

        # defaults are for last n parameters, thus reverse
        paramnames.reverse(); defparams.reverse()

        try:
            ast = self.build_ast(code._tokens,
                                 code._customize,
                                 isLambda = isLambda,
                                 noneInNames = ('None' in code.co_names))
        except ParserError as p:
            # Report the parse failure in the output and remember it.
            self.write(str(p))
            self.ERROR = p
            return

        # build parameters

        # Both lists are reversed, so zip_longest pairs the last parameters
        # with the defaults and pads the remaining parameters with None.
        params = [build_param(ast, name, default) for
                  name, default in zip_longest(paramnames, defparams, fillvalue=None)]

        params.reverse() # back to correct order

        if 4 & code.co_flags:  # 0x04 (CO_VARARGS) -> variable number of args
            params.append('*%s' % code.co_varnames[argc])
            argc += 1
        if 8 & code.co_flags:  # 0x08 (CO_VARKEYWORDS) -> keyword args
            params.append('**%s' % code.co_varnames[argc])
            argc += 1

        # dump parameter list (with default values)
        indent = self.indent
        if isLambda:
            self.write("lambda ", ", ".join(params))
        else:
            self.write("(", ", ".join(params))
            # self.println(indent, '#flags:\t', int(code.co_flags))

        if kw_args > 0:
            # Keyword-only section: write the '*' separator, then the first
            # child of 'node' that is not a 'pos_arg'.
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            for n in node:
                if n == 'pos_arg':
                    continue
                self.preorder(n)
                break
            pass

        if isLambda:
            self.write(": ")
        else:
            self.println("):")

        if len(code.co_consts)>0 and code.co_consts[0] is not None and not isLambda: # ugly
            # docstring exists, dump it
            self.print_docstring(indent, code.co_consts[0])

        code._tokens = None # save memory
        assert ast == 'stmts'

        # Write 'global' declarations for this function and drop the names
        # it accounts for from the module-level set.
        all_globals = find_all_globals(ast, set())
        for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
            self.println(self.indent, 'global ', g)
        self.mod_globs -= all_globals
        # True when 'None' is among the code's names but find_none() finds
        # nothing in the tree; passed on as returnNone.
        rn = ('None' in code.co_names) and not find_none(ast)
        self.gen_source(ast, code.co_name, code._customize, isLambda=isLambda,
                          returnNone=rn)
        code._tokens = None; code._customize = None # save memory
Esempio n. 16
0
    def disassemble_generic(self, co, classname=None, code_objects=None):
        """
        Convert code object <co> into a sequence of tokens.

        The below is based on (an older version?) of Python dis.disassemble_bytes().

        Returns a (tokens, customize) pair. 'customize' maps the
        argument-suffixed opcode names built here ('<opname>_<arg>') to
        their argument. When 'classname' is given, names that start with
        that class's private-name prefix are unmangled.

        'code_objects' is stored on self.code_objects; when not running
        under Python 3, a string constant found among its keys is
        replaced by the mapped value. It defaults to a fresh dictionary
        per call instead of one dictionary shared by every call.
        """
        if code_objects is None:
            code_objects = {}

        # Container for tokens
        tokens = []
        customize = {}
        self.code = code = array('B', co.co_code)
        codelen = len(code)
        self.build_lines_data(co)
        self.build_prev_op()
        self.code_objects = code_objects

        # self.lines contains (block,addrLastInstr)
        if classname:
            classname = '_' + classname.lstrip('_') + '__'

            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name

            free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            names = [ unmangle(name) for name in co.co_names ]
            varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
            pass

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
        # assertions

        self.load_asserts = set()
        for i in self.op_range(0, codelen):
            if self.code[i] == POP_JUMP_IF_TRUE and self.code[i+3] == LOAD_GLOBAL:
                if names[self.get_argument(i+3)] == 'AssertionError':
                    self.load_asserts.add(i+3)

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets()

        # contains (code, [addrRefToCode])
        # NOTE(review): these two values are not used further down.
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        # Offsets whose token name is replaced by a '..._CONT' variant.
        replace = {}

        imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if self.code[last_import] == IMPORT_NAME == self.code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i

        # Initialize extended arg at 0. When extended arg op is encountered,
        # variable preserved for next cycle and added as arg for next op
        extended_arg = 0

        for offset in self.op_range(0, codelen):
            # Add jump target tokens
            if offset in jump_targets:
                jump_idx = 0
                for jump_offset in jump_targets[offset]:
                    tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                        offset='%s_%s' % (offset, jump_idx)))
                    jump_idx += 1
                    pass
                pass

            op = code[offset]
            op_name = op3.opname[op]

            oparg = None
            pattr = None

            if op >= op3.HAVE_ARGUMENT:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == op3.EXTENDED_ARG:
                    extended_arg = oparg * scan.L65536
                    continue
                if op in op3.hasconst:
                    const = co.co_consts[oparg]
                    if not PYTHON3 and isinstance(const, str):
                        if const in code_objects:
                            const = code_objects[const]
                    if iscode(const):
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert op_name == 'LOAD_CONST'
                            op_name = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            op_name = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            op_name = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            op_name = 'LOAD_SETCOMP'
                        elif const.co_name == '<listcomp>':
                            op_name = 'LOAD_LISTCOMP'
                        # verify() uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in op3.hasname:
                    pattr = names[oparg]
                elif op in op3.hasjrel:
                    pattr = repr(offset + 3 + oparg)
                elif op in op3.hasjabs:
                    pattr = repr(oparg)
                elif op in op3.haslocal:
                    pattr = varnames[oparg]
                elif op in op3.hascompare:
                    pattr = op3.cmp_op[oparg]
                elif op in op3.hasfree:
                    pattr = free[oparg]

            if op_name in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
                            'UNPACK_SEQUENCE',
                            'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE',
                            'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW',
                            'CALL_FUNCTION_VAR_KW', 'RAISE_VARARGS'
                            ):
                # CALL_FUNCTION OP renaming is done as a custom rule in parse3
                if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                                   'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW',
                                   ):
                    op_name = '%s_%d' % (op_name, oparg)
                    # NOTE(review): op_name already carries the '_<arg>'
                    # suffix here, so this comparison looks always true;
                    # confirm whether BUILD_SLICE was meant to be skipped.
                    if op_name != 'BUILD_SLICE':
                        customize[op_name] = oparg
            elif op_name == 'JUMP_ABSOLUTE':
                target = self.get_target(offset)
                if target < offset:
                    if (offset in self.stmts
                        and self.code[offset+3] not in (END_FINALLY, POP_BLOCK)
                        and offset not in self.not_continue):
                        op_name = 'CONTINUE'
                    else:
                        op_name = 'JUMP_BACK'

            elif op_name == 'LOAD_GLOBAL':
                if offset in self.load_asserts:
                    op_name = 'LOAD_ASSERT'
            elif op_name == 'RETURN_VALUE':
                if offset in self.return_end_ifs:
                    op_name = 'RETURN_END_IF'

            if offset in self.linestarts:
                linestart = self.linestarts[offset]
            else:
                linestart = None

            if offset not in replace:
                tokens.append(Token(op_name, oparg, pattr, offset, linestart))
            else:
                tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
            pass
        return tokens, customize
Esempio n. 17
0
    def disassemble_built_in(self, co, classname=None, code_objects={}):
        """
        Tokenize code object 'co' using the instruction stream produced
        by dis.Bytecode().

        Besides wrapping each instruction in a Token, this:
          * emits a 'COME_FROM' pseudo-token in front of every
            instruction that is the target of one or more jumps;
          * renames LOAD_CONST of a code object to LOAD_LAMBDA,
            LOAD_GENEXPR, LOAD_DICTCOMP, LOAD_SETCOMP or LOAD_LISTCOMP
            according to the code object's co_name;
          * appends the argument to variable-arity opcodes
            (e.g. BUILD_LIST -> BUILD_LIST_3);
          * renames backward JUMP_ABSOLUTE to CONTINUE or JUMP_BACK;
          * renames the LOAD_GLOBAL of AssertionError that directly
            follows a POP_JUMP_IF_TRUE to LOAD_ASSERT.

        'classname' and 'code_objects' are accepted for interface
        compatibility; this implementation does not use them.

        Side effects: sets self.code and self.load_asserts, and calls
        self.build_lines_data() and self.build_prev_op().

        Returns a tuple (tokens, {}).
        """
        # Container for tokens
        tokens = []
        customize = {}
        self.code = array('B', co.co_code)
        self.build_lines_data(co)
        self.build_prev_op()

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets()
        instructions = list(dis.Bytecode(co))

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
        # assertions
        self.load_asserts = set()
        for inst, next_inst in zip(instructions, instructions[1:]):
            if (inst.opname == 'POP_JUMP_IF_TRUE'
                    and next_inst.opname == 'LOAD_GLOBAL'
                    and next_inst.argval == 'AssertionError'):
                self.load_asserts.add(next_inst.offset)

        # Token names for LOAD_CONST of a code object, keyed by the
        # code object's co_name.
        code_const_opnames = {
            '<lambda>': 'LOAD_LAMBDA',
            '<genexpr>': 'LOAD_GENEXPR',
            '<dictcomp>': 'LOAD_DICTCOMP',
            '<setcomp>': 'LOAD_SETCOMP',
            '<listcomp>': 'LOAD_LISTCOMP',
        }

        for inst in instructions:
            if inst.offset in jump_targets:
                # One COME_FROM per jump landing here; the string offset
                # '<target>_<index>' keeps the pseudo-tokens distinct.
                for jump_idx, jump_offset in enumerate(
                        jump_targets[inst.offset]):
                    tokens.append(
                        Token('COME_FROM',
                              None,
                              repr(jump_offset),
                              offset='%s_%s' % (inst.offset, jump_idx)))

            pattr = inst.argrepr
            opname = inst.opname

            if opname == 'LOAD_CONST':
                const = inst.argval
                if iscode(const):
                    opname = code_const_opnames.get(const.co_name, opname)
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET',
                            'BUILD_SLICE', 'UNPACK_SEQUENCE', 'MAKE_FUNCTION',
                            'MAKE_CLOSURE', 'DUP_TOPX', 'RAISE_VARARGS'):
                # Fold the argument into the token name.
                opname = '%s_%d' % (opname, inst.argval)
                if inst.opname != 'BUILD_SLICE':
                    customize[opname] = inst.argval

            elif opname == 'JUMP_ABSOLUTE':
                pattr = inst.argval
                target = self.get_target(inst.offset)
                if target < inst.offset:
                    # Backward jump: a 'continue' statement or the jump
                    # that closes a loop body.
                    # (Previously the last test used the undefined name
                    # 'offset', which raised NameError.)
                    if (inst.offset in self.stmts
                            and self.code[inst.offset + 3]
                            not in (END_FINALLY, POP_BLOCK)
                            and inst.offset not in self.not_continue):
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'

            elif inst.offset in self.load_asserts:
                opname = 'LOAD_ASSERT'

            tokens.append(
                Token(
                    type_=opname,
                    attr=inst.argval,
                    pattr=pattr,
                    offset=inst.offset,
                    linestart=inst.starts_line,
                ))

        # NOTE(review): 'customize' is filled in above but a fresh empty
        # dict is returned; kept as is since callers may rely on it --
        # confirm intent.
        return tokens, {}
Esempio n. 18
0
    def disassemble(self, co, classname=None, code_objects={}):
        """
        Disassemble a code object, returning a list of 'Token'.
        The main part of this procedure is modelled after
        dis.disassemble().

        co:         code object to tokenize.
        classname:  if given, names carrying the private-name prefix
                    '_<classname>__' are unmangled in co_names,
                    co_varnames, co_cellvars and co_freevars before
                    they are used as token 'pattr' values.
        code_objects: not used by this implementation.

        Side effects: sets self.code (truncated after the last
        RETURN_VALUE / END_FINALLY), self.prev, self.lines and
        self.load_asserts.

        Returns a tuple (tokens, customize); 'customize' maps each
        argument-suffixed opcode name (e.g. 'BUILD_LIST_3') to its
        argument.
        """
        rv = []
        customize = {}
        Token = self.Token  # shortcut
        self.code = array('B', co.co_code)

        # Drop anything after the last RETURN_VALUE or END_FINALLY.
        # NOTE(review): 'n' stays unbound if neither opcode occurs --
        # presumably impossible for compiler-generated code; confirm.
        for i in self.op_range(0, len(self.code)):
            if self.code[i] in (RETURN_VALUE, END_FINALLY):
                n = i + 1
        self.code = array('B', co.co_code[:n])

        self.prev = [0]
        # mapping addresses of instruction & argument
        for i in self.op_range(0, n):
            op = self.code[i]
            self.prev.append(i)
            if op >= HAVE_ARGUMENT:
                self.prev.append(i)
                self.prev.append(i)

        self.lines = []
        linetuple = namedtuple('linetuple', ['l_no', 'next'])

        j = 0

        # linestarts is a list of (offset, line number) pairs.
        # Turn that into a dict that we can index by offset.
        linestarts = list(dis.findlinestarts(co))
        linestartoffsets = dict(linestarts)

        # For every byte offset record (line number, offset at which the
        # next line starts).
        prev_line_no = linestarts[0][1]
        for (start_byte, line_no) in linestarts[1:]:
            while j < start_byte:
                self.lines.append(linetuple(prev_line_no, start_byte))
                j += 1
            # Carry the line *number* forward; this used to store
            # start_byte, leaving a byte offset in the 'l_no' field.
            prev_line_no = line_no
        while j < n:
            self.lines.append(linetuple(prev_line_no, n))
            j += 1
        # self.lines contains (block,addrLastInstr)
        if classname:
            classname = '_' + classname.lstrip('_') + '__'

            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name

            free = [
                unmangle(name) for name in (co.co_cellvars + co.co_freevars)
            ]
            names = [unmangle(name) for name in co.co_names]
            varnames = [unmangle(name) for name in co.co_varnames]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames

        # Remember each LOAD_GLOBAL AssertionError that directly follows
        # a PJIT; it is turned into LOAD_ASSERT below.
        self.load_asserts = set()
        for i in self.op_range(0, n):
            if self.code[i] == PJIT and self.code[i + 3] == LOAD_GLOBAL:
                if names[self.get_argument(i + 3)] == 'AssertionError':
                    self.load_asserts.add(i + 3)

        cf = self.find_jump_targets(self.code)
        # contains (code, [addrRefToCode])
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}
        # A PRINT_ITEM / PRINT_NEWLINE statement that starts before the
        # line of the preceding PRINT_ITEM statement ends is marked as a
        # continuation.
        while i < n - 1:
            if self.lines[last_stmt].next > i:
                if self.code[last_stmt] == PRINT_ITEM:
                    if self.code[i] == PRINT_ITEM:
                        replace[i] = 'PRINT_ITEM_CONT'
                    elif self.code[i] == PRINT_NEWLINE:
                        replace[i] = 'PRINT_NEWLINE_CONT'
            last_stmt = i
            i = self.next_stmt[i]

        # Likewise, an IMPORT_NAME on the same line as the preceding
        # IMPORT_NAME becomes IMPORT_NAME_CONT.
        imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if self.code[last_import] == IMPORT_NAME == self.code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i

        extended_arg = 0
        for offset in self.op_range(0, n):
            if offset in cf:
                # One COME_FROM per jump that lands on this offset.
                for k, j in enumerate(cf[offset]):
                    rv.append(
                        Token('COME_FROM',
                              None,
                              repr(j),
                              offset="%s_%d" % (offset, k)))

            op = self.code[offset]
            op_name = opname[op]

            oparg = None
            pattr = None
            if op >= HAVE_ARGUMENT:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == EXTENDED_ARG:
                    # Supplies the high bits of the next instruction's
                    # argument; no token is emitted for it.
                    extended_arg = oparg * scan.L65536
                    continue
                if op in hasconst:
                    const = co.co_consts[oparg]
                    if iscode(const):
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert op_name == 'LOAD_CONST'
                            op_name = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            op_name = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            op_name = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            op_name = 'LOAD_SETCOMP'
                        # verify() uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in hasname:
                    pattr = names[oparg]
                elif op in hasjrel:
                    # Relative jumps count from the end of this
                    # 3-byte instruction.
                    pattr = repr(offset + 3 + oparg)
                elif op in hasjabs:
                    pattr = repr(oparg)
                elif op in haslocal:
                    pattr = varnames[oparg]
                elif op in hascompare:
                    pattr = cmp_op[oparg]
                elif op in hasfree:
                    pattr = free[oparg]

            if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
                      UNPACK_SEQUENCE, MAKE_FUNCTION, CALL_FUNCTION,
                      MAKE_CLOSURE, CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                      CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS):
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if op == BUILD_TUPLE and \
                        self.code[self.prev[offset]] == LOAD_CLOSURE:
                    continue
                else:
                    op_name = '%s_%d' % (op_name, oparg)
                    if op != BUILD_SLICE:
                        customize[op_name] = oparg
            elif op == JA:
                target = self.get_target(offset)
                if target < offset:
                    # Backward jump: a 'continue' statement or the jump
                    # that closes a loop body.
                    if (offset in self.stmts
                            and self.code[offset + 3]
                            not in (END_FINALLY, POP_BLOCK)
                            and offset not in self.not_continue):
                        op_name = 'CONTINUE'
                    else:
                        op_name = 'JUMP_BACK'

            elif op == LOAD_GLOBAL:
                if offset in self.load_asserts:
                    op_name = 'LOAD_ASSERT'
            elif op == RETURN_VALUE:
                if offset in self.return_end_ifs:
                    op_name = 'RETURN_END_IF'

            # Line number if this offset starts a source line, else None.
            linestart = linestartoffsets.get(offset)

            if offset not in replace:
                rv.append(Token(op_name, oparg, pattr, offset, linestart))
            else:
                rv.append(
                    Token(replace[offset], oparg, pattr, offset, linestart))
        return rv, customize
Esempio n. 19
0
    def disassemble_built_in(self, co, classname=None,
                             code_objects={}):
        """
        Turn code object 'co' into a list of Tokens, driven by the
        instructions that dis.Bytecode() yields.

        Transformations applied on top of the raw instructions:
          * a 'COME_FROM' pseudo-token precedes each instruction that
            is a jump target, one per jump that lands there;
          * LOAD_CONST of a code object becomes LOAD_LAMBDA,
            LOAD_GENEXPR, LOAD_DICTCOMP, LOAD_SETCOMP or LOAD_LISTCOMP
            depending on the code object's co_name;
          * variable-arity opcodes get their argument appended to the
            name (e.g. BUILD_TUPLE -> BUILD_TUPLE_2);
          * a backward JUMP_ABSOLUTE becomes CONTINUE or JUMP_BACK;
          * a LOAD_GLOBAL of AssertionError immediately after a
            POP_JUMP_IF_TRUE becomes LOAD_ASSERT.

        'classname' and 'code_objects' are not used by this
        implementation; they are kept so the signature is unchanged.

        Side effects: sets self.code and self.load_asserts, and calls
        self.build_lines_data() and self.build_prev_op().

        Returns a tuple (tokens, {}).
        """
        # Container for tokens
        tokens = []
        customize = {}
        self.code = array('B', co.co_code)
        self.build_lines_data(co)
        self.build_prev_op()

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets()
        instructions = list(dis.Bytecode(co))

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
        # assertions
        self.load_asserts = set()
        for inst, next_inst in zip(instructions, instructions[1:]):
            if (inst.opname == 'POP_JUMP_IF_TRUE' and
                    next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                self.load_asserts.add(next_inst.offset)

        # co_name of a constant code object -> token name to use for
        # the LOAD_CONST that loads it.
        code_const_opnames = {
            '<lambda>': 'LOAD_LAMBDA',
            '<genexpr>': 'LOAD_GENEXPR',
            '<dictcomp>': 'LOAD_DICTCOMP',
            '<setcomp>': 'LOAD_SETCOMP',
            '<listcomp>': 'LOAD_LISTCOMP',
        }

        for inst in instructions:
            if inst.offset in jump_targets:
                # The synthetic offset '<target>_<index>' keeps several
                # COME_FROMs at the same target distinguishable.
                for jump_idx, jump_offset in enumerate(
                        jump_targets[inst.offset]):
                    tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                        offset='%s_%s' % (inst.offset, jump_idx)))

            pattr = inst.argrepr
            opname = inst.opname

            if opname == 'LOAD_CONST':
                const = inst.argval
                if iscode(const):
                    opname = code_const_opnames.get(const.co_name, opname)
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
                            'UNPACK_SEQUENCE',
                            'MAKE_FUNCTION', 'MAKE_CLOSURE',
                            'DUP_TOPX', 'RAISE_VARARGS'
                            ):
                # Make the argument part of the token name.
                opname = '%s_%d' % (opname, inst.argval)
                if inst.opname != 'BUILD_SLICE':
                    customize[opname] = inst.argval

            elif opname == 'JUMP_ABSOLUTE':
                pattr = inst.argval
                target = self.get_target(inst.offset)
                if target < inst.offset:
                    # Jumping backwards: either a 'continue' statement
                    # or the jump back to the top of a loop.
                    # (The final test previously referenced the
                    # undefined name 'offset' and raised NameError.)
                    if (inst.offset in self.stmts and
                            self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK)
                            and inst.offset not in self.not_continue):
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'

            elif inst.offset in self.load_asserts:
                opname = 'LOAD_ASSERT'

            tokens.append(
                Token(
                    type_ = opname,
                    attr = inst.argval,
                    pattr = pattr,
                    offset = inst.offset,
                    linestart = inst.starts_line,
                    )
                )

        # NOTE(review): 'customize' is populated above, yet an empty
        # dict is what gets returned; left unchanged because callers may
        # depend on it -- confirm intent.
        return tokens, {}
Esempio n. 20
0
    def disassemble(self, co, classname=None,
                    code_objects={}):
        """
        Tokenize code object 'co' from the instruction stream produced
        by dis35.Bytecode().

        Besides wrapping each instruction in a Token, this:
          * emits a 'COME_FROM' pseudo-token before every instruction
            that is a jump target, one per jump landing there;
          * renames LOAD_CONST of a code object to LOAD_LAMBDA,
            LOAD_GENEXPR, LOAD_DICTCOMP, LOAD_SETCOMP or LOAD_LISTCOMP
            according to the code object's co_name;
          * splits the MAKE_FUNCTION argument into its positional,
            keyword-pair and annotation counts and encodes them in the
            token name as MAKE_FUNCTION[_N<kw>][_A_<annotate>]_<pos>;
          * appends the argument to other variable-arity opcodes
            (e.g. BUILD_MAP -> BUILD_MAP_2);
          * renames backward JUMP_ABSOLUTE to CONTINUE or JUMP_BACK;
          * renames the LOAD_GLOBAL of AssertionError that directly
            follows a POP_JUMP_IF_TRUE to LOAD_ASSERT.

        'classname' and 'code_objects' are accepted for interface
        compatibility; this implementation does not use them.

        Side effects: sets self.code and self.load_asserts, and calls
        self.build_lines_data() and self.build_prev_op().

        Returns a tuple (tokens, {}).
        """

        # import dis; dis.disassemble(co) # DEBUG

        # Container for tokens
        tokens = []

        customize = {}
        self.code = array('B', co.co_code)
        self.build_lines_data(co)
        self.build_prev_op()

        instructions = list(dis35.Bytecode(co))

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
        # assertions
        self.load_asserts = set()
        for inst, next_inst in zip(instructions, instructions[1:]):
            if (inst.opname == 'POP_JUMP_IF_TRUE' and
                    next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                self.load_asserts.add(next_inst.offset)

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets()

        # Token names for LOAD_CONST of a code object, keyed by the
        # code object's co_name.
        code_const_opnames = {
            '<lambda>': 'LOAD_LAMBDA',
            '<genexpr>': 'LOAD_GENEXPR',
            '<dictcomp>': 'LOAD_DICTCOMP',
            '<setcomp>': 'LOAD_SETCOMP',
            '<listcomp>': 'LOAD_LISTCOMP',
        }

        for inst in instructions:
            if inst.offset in jump_targets:
                # The string offset '<target>_<index>' keeps multiple
                # COME_FROMs at one target distinct.
                for jump_idx, jump_offset in enumerate(
                        jump_targets[inst.offset]):
                    tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                        offset='%s_%s' % (inst.offset, jump_idx)))

            pattr = inst.argrepr
            opname = inst.opname

            if opname == 'LOAD_CONST':
                const = inst.argval
                if iscode(const):
                    opname = code_const_opnames.get(const.co_name, opname)
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
            elif opname == 'MAKE_FUNCTION':
                # Unpack the three counts from the argument: low byte,
                # next byte, and the 15 bits above those.
                argc = inst.argval
                attr = ((argc & 0xFF), (argc >> 8) & 0xFF, (argc >> 16) & 0x7FFF)
                pos_args, name_pair_args, annotate_args = attr
                if name_pair_args > 0:
                    opname = 'MAKE_FUNCTION_N%d' % name_pair_args
                if annotate_args > 0:
                    # Previously formatted with a list and the undefined
                    # name 'op_name', which raised for any function
                    # carrying annotations.
                    opname = '%s_A_%d' % (opname, annotate_args)
                opname = '%s_%d' % (opname, pos_args)
                pattr = ("%d positional, %d keyword pair, %d annotated" %
                             (pos_args, name_pair_args, annotate_args))
                tokens.append(
                    Token(
                        type_ = opname,
                        attr = (pos_args, name_pair_args, annotate_args),
                        pattr = pattr,
                        offset = inst.offset,
                        linestart = inst.starts_line)
                    )
                continue
            elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
                            'BUILD_MAP', 'UNPACK_SEQUENCE', 'MAKE_CLOSURE',
                            'RAISE_VARARGS'
                            ):
                pos_args = inst.argval
                # Note: 'customize' is keyed by the base opcode name
                # here, before the argument suffix is appended.
                if inst.opname != 'BUILD_SLICE':
                    customize[opname] = pos_args
                opname = '%s_%d' % (opname, pos_args)
            elif opname == 'JUMP_ABSOLUTE':
                pattr = inst.argval
                target = self.get_target(inst.offset)
                if target < inst.offset:
                    # Backward jump: a 'continue' statement or the jump
                    # that closes a loop body.
                    if (inst.offset in self.stmts and
                            self.code[inst.offset+3] not in (END_FINALLY, POP_BLOCK)
                            and inst.offset not in self.not_continue):
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'

            elif inst.offset in self.load_asserts:
                opname = 'LOAD_ASSERT'

            tokens.append(
                Token(
                    type_ = opname,
                    attr = inst.argval,
                    pattr = pattr,
                    offset = inst.offset,
                    linestart = inst.starts_line,
                    )
                )

        # NOTE(review): 'customize' is filled in above but a fresh empty
        # dict is returned; kept as is since callers may rely on it --
        # confirm intent.
        return tokens, {}
Esempio n. 21
0
def _scanner_for_version(version):
    """
    Import the scanner module matching bytecode `version` and instantiate
    its scanner class.

    Returns a `(module, scanner)` pair; the module is returned as well
    because the caller reads its `JUMP_OPs` attribute.

    Raises RuntimeError when `version` is not one of the handled versions
    (previously this surfaced later as an unbound-local error).
    """
    if version == 2.5:
        import uncompyle6.scanners.scanner25 as scan
        scanner = scan.Scanner25(version)
    elif version == 2.6:
        import uncompyle6.scanners.scanner26 as scan
        scanner = scan.Scanner26(version)
    elif version == 2.7:
        import uncompyle6.scanners.scanner27 as scan
        scanner = scan.Scanner27(version)
    elif version == 3.2:
        import uncompyle6.scanners.scanner32 as scan
        scanner = scan.Scanner32(version)
    elif version == 3.3:
        import uncompyle6.scanners.scanner33 as scan
        scanner = scan.Scanner33(version)
    elif version == 3.4:
        import uncompyle6.scanners.scanner34 as scan
        scanner = scan.Scanner34(version)
    else:
        raise RuntimeError("Unsupported Python version %s" % version)
    return scan, scanner


def _token_type_at(tokens, index):
    """
    Return `tokens[index].type`, or None when `index` lies outside the list.

    Negative indexes are treated as out of range on purpose, so that a
    look-behind at position 0 does not wrap around to the last token.
    """
    if 0 <= index < len(tokens):
        return tokens[index].type
    return None


def _cmp_co_code(version, code_obj1, code_obj2, name):
    """
    Compare the bytecode of two code objects token by token.

    Both code objects are disassembled with the scanner for `version`,
    'COME_FROM' tokens are dropped, and the two token streams are walked
    in parallel.  A few known, equivalent instruction patterns are
    accepted as matches; jump targets are compared through a map from
    offsets in the first stream to offsets in the second.

    Side effect: rebinds the module-level `JUMP_OPs`.

    Raises CmpErrorCodeLen when the second stream ends early and
    CmpErrorCode when two tokens (or their jump targets) do not match.
    """
    global JUMP_OPs

    scan, scanner = _scanner_for_version(version)
    JUMP_OPs = list(scan.JUMP_OPs) + ['JUMP_BACK']

    # The scanner's Token class is replaced only for the duration of the
    # disassembly; try/finally guarantees it is restored even on error.
    scanner.setTokenClass(Token)
    try:
        tokens1 = scanner.disassemble(code_obj1)[0]
        tokens2 = scanner.disassemble(code_obj2)[0]
    finally:
        scanner.resetTokenClass()

    targets1 = dis.findlabels(code_obj1.co_code)
    tokens1 = [t for t in tokens1 if t.type != 'COME_FROM']
    tokens2 = [t for t in tokens2 if t.type != 'COME_FROM']

    i1 = 0
    i2 = 0
    offset_map = {}    # offset in tokens1 -> corresponding offset in tokens2
    check_jumps = {}   # forward target in tokens1 -> [(i1, i2, target in tokens2)]
    while i1 < len(tokens1):
        if i2 >= len(tokens2):
            # The second stream is exhausted.  Accept only the case where
            # the first stream has exactly two extra tokens forming a
            # trailing "LOAD_CONST None; RETURN_VALUE" after a RETURN_VALUE.
            # The explicit length check keeps tokens1[-3] in range.
            if (len(tokens1) == len(tokens2) + 2
                    and len(tokens1) >= 3
                    and tokens1[-1].type == 'RETURN_VALUE'
                    and tokens1[-2].type == 'LOAD_CONST'
                    and tokens1[-2].pattr is None
                    and tokens1[-3].type == 'RETURN_VALUE'):
                break
            raise CmpErrorCodeLen(name, tokens1, tokens2)

        tok1 = tokens1[i1]
        tok2 = tokens2[i2]
        offset_map[tok1.offset] = tok2.offset

        # Resolve forward jumps that were recorded as pointing at this offset.
        for idx1, idx2, offset2 in check_jumps.get(tok1.offset, []):
            if offset2 != tok2.offset:
                raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1],
                                   tokens2[idx2], tokens1, tokens2)

        if tok1.type != tok2.type:
            # NOTE(review): with plain-string token types the condition
            # below can never hold inside this "types differ" branch, so
            # the constant-folding checks under it look unreachable.
            # Left as-is; confirm against Token's comparison semantics
            # before reviving or removing.
            if tok1.type == 'LOAD_CONST' == tok2.type:
                i = 1
                while tokens1[i1+i].type == 'LOAD_CONST':
                    i += 1
                if tokens1[i1+i].type.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \
                      and i == int(tokens1[i1+i].type.split('_')[-1]):
                    t = tuple([ elem.pattr for elem in tokens1[i1:i1+i] ])
                    if t != tok2.pattr:
                        raise CmpErrorCode(name, tok1.offset, tok1,
                                           tok2, tokens1, tokens2)
                    i1 += i + 1
                    i2 += 1
                    continue
                elif i == 2 and tokens1[i1+i].type == 'ROT_TWO' and tokens2[i2+1].type == 'UNPACK_SEQUENCE_2':
                    i1 += 3
                    i2 += 2
                    continue
                elif i == 2 and tokens1[i1+i].type in BIN_OP_FUNCS:
                    f = BIN_OP_FUNCS[tokens1[i1+i].type]
                    if f(tok1.pattr, tokens1[i1+1].pattr) == tok2.pattr:
                        i1 += 3
                        i2 += 1
                        continue
            elif tok1.type == 'UNARY_NOT':
                # "UNARY_NOT; POP_JUMP_IF_x" in the first stream matches the
                # single opposite conditional jump in the second stream.
                next1 = _token_type_at(tokens1, i1 + 1)
                if tok2.type == 'POP_JUMP_IF_TRUE':
                    if next1 == 'POP_JUMP_IF_FALSE':
                        i1 += 2
                        i2 += 1
                        continue
                elif tok2.type == 'POP_JUMP_IF_FALSE':
                    if next1 == 'POP_JUMP_IF_TRUE':
                        i1 += 2
                        i2 += 1
                        continue
            elif (tok1.type in ('JUMP_FORWARD', 'JUMP_BACK')
                  and _token_type_at(tokens1, i1 - 1) == 'RETURN_VALUE'
                  and _token_type_at(tokens2, i2 - 1) in ('RETURN_VALUE',
                                                          'RETURN_END_IF')
                  and int(tok1.offset) not in targets1):
                # A jump right after a return that nothing jumps to:
                # skip it in the first stream only.
                i1 += 1
                continue
            elif (tok1.type == 'JUMP_FORWARD' and tok2.type == 'JUMP_BACK'
                  and _token_type_at(tokens1, i1 + 1) == 'JUMP_BACK'
                  and _token_type_at(tokens2, i2 + 1) == 'JUMP_BACK'
                  and int(tok1.pattr) == int(tok1.offset) + 3):
                # A JUMP_FORWARD whose target is the JUMP_BACK directly
                # after it is accepted against a JUMP_BACK pair.
                if int(tok1.pattr) == int(tokens1[i1 + 1].offset):
                    i1 += 2
                    i2 += 2
                    continue

            raise CmpErrorCode(name, tok1.offset, tok1,
                               tok2, tokens1, tokens2)
        elif tok1.type in JUMP_OPs and tok1.pattr != tok2.pattr:
            dest1 = int(tok1.pattr)
            dest2 = int(tok2.pattr)
            if tok1.type == 'JUMP_BACK':
                # Backward target: its offset has already been mapped.
                if offset_map[dest1] != dest2:
                    raise CmpErrorCode(name, tok1.offset, tok1,
                                       tok2, tokens1, tokens2)
            else:
                # Forward target: remember it and verify once we get there.
                check_jumps.setdefault(dest1, []).append((i1, i2, dest2))

        i1 += 1
        i2 += 1


def cmp_code_objects(version, code_obj1, code_obj2, name=''):
    """
    Compare two code-objects.

    This is the main part of this module.

    version    -- bytecode version used to pick the scanner
                  (2.5, 2.6, 2.7, 3.2, 3.3 or 3.4)
    code_obj1  -- first code object
    code_obj2  -- second code object
    name       -- dotted name of the enclosing scope; used to build the
                  name reported in comparison errors

    Every 'co_*' member not listed in __IGNORE_CODE_MEMBERS__ is compared:
    'co_code' token by token, 'co_consts' by recursing into the nested
    code objects only, and all remaining members by equality.

    Raises CmpErrorCodeLen / CmpErrorCode for bytecode differences,
    CmpErrorMember for any other differing member, AssertionError when an
    argument is not a code object, and RuntimeError for an unsupported
    version.
    """
    assert iscode(code_obj1), \
      "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(code_obj2), \
      "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # Everything is an instance of `object`, so the former old-style-class
    # fallbacks could never run; only this check remains.
    assert dir(code_obj1) == dir(code_obj2)

    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?': name = '__main__'

    # NOTE(review): the "return early when code_equal() succeeds" shortcut
    # was disabled in the original (its `return` is commented out), so the
    # member-by-member comparison below always runs.  The call is kept so
    # that whatever code_equal() itself does is unchanged.
    if code_equal(code_obj1, code_obj2):
        pass

    members = sorted(x for x in dir(code_obj1) if x.startswith('co_'))

    for member in members:
        if member in __IGNORE_CODE_MEMBERS__:
            continue

        if member == 'co_code':
            _cmp_co_code(version, code_obj1, code_obj2, name)
        elif member == 'co_consts':
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = ( c for c in code_obj1.co_consts if hasattr(c, 'co_consts') )
            codes2 = ( c for c in code_obj2.co_consts if hasattr(c, 'co_consts') )

            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, c1, c2, name=name)
        else:
            # all other members must be equal
            value1 = getattr(code_obj1, member)
            value2 = getattr(code_obj2, member)
            if value1 != value2:
                raise CmpErrorMember(name, member, value1, value2)