Example #1
0
    def is_async_fn(node):
        """Tell whether the code object found under ``node[0]`` is asynchronous.

        The first child of ``node[0]`` whose ``attr`` holds a code object is
        examined; when no child qualifies, ``node[0][0]`` is used instead.
        The result is falsy when no code object is available, otherwise it is
        whatever ``co_flags_is_async`` returns for that code's ``co_flags``.
        """
        children = node[0]
        fallback = children[0]
        code_node = next(
            (child for child in children
             if hasattr(child, "attr") and iscode(child.attr)),
            fallback,
        )

        is_code = hasattr(code_node, "attr") and iscode(code_node.attr)
        return is_code and co_flags_is_async(code_node.attr.co_flags)
Example #2
0
    def is_async_fn(node):
        """Tell whether the code object found under ``node[0]`` is asynchronous.

        The first child of ``node[0]`` whose ``attr`` holds a code object is
        examined; when no child qualifies, ``node[0][0]`` is used instead.
        The result is falsy when no code object is available; otherwise it is
        the code's ``co_flags`` masked with the COROUTINE, ITERABLE_COROUTINE
        and ASYNC_GENERATOR bits (non-zero when any of them is set).
        """
        children = node[0]
        fallback = children[0]
        code_node = next(
            (child for child in children
             if hasattr(child, "attr") and iscode(child.attr)),
            fallback,
        )

        is_code = hasattr(code_node, "attr") and iscode(code_node.attr)
        if not is_code:
            return is_code

        async_mask = (COMPILER_FLAG_BIT["COROUTINE"]
                      | COMPILER_FLAG_BIT["ITERABLE_COROUTINE"]
                      | COMPILER_FLAG_BIT["ASYNC_GENERATOR"])
        return code_node.attr.co_flags & async_mask
Example #3
0
def uncompyle(version, co, out=None, showasm=False, showast=False,
              timestamp=None, showgrammar=False, code_objects=None):
    """
    Disassemble and deparse the code object 'co'.

    A comment header (bytecode version, embedded file name and, when
    given, the compile timestamp) is printed to 'out', or to sys.stdout
    when 'out' is falsy. Deparsing is delegated to
    pysource.deparse_code(), which receives the caller's 'out' unchanged.

    A pysource.SourceWalkerError is not propagated: a blank line and the
    embedded file name are printed to standard output and, when 'out'
    was falsy (so the header went to sys.stdout instead), the error text
    is printed to that stream as well.

    'code_objects' defaults to a fresh dict on every call, so no state
    is shared between unrelated invocations.
    """

    assert iscode(co)

    # A literal {} default would be one dict shared by every call.
    if code_objects is None:
        code_objects = {}

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python bytecode %s (decompiled from Python %s)' % (version, PYTHON_VERSION),
          file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename,
              file=real_out)
    if timestamp:
        print('# Compiled at: %s' % datetime.datetime.fromtimestamp(timestamp),
              file=real_out)

    try:
        pysource.deparse_code(version, co, out, showasm, showast, showgrammar,
                              code_objects=code_objects)
    except pysource.SourceWalkerError as e:
        # deparsing failed
        print("\n")
        print(co.co_filename)
        if real_out != out:
            print("\n", file=real_out)
            print(e, file=real_out)
Example #4
0
def disco_loop(disasm, queue, real_out):
    """Disassemble every code object in 'queue', printing to 'real_out'.

    'queue' is consumed from the left. Each code object other than
    '<module>' gets a comment line with its name, first line number and
    file name. 'disasm' is called on the code object and every token it
    yields is printed; a code object found in a token's 'pattr' (or,
    failing that, its 'attr') is appended to 'queue' for later processing.
    """
    while queue:
        co = queue.popleft()
        if co.co_name != '<module>':
            banner = ('\n# %s line %d of %s' %
                      (co.co_name, co.co_firstlineno, co.co_filename))
            print(banner, file=real_out)

        tokens, customize = disasm(co)
        for token in tokens:
            # Prefer 'pattr'; 'attr' is only consulted when 'pattr' is not code.
            if iscode(token.pattr):
                queue.append(token.pattr)
            elif iscode(token.attr):
                queue.append(token.attr)
            print(token, file=real_out)
Example #5
0
    def n_function_def(node):
        """Emit a ``def`` header, or ``async def`` for coroutine code.

        The first child of ``node[0]`` whose ``attr`` holds a code object is
        inspected (falling back to ``node[0][0]``); when that code has the
        COROUTINE flag set the ``async def`` template is used. The node is
        then pruned via ``self.prune()``.
        """
        children = node[0]
        fallback = children[0]
        code_node = next(
            (child for child in children
             if hasattr(child, 'attr') and iscode(child.attr)),
            fallback,
        )

        is_code = hasattr(code_node, 'attr') and iscode(code_node.attr)
        is_coroutine = is_code and (
            code_node.attr.co_flags & COMPILER_FLAG_BIT['COROUTINE'])

        if is_coroutine:
            template = '\n\n%|async def %c\n'
        else:
            template = '\n\n%|def %c\n'
        self.template_engine((template, -2), node)
        self.prune()
Example #6
0
def disco_loop(opc,
               version,
               queue,
               real_out,
               dup_lines=False,
               show_bytes=False):
    """Disassemble a queue of code objects, breadth first.

    Each code object taken from the left of 'queue' is disassembled and
    written to 'real_out'; any code objects found in its co_consts are
    appended to the queue. Code is therefore emitted in order of first
    discovery, which is not the order an assembler wants (definitions
    before uses), but the traversal is not recursive.

    A code-info header is written first for every code object whose name
    is neither '<module>' nor '?'.
    """
    while queue:
        co = queue.popleft()
        if co.co_name not in ("<module>", "?"):
            real_out.write("\n" + format_code_info(co, version) + "\n")

        listing = Bytecode(co, opc, dup_lines=dup_lines)
        real_out.write(listing.dis(show_bytes=show_bytes) + "\n")

        # Schedule nested code objects in the order they appear.
        queue.extend(const for const in co.co_consts if iscode(const))
Example #7
0
def disco(bytecode_version, co, timestamp, out=sys.stdout,
          is_pypy=False, magic_int=None, source_size=None,
          header=True, asm_format=False, show_bytes=False,
          dup_lines=False):
    """
    Write a module header for the code object 'co', then disassemble it.

    The header is produced by show_module_header(), which receives 'out'
    as given. Everything else is written to 'out', or to sys.stdout when
    'out' is falsy. With 'asm_format' set, disco_loop_asm_format() does
    the work; otherwise code info is written (if 'co' has a file name)
    and disco_loop() walks a queue seeded with 'co'.
    """

    assert iscode(co)

    show_module_header(bytecode_version, co, timestamp, out,
                       is_pypy, magic_int, source_size, header,
                       show_filename=False)

    # Fall back to standard output when no usable stream was given.
    real_out = out if out else sys.stdout

    wants_code_info = co.co_filename and not asm_format
    if wants_code_info:
        real_out.write(format_code_info(co, bytecode_version) + "\n")

    opc = get_opcode(bytecode_version, is_pypy)

    if not asm_format:
        # NOTE(review): 'dup_lines' is accepted but not forwarded here --
        # confirm whether that is intentional.
        pending = deque([co])
        disco_loop(opc, bytecode_version, pending, real_out,
                   show_bytes=show_bytes)
        return

    disco_loop_asm_format(opc, bytecode_version, co, real_out,
                          {}, set())
Example #8
0
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG, is_pypy=False):
    """
    Turn a code object into the tree produced by the version-specific parser.

    :param version:         The python version this code is from as a float, for
                            example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
    :param co:              The code object to parse.
    :param out:             File like object for output (not referenced in
                            this function body).
    :param showasm:         Flag handed to maybe_show_asm() together with the
                            ingested tokens.
    :param parser_debug:    dict containing debug flags for the spark parser.
    :param is_pypy:         Passed through to get_scanner().

    :return: Whatever parse() returns for the ingested tokens.
    """

    assert iscode(co)
    from uncompyle6.scanner import get_scanner

    tokens, customize = get_scanner(version, is_pypy).ingest(co)
    maybe_show_asm(showasm, tokens)

    # For heavy grammar debugging, pass something like:
    # parser_debug = {'rules': True, 'transition': True, 'reduce' : True,
    #                 'showstack': 'full'}
    return parse(get_python_parser(version, parser_debug), tokens, customize)
Example #9
0
def disco_loop(opc, version, queue, real_out, dup_lines=False,
               show_bytes=False):
    """Disassemble a queue of code objects, breadth first.

    Each code object taken from the left of 'queue' is disassembled and
    written to 'real_out'; any code objects found in its co_consts are
    appended to the queue. Code is therefore emitted in order of first
    discovery, which is not the order an assembler wants (definitions
    before uses), but the traversal is not recursive.

    A code-info header is written first for every code object whose name
    is neither '<module>' nor '?'.
    """
    while queue:
        co = queue.popleft()
        if co.co_name not in ('<module>', '?'):
            real_out.write("\n" + format_code_info(co, version) + "\n")

        listing = Bytecode(co, opc, dup_lines=dup_lines)
        real_out.write(listing.dis(show_bytes=show_bytes) + "\n")

        # Schedule nested code objects in the order they appear.
        queue.extend(const for const in co.co_consts if iscode(const))
Example #10
0
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG, is_pypy=False):
    """
    Turn a code object into the tree produced by the version-specific parser.

    :param version:         The python version this code is from as a float, for
                            example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
    :param co:              The code object to parse.
    :param out:             File like object for output (not referenced in
                            this function body).
    :param showasm:         Flag handed to maybe_show_asm() together with the
                            ingested tokens.
    :param parser_debug:    dict containing debug flags for the spark parser.
    :param is_pypy:         Passed through to get_scanner().

    :return: Whatever parse() returns for the ingested tokens.
    """

    assert iscode(co)
    from uncompyle6.scanner import get_scanner

    tokens, customize = get_scanner(version, is_pypy).ingest(co)
    maybe_show_asm(showasm, tokens)

    # For heavy grammar debugging, pass something like:
    # parser_debug = {'rules': True, 'transition': True, 'reduce' : True,
    #                 'showstack': 'full'}
    return parse(get_python_parser(version, parser_debug), tokens, customize)
Example #11
0
def uncompyle(
        version, co, out=None, showasm=False, showast=False,
        timestamp=None, showgrammar=False, code_objects=None,
        is_pypy=False, magic_int=None):
    """
    Disassemble and deparse the code object 'co'.

    A comment header is printed to 'out' (sys.stdout when 'out' is
    falsy): the bytecode version with optional magic number, whether the
    bytecode and the running interpreter are PyPy, the embedded file name
    and, when given, the compile timestamp. Deparsing is delegated to
    pysource.deparse_code(), which receives the caller's 'out' unchanged.

    A pysource.SourceWalkerError is not propagated: a blank line and the
    embedded file name are printed to standard output and, when 'out'
    was falsy, the error text is printed to sys.stdout as well.

    'code_objects' defaults to a fresh dict on every call, so no state
    is shared between unrelated invocations.
    """
    assert iscode(co)

    # A literal {} default would be one dict shared by every call.
    if code_objects is None:
        code_objects = {}

    # store final output stream for case of error
    real_out = out or sys.stdout
    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''
    print('# %sPython bytecode %s%s disassembled from %sPython %s' %
          (co_pypy_str, version,
           " (%d)" % magic_int if magic_int else "",
           run_pypy_str, PYTHON_VERSION),
          file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename,
              file=real_out)
    if timestamp:
        print('# Compiled at: %s' % datetime.datetime.fromtimestamp(timestamp),
              file=real_out)

    try:
        pysource.deparse_code(version, co, out, showasm, showast, showgrammar,
                              code_objects=code_objects, is_pypy=is_pypy)
    except pysource.SourceWalkerError as e:
        # deparsing failed
        print("\n")
        print(co.co_filename)
        if real_out != out:
            print("\n", file=real_out)
            print(e, file=real_out)
Example #12
0
def disco_loop(disasm, queue, real_out):
    """Disassemble every code object in 'queue', printing to 'real_out'.

    'queue' is consumed from the left. Each code object other than
    '<module>' gets a comment line with its name, first line number and
    file name. 'disasm' is called on the code object and every token it
    yields is printed; a code object found in a token's 'pattr' (or,
    failing that, its 'attr') is appended to 'queue' for later processing.
    """
    while queue:
        co = queue.popleft()
        if co.co_name != '<module>':
            banner = ('\n# %s line %d of %s' %
                      (co.co_name, co.co_firstlineno, co.co_filename))
            print(banner, file=real_out)

        tokens, customize = disasm(co)
        for token in tokens:
            # Prefer 'pattr'; 'attr' is only consulted when 'pattr' is not code.
            if iscode(token.pattr):
                queue.append(token.pattr)
            elif iscode(token.attr):
                queue.append(token.attr)
            print(token, file=real_out)
Example #13
0
def find_code_node(node, start: int):
    """Return the first child of 'node' whose kind is "LOAD_CODE".

    The search indexes 'node' with -i for i running from -start up to
    len(node) inclusive, so with a negative 'start' it walks backwards
    from node[start] towards node[0].

    Raises:
        AssertionError: if no LOAD_CODE child is found. This is raised
            explicitly rather than via ``assert False`` so the failure is
            still reported when Python runs with -O, where assert
            statements are removed and the function would otherwise fall
            through and return None.
    """
    for i in range(-start, len(node) + 1):
        candidate = node[-i]
        if candidate.kind == "LOAD_CODE":
            # Sanity check only: a LOAD_CODE token is expected to carry code.
            assert iscode(candidate.attr)
            return candidate
    raise AssertionError(
        "did not find code node starting at %d in %s" % (start, node)
    )
Example #14
0
def align_deparse_code(version, co, out=sys.stderr, showasm=False, showast=False,
                 showgrammar=False, code_objects=None, compile_mode='exec', is_pypy=False):
    """
    Ingest and deparse the code object 'co' with an AligningWalker.

    The code is tokenized with the scanner for 'version', parsed into an
    AST, and source text is generated from it. A leading docstring
    assignment and a trailing "return None" node are stripped from the
    AST on a best-effort basis before source generation.

    'code_objects' defaults to a fresh dict on every call, so no state
    is shared between unrelated invocations.

    Returns the AligningWalker instance.
    Raises SourceWalkerError if the walker recorded a parse error.
    """

    assert iscode(co)

    # A literal {} default would be one dict shared by every call.
    if code_objects is None:
        code_objects = {}

    scanner = get_scanner(version, is_pypy=is_pypy)

    tokens, customize = scanner.ingest(co, code_objects=code_objects)
    maybe_show_asm(showasm, tokens)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    if showgrammar:
        debug_parser['reduce'] = showgrammar
        debug_parser['errorstack'] = True

    #  Build AST from disassembly.
    deparsed = AligningWalker(version, scanner, out, showast=showast,
                              debug_parser=debug_parser, compile_mode=compile_mode,
                              is_pypy=is_pypy)

    isTopLevel = co.co_name == '<module>'
    deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel)

    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens  # save memory

    deparsed.mod_globs = find_globals(deparsed.ast, set())

    # convert leading '__doc__ = "..." into doc string
    try:
        if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
            deparsed.print_docstring('', co.co_consts[0])
            del deparsed.ast[0]
        if deparsed.ast[-1] == RETURN_NONE:
            deparsed.ast.pop()  # remove last node
            # todo: if empty, add 'pass'
    except Exception:
        # Best effort: leave the AST as it is when it does not have the
        # expected shape. Catching Exception instead of using a bare
        # except lets KeyboardInterrupt and SystemExit propagate.
        pass

    # What we've been waiting for: Generate source from AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)

    if deparsed.ERROR:
        raise SourceWalkerError("Deparsing stopped due to parse error")
    return deparsed
Example #15
0
            def n_function_def(node):
                """Emit a ``def`` header, or ``async def`` for coroutine code.

                The candidate code node is ``node[0][0]`` when
                ``self.version`` is at least 3.6 and ``node[0][1]``
                otherwise. When it carries a code object with the COROUTINE
                flag set, the ``async def`` template is used. The node is
                then pruned via ``self.prune()``.
                """
                code_index = 0 if self.version >= 3.6 else 1
                code_node = node[0][code_index]

                is_code = hasattr(code_node, 'attr') and iscode(code_node.attr)
                is_coroutine = is_code and (
                    code_node.attr.co_flags & COMPILER_FLAG_BIT['COROUTINE'])

                template = ('\n\n%|async def %c\n' if is_coroutine
                            else '\n\n%|def %c\n')
                self.template_engine((template, -2), node)
                self.prune()
Example #16
0
def number_loop(queue, mappings, opc):
    """Collect line-number mappings for pairs of code objects in 'queue'.

    'queue' holds code objects in pairs (popped two at a time) that must
    share the same co_name. For each pair, one
    [line, offset2line(offset, <line starts of the second code>)] entry is
    added to 'mappings' (in place) per line start of the first code.

    Nested code objects found in instruction argvals of the first code are
    matched, in order, with the next nested code object of the second; a
    matched pair is queued unless its name was already seen for this pair.
    """
    while queue:
        code1 = queue.popleft()
        code2 = queue.popleft()
        assert code1.co_name == code2.co_name

        linestarts_orig = findlinestarts(code1)
        linestarts_uncompiled = list(findlinestarts(code2))
        mappings += [[line, offset2line(offset, linestarts_uncompiled)]
                     for offset, line in linestarts_orig]

        bytecode1 = Bytecode(code1, opc)
        bytecode2 = Bytecode(code2, opc)
        instr2s = bytecode2.get_instructions(code2)
        seen = {code1.co_name}

        for instr in bytecode1.get_instructions(code1):
            next_code1 = instr.argval if iscode(instr.argval) else None
            if not next_code1:
                continue

            # Advance the second instruction stream to its next code object;
            # the stream keeps its position between outer iterations.
            next_code2 = None
            for instr2 in instr2s:
                if iscode(instr2.argval):
                    next_code2 = instr2.argval
                    break
            if not next_code2:
                continue

            assert next_code1.co_name == next_code2.co_name
            if next_code1.co_name not in seen:
                seen.add(next_code1.co_name)
                queue.append(next_code1)
                queue.append(next_code2)
Example #17
0
def number_loop(queue, mappings, opc):
    """Collect line-number mappings for pairs of code objects in 'queue'.

    'queue' holds code objects in pairs (popped two at a time) that must
    share the same co_name. For each pair, one
    [line, offset2line(offset, <line starts of the second code>)] entry is
    added to 'mappings' (in place) per line start of the first code.

    Nested code objects found in instruction argvals of the first code are
    matched, in order, with the next nested code object of the second; a
    matched pair is queued unless its name was already seen for this pair.
    """
    while queue:
        code1 = queue.popleft()
        code2 = queue.popleft()
        assert code1.co_name == code2.co_name

        linestarts_orig = findlinestarts(code1)
        linestarts_uncompiled = list(findlinestarts(code2))
        mappings += [[line, offset2line(offset, linestarts_uncompiled)]
                     for offset, line in linestarts_orig]

        bytecode1 = Bytecode(code1, opc)
        bytecode2 = Bytecode(code2, opc)
        instr2s = bytecode2.get_instructions(code2)
        seen = {code1.co_name}

        for instr in bytecode1.get_instructions(code1):
            next_code1 = instr.argval if iscode(instr.argval) else None
            if not next_code1:
                continue

            # Advance the second instruction stream to its next code object;
            # the stream keeps its position between outer iterations.
            next_code2 = None
            for instr2 in instr2s:
                if iscode(instr2.argval):
                    next_code2 = instr2.argval
                    break
            if not next_code2:
                continue

            assert next_code1.co_name == next_code2.co_name
            if next_code1.co_name not in seen:
                seen.add(next_code1.co_name)
                queue.append(next_code1)
                queue.append(next_code2)
Example #18
0
def disco(
    bytecode_version,
    co,
    timestamp,
    out=sys.stdout,
    is_pypy=False,
    magic_int=None,
    source_size=None,
    header=True,
    asm_format=False,
    show_bytes=False,
    dup_lines=False,
):
    """
    Write a module header for the code object 'co', then disassemble it.

    The header is produced by show_module_header(), which receives 'out'
    as given. Everything else is written to 'out', or to sys.stdout when
    'out' is falsy. With 'asm_format' set, disco_loop_asm_format() does
    the work; otherwise code info is written (if 'co' has a file name)
    and disco_loop() walks a queue seeded with 'co'.
    """

    assert iscode(co)

    show_module_header(
        bytecode_version,
        co,
        timestamp,
        out,
        is_pypy,
        magic_int,
        source_size,
        header,
        show_filename=False,
    )

    # Fall back to standard output when no usable stream was given.
    real_out = out if out else sys.stdout

    wants_code_info = co.co_filename and not asm_format
    if wants_code_info:
        real_out.write(format_code_info(co, bytecode_version) + "\n")

    opc = get_opcode(bytecode_version, is_pypy)

    if not asm_format:
        # NOTE(review): 'dup_lines' is accepted but not forwarded here --
        # confirm whether that is intentional.
        pending = deque([co])
        disco_loop(opc, bytecode_version, pending, real_out,
                   show_bytes=show_bytes)
        return

    disco_loop_asm_format(opc, bytecode_version, co, real_out, {}, set())
Example #19
0
    def n_function_def(node):
        """Emit a ``def`` header, or ``async def`` for coroutine code.

        The children of ``node[0]`` are scanned backwards, starting at the
        second-to-last one, for the first whose ``attr`` holds a code
        object. When one is found and its COROUTINE flag is set, the
        ``async def`` template is used. The node is then pruned via
        ``self.prune()``.
        """
        children = node[0]
        is_code = False
        for idx in range(len(children) - 2, -1, -1):
            code_node = children[idx]
            is_code = hasattr(code_node, 'attr') and iscode(code_node.attr)
            if is_code:
                break

        is_coroutine = is_code and (
            code_node.attr.co_flags & COMPILER_FLAG_BIT['COROUTINE'])
        template = ('\n\n%|async def %c\n' if is_coroutine
                    else '\n\n%|def %c\n')
        self.template_engine((template, -2), node)
        self.prune()
Example #20
0
def disco_loop_asm_format(opc, version, co, real_out):
    """Write a disassembly of 'co' in a format suited to re-assembly.

    Code objects nested in co.co_consts are emitted first, so inner code
    appears before the outer code that uses it. Since this is recursive,
    it uses more stack space at runtime.

    The code-info header is skipped only for a '<module>' code object
    that has no file name.
    """
    for inner in filter(iscode, co.co_consts):
        disco_loop_asm_format(opc, version, inner, real_out)

    is_unnamed_module = co.co_name == '<module>' and not co.co_filename
    if not is_unnamed_module:
        real_out.write("\n" + format_code_info(co, version) + "\n")

    listing = Bytecode(co, opc).dis(asm_format=True)
    real_out.write(listing + "\n")
Example #21
0
def disco_loop_asm_format(opc, version, co, real_out):
    """Write a disassembly of 'co' in a format suited to re-assembly.

    Code objects nested in co.co_consts are emitted first, so inner code
    appears before the outer code that uses it. Since this is recursive,
    it uses more stack space at runtime.

    The code-info header is skipped only for a '<module>' code object
    that has no file name.
    """
    for inner in filter(iscode, co.co_consts):
        disco_loop_asm_format(opc, version, inner, real_out)

    is_unnamed_module = co.co_name == '<module>' and not co.co_filename
    if not is_unnamed_module:
        real_out.write("\n" + format_code_info(co, version) + "\n")

    listing = Bytecode(co, opc).dis(asm_format=True)
    real_out.write(listing + "\n")
Example #22
0
def decompile(bytecode_version,
              co,
              out=None,
              showasm=None,
              showast=False,
              timestamp=None,
              showgrammar=False,
              code_objects=None,
              source_size=None,
              is_pypy=False,
              magic_int=None):
    """
    Ingest and deparse the code object 'co'.

    A comment header is printed to 'out' (sys.stdout when 'out' is
    falsy): the uncompyle6 version, the bytecode version with optional
    magic number, the running interpreter's version, the embedded file
    name and, when given, the compile timestamp and source size.
    Deparsing is delegated to pysource.deparse_code(), which receives
    the caller's 'out' unchanged.

    'code_objects' defaults to a fresh dict on every call, so no state
    is shared between unrelated invocations.

    Raises pysource.SourceWalkerError (re-created from the original
    error's message) when deparsing fails.
    """
    assert iscode(co)

    # A literal {} default would be one dict shared by every call.
    if code_objects is None:
        code_objects = {}

    # store final output stream for case of error
    real_out = out or sys.stdout
    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''
    print('# uncompyle6 version %s\n'
          '# %sPython bytecode %s%s\n# Decompiled from: %sPython %s' %
          (VERSION, co_pypy_str, bytecode_version,
           " (%d)" % magic_int if magic_int else "", run_pypy_str, '\n# '.join(
               sys.version.split('\n'))),
          file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)
    if timestamp:
        print('# Compiled at: %s' % datetime.datetime.fromtimestamp(timestamp),
              file=real_out)
    if source_size:
        print('# Size of source mod 2**32: %d bytes' % source_size,
              file=real_out)

    try:
        pysource.deparse_code(bytecode_version,
                              co,
                              out,
                              showasm,
                              showast,
                              showgrammar,
                              code_objects=code_objects,
                              is_pypy=is_pypy)
    except pysource.SourceWalkerError as e:
        # deparsing failed
        raise pysource.SourceWalkerError(str(e))
Example #23
0
def disco(version, co, out=None, is_pypy=False):
    """
    Print a short header for the code object 'co', then disassemble it.

    Output goes to 'out', or to sys.stdout when 'out' is falsy. The
    header names the Python version and, if present, the embedded file
    name. disco_loop() then walks a queue seeded with 'co', using the
    scanner's ingest() method as the disassembler.
    """

    assert iscode(co)

    # Fall back to standard output when no usable stream was given.
    real_out = out if out else sys.stdout

    print('# Python %s' % version, file=real_out)
    embedded_name = co.co_filename
    if embedded_name:
        print('# Embedded file name: %s' % embedded_name, file=real_out)

    scanner = get_scanner(version, is_pypy=is_pypy)
    pending = deque([co])
    disco_loop(scanner.ingest, pending, real_out)
Example #24
0
def disco(bytecode_version,
          co,
          timestamp,
          out=sys.stdout,
          is_pypy=False,
          magic_int=None,
          source_size=None,
          header=True,
          asm_format=False,
          dup_lines=False):
    """
    Write a pydisasm header for the code object 'co', then disassemble it.

    Output goes to 'out', or to sys.stdout when 'out' is falsy. The
    version banner is written only when 'header' is true; the timestamp
    line only when 'timestamp' is positive; the source-size line only
    when 'source_size' is truthy. With 'asm_format' set,
    disco_loop_asm_format() does the work; otherwise code info is
    written (if 'co' has a file name) and disco_loop() walks a queue
    seeded with 'co'.
    """

    assert iscode(co)

    # Fall back to standard output when no usable stream was given.
    real_out = out if out else sys.stdout
    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''

    if header:
        magic_str = " (%d)" % magic_int if magic_int else ""
        # Keep every line of a multi-line sys.version inside the comment.
        running_version = '\n# '.join(sys.version.split('\n'))
        banner = ('# pydisasm version %s\n# %sPython bytecode %s%s'
                  '\n# Disassembled from %sPython %s\n')
        real_out.write(banner % (VERSION, co_pypy_str, bytecode_version,
                                 magic_str, run_pypy_str, running_version))

    if timestamp > 0:
        value = datetime.datetime.fromtimestamp(timestamp)
        real_out.write('# Timestamp in code: %d' % timestamp)
        real_out.write(value.strftime(' (%Y-%m-%d %H:%M:%S)\n'))

    if source_size:
        real_out.write('# Source code size mod 2**32: %d bytes\n' %
                       source_size)

    wants_code_info = co.co_filename and not asm_format
    if wants_code_info:
        real_out.write(format_code_info(co, bytecode_version) + "\n")

    opc = get_opcode(bytecode_version, is_pypy)

    if not asm_format:
        pending = deque([co])
        disco_loop(opc, bytecode_version, pending, real_out)
        return

    disco_loop_asm_format(opc, bytecode_version, co, real_out)
Example #25
0
def disco(version, co, out=None, is_pypy=False):
    """
    Print a short header for the code object 'co', then disassemble it.

    Output goes to 'out', or to sys.stdout when 'out' is falsy. The
    header names the Python version and, if present, the embedded file
    name. disco_loop() then walks a queue seeded with 'co', using the
    scanner's disassemble() method as the disassembler.
    """

    assert iscode(co)

    # Fall back to standard output when no usable stream was given.
    real_out = out if out else sys.stdout

    print('# Python %s' % version, file=real_out)
    embedded_name = co.co_filename
    if embedded_name:
        print('# Embedded file name: %s' % embedded_name, file=real_out)

    scanner = get_scanner(version, is_pypy=is_pypy)
    pending = deque([co])
    disco_loop(scanner.disassemble, pending, real_out)
Example #26
0
def disco(bytecode_version, co, timestamp, out=sys.stdout,
          is_pypy=False, magic_int=None, source_size=None,
          header=True, asm_format=False):
    """
    Write a pydisasm header for the code object 'co', then disassemble it.

    Output goes to 'out', or to sys.stdout when 'out' is falsy. The
    version banner is written only when 'header' is true; the timestamp
    line only when 'timestamp' is positive; the source-size line only
    when 'source_size' is truthy. With 'asm_format' set,
    disco_loop_asm_format() does the work; otherwise code info is
    written (if 'co' has a file name) and disco_loop() walks a queue
    seeded with 'co'.
    """

    assert iscode(co)

    # Fall back to standard output when no usable stream was given.
    real_out = out if out else sys.stdout
    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''

    if header:
        magic_str = " (%d)" % magic_int if magic_int else ""
        # Keep every line of a multi-line sys.version inside the comment.
        running_version = '\n# '.join(sys.version.split('\n'))
        banner = ('# pydisasm version %s\n# %sPython bytecode %s%s'
                  '\n# Disassembled from %sPython %s\n')
        real_out.write(banner % (VERSION, co_pypy_str, bytecode_version,
                                 magic_str, run_pypy_str, running_version))

    if timestamp > 0:
        value = datetime.datetime.fromtimestamp(timestamp)
        real_out.write('# Timestamp in code: %d' % timestamp)
        real_out.write(value.strftime(' (%Y-%m-%d %H:%M:%S)\n'))

    if source_size:
        real_out.write('# Source code size mod 2**32: %d bytes\n' % source_size)

    wants_code_info = co.co_filename and not asm_format
    if wants_code_info:
        real_out.write(format_code_info(co, bytecode_version) + "\n")

    opc = get_opcode(bytecode_version, is_pypy)

    if not asm_format:
        pending = deque([co])
        disco_loop(opc, bytecode_version, pending, real_out)
        return

    disco_loop_asm_format(opc, bytecode_version, co, real_out)
Example #27
0
def decompile(
        bytecode_version, co, out=None, showasm=None, showast=False,
        timestamp=None, showgrammar=False, code_objects=None,
        source_size=None, is_pypy=False, magic_int=None):
    """
    Ingest and deparse the code object 'co'.

    A comment header is printed to 'out' (sys.stdout when 'out' is
    falsy): the uncompyle6 version, the bytecode version with optional
    magic number, the running interpreter's version, the embedded file
    name and, when given, the compile timestamp and source size.
    Deparsing is delegated to pysource.deparse_code(), which receives
    the caller's 'out' unchanged.

    'code_objects' defaults to a fresh dict on every call, so no state
    is shared between unrelated invocations.

    Raises pysource.SourceWalkerError (re-created from the original
    error's message) when deparsing fails.
    """
    assert iscode(co)

    # A literal {} default would be one dict shared by every call.
    if code_objects is None:
        code_objects = {}

    # store final output stream for case of error
    real_out = out or sys.stdout
    co_pypy_str = 'PyPy ' if is_pypy else ''
    run_pypy_str = 'PyPy ' if IS_PYPY else ''
    print('# uncompyle6 version %s\n'
          '# %sPython bytecode %s%s\n# Decompiled from: %sPython %s' %
          (VERSION, co_pypy_str, bytecode_version,
           " (%d)" % magic_int if magic_int else "",
           run_pypy_str, '\n# '.join(sys.version.split('\n'))),
          file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename,
              file=real_out)
    if timestamp:
        print('# Compiled at: %s' % datetime.datetime.fromtimestamp(timestamp),
              file=real_out)
    if source_size:
        print('# Size of source mod 2**32: %d bytes' % source_size,
              file=real_out)

    try:
        pysource.deparse_code(bytecode_version, co, out, showasm, showast,
                              showgrammar, code_objects=code_objects,
                              is_pypy=is_pypy)
    except pysource.SourceWalkerError as e:
        # deparsing failed
        raise pysource.SourceWalkerError(str(e))
def make_function3(self, node, isLambda, nested=1, codeNode=None):
    """Dump function definition, doc string, and function body in
    Python version 3.0 and above.

    Args:
        node: parse-tree node of the function definition; its last child
            must be a MAKE_FUNCTION_... or MAKE_CLOSURE_... token.
        isLambda: true to emit ``lambda <params>: `` instead of
            ``(<params>):``; it also suppresses the docstring.
        nested: not referenced in this function body.
        codeNode: node whose ``attr`` holds the code object; used whenever
            the code is not taken from a LOAD_LAMBDA child of ``node``.

    Returns:
        None. Output goes through ``self.write``/``self.println`` and
        ``self.gen_source``. If building the AST raises ParserError, the
        error text is written, stored in ``self.ERROR``, and the function
        returns early.
    """

    # For Python 3.3, the evaluation stack in MAKE_FUNCTION is:

    # * default argument objects in positional order
    # * pairs of name and default argument, with the name just below
    #   the object on the stack, for keyword-only parameters
    # * parameter annotation objects
    # * a tuple listing the parameter names for the annotations
    #   (only if there are any annotation objects)
    # * the code associated with the function (at TOS1)
    # * the qualified name of the function (at TOS)

    # For Python 3.0 .. 3.2 the evaluation stack is:
    # The function object is defined to have argc default parameters,
    # which are found below TOS.
    # * first come positional args in the order they are given in the source,
    # * next come the keyword args in the order they are given in the source,
    # * finally is the code associated with the function (at TOS)
    #
    # Note: There is no qualified name at TOS

    # MAKE_CLOSURE adds an additional closure slot

    # Thank you, Python, for a such a well-thought out system that has
    # changed 4 or so times.

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters

        Returns 'name=<default source>' when 'default' is truthy, else
        'name'. The 'ast' argument is not used here.
        """
        if default:
            value = self.traverse(default, indent='')
            maybe_show_ast_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    # Python 3.3+ adds a qualified name at TOS (-1)
    # moving down the LOAD_LAMBDA instruction
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    # NOTE(review): 3.03 looks like a typo for 3.3. As written this branch
    # is taken by every version >= 3.03 not handled above, which still
    # covers 3.3 and later -- confirm the intended bound.
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # pos_args and annotate_argc are unpacked but not used below.
        pos_args, kw_args, annotate_argc = args_node.attr
        if self.version <= 3.3 and len(
                node) > 2 and node[lambda_index] != 'LOAD_LAMBDA':
            # args are after kwargs; kwargs are bundled as one node
            defparams = node[1:args_node.attr[0] + 1]
        else:
            # args are before kwargs; kwargs are bundled as one node
            defparams = node[:args_node.attr[0]]
    else:
        if self.version < 3.6:
            defparams = node[:args_node.attr]
        else:
            # NOTE(review): this 4-way unpack sits in the non-tuple branch
            # and its results are unused -- confirm what attr holds here.
            default, kw, annotate, closure = args_node.attr
            # FIXME: start here for Python 3.6 and above:
            defparams = []
            # if default:
            #     defparams = node[-(2 +  kw + annotate  + closure)]
            # else:
            #     defparams = []

        kw_args = 0
        pass

    # Take the code from the LOAD_LAMBDA child when rendering a lambda and
    # that child carries a code object; otherwise rely on codeNode.
    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    if not 3.0 <= self.version <= 3.1:
        paramnames.reverse()
        defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        # Record the failure and stop; nothing more is emitted for this
        # function.
        self.write(str(p))
        self.ERROR = p
        return

    # NOTE(review): this indexes args_node.attr even when the branch above
    # treated it as a non-tuple -- confirm attr is always indexable here.
    kw_pairs = args_node.attr[1] if self.version >= 3.0 else 0

    # build parameters
    # zip_longest pads the shorter of the two lists with None, so
    # parameters without a default are rendered by name only.
    params = [
        build_param(ast, name, d)
        for name, d in zip_longest(paramnames, defparams, fillvalue=None)
    ]

    if not 3.0 <= self.version <= 3.1:
        params.reverse()  # back to correct order

    if code_has_star_arg(code):
        if self.version > 3.0:
            params.append('*%s' % code.co_varnames[argc + kw_pairs])
        else:
            params.append('*%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if isLambda:
        self.write("lambda ", ", ".join(params))
    else:
        self.write("(", ", ".join(params))
    # self.println(indent, '#flags:\t', int(code.co_flags))

    if kw_args > 0:
        # Bit 4 of co_flags is CO_VARARGS in CPython: when there is no
        # *args parameter, a bare '*' has to introduce the keyword-only
        # parameters.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            pass
        else:
            self.write(", ")

        if not 3.0 <= self.version <= 3.2:
            # Emit the first child that qualifies as keyword arguments,
            # then stop (note the unconditional break).
            for n in node:
                if n == 'pos_arg':
                    continue
                elif self.version >= 3.4 and not (n.type
                                                  in ('kwargs', 'kwarg')):
                    continue
                else:
                    self.preorder(n)
                break
        else:
            # 3.0 .. 3.2: the keyword arguments are the children of node[0].
            kwargs = node[0]
            last = len(kwargs) - 1
            i = 0
            for n in node[0]:
                if n == 'kwarg':
                    self.write('%s=' % n[0].pattr)
                    self.preorder(n[1])
                    if i < last:
                        self.write(', ')
                    i += 1
                    pass
                pass
            pass
        pass

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if len(code.co_consts
           ) > 0 and code.co_consts[0] is not None and not isLambda:  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    # Declare as 'global' the names found by find_all_globals() that are
    # also module globals, plus everything find_globals() reports; the
    # former are then removed from the module's pending set.
    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    code._customize,
                    isLambda=isLambda,
                    returnNone=rn)
    code._tokens = None
    code._customize = None  # save memory
def make_function2(self, node, isLambda, nested=1, codeNode=None):
    """
    Dump function definition, doc string, and function body.
    This code is specialized for Python 2.

    Parameters:
      node     -- tree node whose last child is a MAKE_FUNCTION_... or
                  MAKE_CLOSURE_... token; leading children supply the
                  default-value expressions of the parameters.
      isLambda -- true to emit "lambda <params>: " instead of "(<params>):".
      nested   -- not used by this function.
      codeNode -- node whose "attr" holds the code object of the function.

    Nothing is returned; text is emitted via self.write()/self.println().
    If building the parse tree raises ParserError, the error text is
    written, the exception is saved in self.ERROR, and we return early.
    """

    # FIXME: call make_function3 if we are self.version >= 3.0
    # and then simplify the below.

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        # if formal parameter is a tuple, the parameter name
        # starts with a dot (eg. '.1', '.2')
        if name.startswith('.'):
            # replace the name with the tuple-string
            name = self.get_tuple_parameter(ast, name)

        if default:
            value = self.traverse(default, indent='')
            maybe_show_ast_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are after kwargs
        defparams = node[1:args_node.attr[0] + 1]
        # Only the keyword count is needed below; unpacking still checks
        # that the attribute really is a 3-tuple.
        _pos_args, kw_args, _annotate_argc = args_node.attr
    else:
        defparams = node[:args_node.attr]
        kw_args = 0

    # The code object always comes from codeNode.
    code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    kw_pairs = args_node.attr[1] if self.version >= 3.0 else 0
    # Remember the indentation in effect before anything is written;
    # it is the one used for the docstring below.
    indent = self.indent

    # build parameters
    params = [
        build_param(ast, name, default)
        for name, default in zip_longest(paramnames, defparams, fillvalue=None)
    ]
    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        params.append('*%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if isLambda:
        self.write("lambda ", ", ".join(params))
    else:
        self.write("(", ", ".join(params))

    if kw_args > 0:
        # 4 is the "has *args" bit of co_flags: without a star argument a
        # bare "*" has to separate the keyword-only parameters.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
        else:
            self.write(", ")

        # Emit the first child that is not a 'pos_arg', then stop.
        for n in node:
            if n == 'pos_arg':
                continue
            self.preorder(n)
            break

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        # Always emit the "**name" parameter: a function whose only
        # parameter is "**kwargs" has argc + kw_pairs == 0 and must
        # still show it.
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if len(code.co_consts
           ) > 0 and code.co_consts[0] is not None and not isLambda:  # ugly
        # docstring exists, dump it
        print_docstring(self, indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    # Declare globals before the body is generated.
    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    code._customize,
                    isLambda=isLambda,
                    returnNone=rn)
    code._tokens = None
    code._customize = None  # save memory
Example #30
0
    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Tokenize the bytecode of code object *co* for the deparser.

        Returns the pair (tokens, customize): 'tokens' is the list of
        uncompyle6 'Token's and 'customize' is a dict keyed by token names
        that need custom grammar rules.

        Instructions are adjusted along the way to help the grammar:
           -  a LOAD_CONST is renamed after the kind of code object it loads
           -  COME_FROM pseudo-tokens are inserted at jump targets
           -  variable-argument opcodes (such as BUILD_LIST) get their
              argument count appended to the name and are recorded in
              'customize'

        *show_asm* may be 'before', 'after' or 'both' to print the
        disassembly before tokenizing and/or the tokens afterwards; when it
        is not given, self.show_asm is used. *code_objects* is not used here.
        """

        if not show_asm:
            show_asm = self.show_asm
        if show_asm in ('both', 'before'):
            from xdis.bytecode import Bytecode
            for instr in Bytecode(co, self.opc).get_instructions(co):
                print(instr._disassemble())

        # Result containers
        tokens = []
        customize = {'PyPy': 1} if self.is_pypy else {}

        new_token = self.Token  # local alias

        code_len = self.setup_code(co)

        self.build_lines_data(co, code_len)
        self.build_prev_op(code_len)

        free, names, varnames = self.unmangle_code_names(co, classname)
        self.names = names

        # Find the LOAD_GLOBALs that belong to "assert" statements; they
        # become 'LOAD_ASSERT' tokens further down.
        #
        # "raise AssertionError" and "assert ..." have to be told apart.
        # The heuristic: an assert's LOAD_GLOBAL directly follows a
        # POP_JUMP. (Looking for a following RAISE_VARARGS, or for PyPy a
        # preceding JUMP_IF_NOT_DEBUG, would be alternatives.)
        # FIXME: remove uses of PJIF, and PJIT
        self.load_asserts = set()
        for pos in self.op_range(0, code_len):
            if self.is_pypy:
                have_pop_jump = self.code[pos] in (self.opc.PJIF, self.opc.PJIT)
            else:
                have_pop_jump = self.code[pos] == self.opc.PJIT
            if not have_pop_jump:
                continue

            load_pos = pos + 3
            if (self.code[load_pos] == self.opc.LOAD_GLOBAL and
                    names[self.get_argument(load_pos)] == 'AssertionError'):
                self.load_asserts.add(load_pos)

        # Maps a target offset to the offsets that jump to it
        jump_targets = self.find_jump_targets(show_asm)

        # Walk the statements pairwise so that "print ...," can be told
        # apart from "print ...".
        replace = {}
        prev_stmt = self.next_stmt[0]
        cur_stmt = self.next_stmt[prev_stmt]
        while cur_stmt < code_len - 1:
            if (self.lines[prev_stmt].next > cur_stmt and
                    self.code[prev_stmt] == self.opc.PRINT_ITEM):
                if self.code[cur_stmt] == self.opc.PRINT_ITEM:
                    replace[cur_stmt] = 'PRINT_ITEM_CONT'
                elif self.code[cur_stmt] == self.opc.PRINT_NEWLINE:
                    replace[cur_stmt] = 'PRINT_NEWLINE_CONT'
            prev_stmt = cur_stmt
            cur_stmt = self.next_stmt[cur_stmt]

        # Token names for LOAD_CONSTs of compiler-generated code objects;
        # '<lambda>' is treated separately because it carries an assertion.
        code_const_names = {
            '<genexpr>': 'LOAD_GENEXPR',
            '<dictcomp>': 'LOAD_DICTCOMP',
            '<setcomp>': 'LOAD_SETCOMP',
        }

        extended_arg = 0
        for offset in self.op_range(0, code_len):
            if offset in jump_targets:
                # COME_FROMs for one target are emitted in *descending*
                # order of their source offset, so that the widest
                # instruction interval comes last and specific COME_FROM
                # tags pair up properly: for an "if" nested in a "loop",
                # the "loop" tag has to be last for the grammar to match.
                sources = sorted(jump_targets[offset], reverse=True)
                for jump_idx, jump_offset in enumerate(sources):
                    come_from_name = 'COME_FROM'
                    source_name = self.opc.opname[self.code[jump_offset]]
                    if source_name.startswith('SETUP_') and self.version == 2.7:
                        come_from_type = source_name[len('SETUP_'):]
                        if come_from_type not in ('LOOP', 'EXCEPT'):
                            come_from_name = 'COME_FROM_%s' % come_from_type
                    tokens.append(
                        new_token(come_from_name,
                                  None,
                                  repr(jump_offset),
                                  offset="%s_%d" % (offset, jump_idx),
                                  has_arg=True))

            op = self.code[offset]
            op_name = self.opc.opname[op]

            oparg = pattr = None
            has_arg = op_has_argument(op, self.opc)
            if has_arg:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == self.opc.EXTENDED_ARG:
                    # Carry the high bits over to the next instruction;
                    # EXTENDED_ARG itself produces no token.
                    extended_arg = oparg * scan.L65536
                    continue
                if op in self.opc.hasconst:
                    const = co.co_consts[oparg]
                    if not iscode(const):
                        pattr = const
                    else:
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert op_name == 'LOAD_CONST'
                            op_name = 'LOAD_LAMBDA'
                        else:
                            op_name = code_const_names.get(const.co_name,
                                                           op_name)
                        # verify() compares 'pattr', because 'attr' now
                        # holds the code object and can not be compared
                        # (todo: think about changing this)
                        pattr = '<code_object ' + const.co_name + '>'
                elif op in self.opc.hasname:
                    pattr = names[oparg]
                elif op in self.opc.hasjrel:
                    #  use instead: hasattr(self, 'patch_continue'): ?
                    if self.version == 2.7:
                        self.patch_continue(tokens, offset, op)
                    pattr = repr(offset + 3 + oparg)
                elif op in self.opc.hasjabs:
                    # use instead: hasattr(self, 'patch_continue'): ?
                    if self.version == 2.7:
                        self.patch_continue(tokens, offset, op)
                    pattr = repr(oparg)
                elif op in self.opc.haslocal:
                    pattr = varnames[oparg]
                elif op in self.opc.hascompare:
                    pattr = self.opc.cmp_op[oparg]
                elif op in self.opc.hasfree:
                    pattr = free[oparg]

            if op in self.varargs_ops:
                # CE - Hack for >= 2.5
                #      Values loaded via LOAD_CLOSURE are packed into a
                #      tuple before MAKE_CLOSURE is called; that
                #      BUILD_TUPLE gets no token.
                if (op == self.opc.BUILD_TUPLE and
                        self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE):
                    continue

                if self.is_pypy and not oparg and op_name == 'BUILD_MAP':
                    op_name = 'BUILD_MAP_n'
                else:
                    op_name = '%s_%d' % (op_name, oparg)
                if op != self.opc.BUILD_SLICE:
                    customize[op_name] = oparg
            elif self.is_pypy and op_name in ('LOOKUP_METHOD',
                                              'JUMP_IF_NOT_DEBUG',
                                              'SETUP_EXCEPT', 'SETUP_FINALLY'):
                # Only the presence of the key matters for these names in
                # the semantic actions, so an arbitrary value 0 is stored.
                customize[op_name] = 0
            elif op == self.opc.JUMP_ABSOLUTE:
                # Backward JUMP_ABSOLUTEs are split into loop jumps
                # (JUMP_BACK) and jumps that may come from a "continue"
                # statement (CONTINUE). This helps finding loop boundaries,
                # and keeps "continue" from turning into "pass" where
                # rules treat jumps as mere boundary overhead. Inside
                # comprehensions a JUMP_BACK may get classified as
                # CONTINUE; a grammar rule covers that case.
                if self.get_target(offset) <= offset:
                    is_continue = (
                        offset in self.stmts
                        and self.code[offset + 3] not in (self.opc.END_FINALLY,
                                                          self.opc.POP_BLOCK)
                        and offset not in self.not_continue)
                    op_name = 'CONTINUE' if is_continue else 'JUMP_BACK'

            elif op == self.opc.LOAD_GLOBAL:
                if offset in self.load_asserts:
                    op_name = 'LOAD_ASSERT'
            elif op == self.opc.RETURN_VALUE:
                if offset in self.return_end_ifs:
                    op_name = 'RETURN_END_IF'

            linestart = (self.linestartoffsets[offset]
                         if offset in self.linestartoffsets else None)

            # A print-continuation name recorded earlier overrides the
            # name worked out above.
            tokens.append(
                new_token(replace.get(offset, op_name), oparg, pattr, offset,
                          linestart, op, has_arg, self.opc))

        if show_asm in ('both', 'after'):
            for t in tokens:
                print(t.format(line_prefix='L.'))
            print()
        return tokens, customize
Example #31
0
    def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Turn the bytecode of code object *co* into uncompyle6 'Token's.

        Returns the pair (tokens, customize): 'tokens' is the token list and
        'customize' is a dict keyed by token names that need custom grammar
        rules.

        To help the deparsing grammar, instructions are transformed:
           -  a LOAD_CONST is renamed after the kind of code object it loads
           -  COME_FROM pseudo-tokens are inserted at jump targets
           -  variable-argument opcodes (such as BUILD_LIST) get their
              argument count appended to the name and are recorded in
              'customize'

        When *classname* is given, names starting with '_<classname>__' that
        do not end in '__' have the leading '_<classname>' stripped.
        *show_asm* may be 'before', 'after' or 'both' to print the
        disassembly before tokenizing and/or the tokens afterwards; when it
        is not given, self.show_asm is used. *code_objects* is not used here.
        """

        if not show_asm:
            show_asm = self.show_asm
        if show_asm in ('both', 'before'):
            from xdis.bytecode import Bytecode
            for instr in Bytecode(co, self.opc).get_instructions(co):
                print(instr._disassemble())

        # Result containers
        tokens = []
        customize = {'PyPy': 1} if self.is_pypy else {}

        new_token = self.Token  # local alias

        code_len = self.setup_code(co)

        self.build_lines_data(co, code_len)
        self.build_prev_op(code_len)

        # self.lines contains (block,addrLastInstr)
        if classname:
            prefix = '_' + classname.lstrip('_') + '__'
            # Keep the two trailing underscores of the prefix
            keep_from = len(prefix) - 2

            def unmangle(name):
                if name.startswith(prefix) and not name.endswith('__'):
                    return name[keep_from:]
                return name

            free = [unmangle(name)
                    for name in (co.co_cellvars + co.co_freevars)]
            names = [unmangle(name) for name in co.co_names]
            varnames = [unmangle(name) for name in co.co_varnames]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
        self.names = names

        # Find the LOAD_GLOBALs that belong to "assert" statements; they
        # become 'LOAD_ASSERT' tokens further down.
        #
        # "raise AssertionError" and "assert ..." have to be told apart.
        # The heuristic: an assert's LOAD_GLOBAL directly follows a
        # POP_JUMP. (Looking for a following RAISE_VARARGS, or for PyPy a
        # preceding JUMP_IF_NOT_DEBUG, would be alternatives.)
        # FIXME: remove uses of PJIF, and PJIT
        self.load_asserts = set()
        for pos in self.op_range(0, code_len):
            if self.is_pypy:
                have_pop_jump = self.code[pos] in (self.opc.PJIF,
                                                   self.opc.PJIT)
            else:
                have_pop_jump = self.code[pos] == self.opc.PJIT
            if not have_pop_jump:
                continue

            load_pos = pos + 3
            if (self.code[load_pos] == self.opc.LOAD_GLOBAL and
                    names[self.get_argument(load_pos)] == 'AssertionError'):
                self.load_asserts.add(load_pos)

        # Maps a target offset to the offsets that jump to it
        jump_targets = self.find_jump_targets()

        # Walk the statements pairwise and mark PRINT_ITEM / PRINT_NEWLINE
        # instructions that continue a preceding PRINT_ITEM statement.
        replace = {}
        prev_stmt = self.next_stmt[0]
        cur_stmt = self.next_stmt[prev_stmt]
        while cur_stmt < code_len - 1:
            if (self.lines[prev_stmt].next > cur_stmt and
                    self.code[prev_stmt] == self.opc.PRINT_ITEM):
                if self.code[cur_stmt] == self.opc.PRINT_ITEM:
                    replace[cur_stmt] = 'PRINT_ITEM_CONT'
                elif self.code[cur_stmt] == self.opc.PRINT_NEWLINE:
                    replace[cur_stmt] = 'PRINT_NEWLINE_CONT'
            prev_stmt = cur_stmt
            cur_stmt = self.next_stmt[cur_stmt]

        # Token names for LOAD_CONSTs of compiler-generated code objects;
        # '<lambda>' is treated separately because it carries an assertion.
        code_const_names = {
            '<genexpr>': 'LOAD_GENEXPR',
            '<dictcomp>': 'LOAD_DICTCOMP',
            '<setcomp>': 'LOAD_SETCOMP',
        }

        extended_arg = 0
        for offset in self.op_range(0, code_len):
            if offset in jump_targets:
                # One COME_FROM per instruction jumping here
                for jump_idx, jump_offset in enumerate(jump_targets[offset]):
                    tokens.append(
                        new_token('COME_FROM', None, repr(jump_offset),
                                  offset="%s_%d" % (offset, jump_idx),
                                  has_arg=True))

            op = self.code[offset]
            opname = self.opc.opname[op]

            oparg = pattr = None
            has_arg = (op >= self.opc.HAVE_ARGUMENT)
            if has_arg:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == self.opc.EXTENDED_ARG:
                    # Carry the high bits over to the next instruction;
                    # EXTENDED_ARG itself produces no token.
                    extended_arg = oparg * scan.L65536
                    continue
                if op in self.opc.hasconst:
                    const = co.co_consts[oparg]
                    if not iscode(const):
                        pattr = const
                    else:
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert opname == 'LOAD_CONST'
                            opname = 'LOAD_LAMBDA'
                        else:
                            opname = code_const_names.get(const.co_name,
                                                          opname)
                        # verify() compares 'pattr', because 'attr' now
                        # holds the code object and can not be compared
                        # (todo: think about changing this)
                        pattr = '<code_object ' + const.co_name + '>'
                elif op in self.opc.hasname:
                    pattr = names[oparg]
                elif op in self.opc.hasjrel:
                    #  use instead: hasattr(self, 'patch_continue'): ?
                    if self.version == 2.7:
                        self.patch_continue(tokens, offset, op)
                    pattr = repr(offset + 3 + oparg)
                elif op in self.opc.hasjabs:
                    # use instead: hasattr(self, 'patch_continue'): ?
                    if self.version == 2.7:
                        self.patch_continue(tokens, offset, op)
                    pattr = repr(oparg)
                elif op in self.opc.haslocal:
                    pattr = varnames[oparg]
                elif op in self.opc.hascompare:
                    pattr = self.opc.cmp_op[oparg]
                elif op in self.opc.hasfree:
                    pattr = free[oparg]

            if op in self.varargs_ops:
                # CE - Hack for >= 2.5
                #      Values loaded via LOAD_CLOSURE are packed into a
                #      tuple before MAKE_CLOSURE is called; that
                #      BUILD_TUPLE gets no token.
                if (op == self.opc.BUILD_TUPLE and
                        self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE):
                    continue

                if self.is_pypy and not oparg and opname == 'BUILD_MAP':
                    opname = 'BUILD_MAP_n'
                else:
                    opname = '%s_%d' % (opname, oparg)
                if op != self.opc.BUILD_SLICE:
                    customize[opname] = oparg
            elif self.is_pypy and opname in ('LOOKUP_METHOD',
                                             'JUMP_IF_NOT_DEBUG',
                                             'SETUP_EXCEPT',
                                             'SETUP_FINALLY'):
                # Only the presence of the key matters for these names in
                # the semantic actions, so an arbitrary value 0 is stored.
                customize[opname] = 0
            elif op == self.opc.JUMP_ABSOLUTE:
                # Backward JUMP_ABSOLUTEs are split into loop jumps
                # (JUMP_BACK) and jumps that may come from a "continue"
                # statement (CONTINUE). This helps finding loop boundaries,
                # and keeps "continue" from turning into "pass" where
                # rules treat jumps as mere boundary overhead. Inside
                # comprehensions a JUMP_BACK may get classified as
                # CONTINUE; a grammar rule covers that case.
                if self.get_target(offset) <= offset:
                    is_continue = (
                        offset in self.stmts
                        and self.code[offset + 3] not in (self.opc.END_FINALLY,
                                                          self.opc.POP_BLOCK)
                        and offset not in self.not_continue)
                    opname = 'CONTINUE' if is_continue else 'JUMP_BACK'

            elif op == self.opc.LOAD_GLOBAL:
                if offset in self.load_asserts:
                    opname = 'LOAD_ASSERT'
            elif op == self.opc.RETURN_VALUE:
                if offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'

            linestart = (self.linestartoffsets[offset]
                         if offset in self.linestartoffsets else None)

            # A print-continuation name recorded earlier overrides the
            # name worked out above.
            tokens.append(
                new_token(replace.get(offset, opname), oparg, pattr, offset,
                          linestart, op, has_arg, self.opc))

        if show_asm in ('both', 'after'):
            for t in tokens:
                print(t)
            print()
        return tokens, customize
Example #32
0
    def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an uncompyle6 code object, and transform them,
        returning a list of uncompyle6 'Token's.

        The transformations are made to assist the deparsing grammar.
        Specifically:
           -  various types of LOAD_CONST's are categorized in terms of what they load
           -  COME_FROM instructions are added to assist parsing control structures
           -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments

        Also, when we encounter certain tokens, we add them to a set which will cause custom
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.

        Parameters:
          co           -- the code object to tokenize.
          classname    -- not used by this method.
          code_objects -- not used by this method.
          show_asm     -- 'before', 'after' or 'both' to print the
                          disassembly before tokenizing and/or the tokens
                          afterwards; falls back to self.show_asm when unset.

        Returns the pair (tokens, customize).
        """

        show_asm = self.show_asm if not show_asm else show_asm
        # show_asm = 'both'
        if show_asm in ('both', 'before'):
            bytecode = Bytecode(co, self.opc)
            for instr in bytecode.get_instructions(co):
                print(instr._disassemble())

        # Container for tokens
        tokens = []

        customize = {}
        if self.is_pypy:
            customize['PyPy'] = 1

        self.code = array('B', co.co_code)
        self.build_lines_data(co)
        self.build_prev_op()

        bytecode = Bytecode(co, self.opc)

        # FIXME: put as its own method?
        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()
        bs = list(bytecode)
        n = len(bs)
        for i in range(n):
            inst = bs[i]

            # We need to detect the difference between
            # "raise AssertionError" and "assert"
            # If we have a JUMP_FORWARD after the
            # RAISE_VARARGS then we have a "raise" statement
            # else we have an "assert" statement.
            if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
                next_inst = bs[i+1]
                if (next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                    # Only the first RAISE_VARARGS after the load decides.
                    for j in range(i+2, n):
                        raise_inst = bs[j]
                        if raise_inst.opname.startswith('RAISE_VARARGS'):
                            if j+1 >= n or bs[j+1].opname != 'JUMP_FORWARD':
                                self.load_asserts.add(next_inst.offset)
                                pass
                            break
                    pass
                pass

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets()

        for inst in bytecode:

            argval = inst.argval
            if inst.offset in jump_targets:
                # One COME_FROM pseudo-token per instruction jumping here
                jump_idx = 0
                for jump_offset in jump_targets[inst.offset]:
                    tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                        offset='%s_%s' % (inst.offset, jump_idx),
                                        has_arg = True, opc=self.opc))
                    jump_idx += 1
                    pass
                pass

            pattr = inst.argrepr
            opname = inst.opname
            op = inst.opcode

            if opname in ['LOAD_CONST']:
                const = inst.argval
                if iscode(const):
                    if const.co_name == '<lambda>':
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    elif const.co_name == '<listcomp>':
                        opname = 'LOAD_LISTCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
                    pass
            elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'):
                # The three argument counts are encoded into the token name:
                #   <opname>[_N<keyword pairs>][_A_<annotations>]_<positional>
                pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval)
                if name_pair_args > 0:
                    opname = '%s_N%d' % (opname, name_pair_args)
                    pass
                if annotate_args > 0:
                    # The right-hand side must be a tuple: with a list,
                    # '%' treats it as ONE value and raises TypeError.
                    opname = '%s_A_%d' % (opname, annotate_args)
                    pass
                opname = '%s_%d' % (opname, pos_args)
                pattr = ("%d positional, %d keyword pair, %d annotated" %
                             (pos_args, name_pair_args, annotate_args))
                tokens.append(
                    Token(
                        type_ = opname,
                        attr = (pos_args, name_pair_args, annotate_args),
                        pattr = pattr,
                        offset = inst.offset,
                        linestart = inst.starts_line,
                        op = op,
                        has_arg = op_has_argument(op, op3),
                        opc = self.opc
                    )
                )
                # The token is already appended; skip the generic append
                # at the bottom of the loop.
                continue
            elif op in self.varargs_ops:
                pos_args = inst.argval
                if self.is_pypy and not pos_args and opname == 'BUILD_MAP':
                    opname = 'BUILD_MAP_n'
                else:
                    opname = '%s_%d' % (opname, pos_args)
            elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'):
                # The value in the dict is in special cases in semantic actions, such
                # as CALL_FUNCTION. The value is not used in these cases, so we put
                # in arbitrary value 0.
                customize[opname] = 0
            elif opname == 'UNPACK_EX':
                # FIXME: try with scanner and parser by
                # changing inst.argval
                # Low byte: number of targets before the starred one;
                # next byte: number of targets after it.
                before_args = inst.argval & 0xFF
                after_args = (inst.argval >> 8) & 0xff
                pattr = "%d before vararg, %d after" % (before_args, after_args)
                argval = (before_args, after_args)
                opname = '%s_%d+%d' % (opname, before_args, after_args)
            elif op == self.opc.JUMP_ABSOLUTE:
                # Further classify JUMP_ABSOLUTE into backward jumps
                # which are used in loops, and "CONTINUE" jumps which
                # may appear in a "continue" statement.  The loop-type
                # and continue-type jumps will help us classify loop
                # boundaries The continue-type jumps help us get
                # "continue" statements with would otherwise be turned
                # into a "pass" statement because JUMPs are sometimes
                # ignored in rules as just boundary overhead. In
                # comprehensions we might sometimes classify JUMP_BACK
                # as CONTINUE, but that's okay since we add a grammar
                # rule for that.
                pattr = inst.argval
                target = self.get_target(inst.offset)
                if target <= inst.offset:
                    next_opname = self.opname[self.code[inst.offset+3]]
                    if (inst.offset in self.stmts and
                        next_opname not in ('END_FINALLY', 'POP_BLOCK')
                        and inst.offset not in self.not_continue):
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        # There are other situations were we don't catch
                        # CONTINUE as well.
                        if tokens[-1].type == 'JUMP_BACK':
                            tokens[-1].type = intern('CONTINUE')

            elif op == self.opc.RETURN_VALUE:
                if inst.offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'
            elif inst.offset in self.load_asserts:
                opname = 'LOAD_ASSERT'

            tokens.append(
                Token(
                    type_ = opname,
                    attr = argval,
                    pattr = pattr,
                    offset = inst.offset,
                    linestart = inst.starts_line,
                    op = op,
                    has_arg = (op >= op3.HAVE_ARGUMENT),
                    opc = self.opc
                    )
                )
            pass

        if show_asm in ('both', 'after'):
            for t in tokens:
                print(t)
            print()
        return tokens, customize
def make_function3_annotate(self,
                            node,
                            is_lambda,
                            nested=1,
                            code_node=None,
                            annotate_last=-1):
    """
    Dump function definition, doc string, and function
    body. This code is specialized for Python 3 functions whose
    parameters and/or return value carry annotations.

    Everything is emitted through ``self.write``/``self.println``; nothing
    is returned.

    :param node: parse-tree node for the function; its last child must be
        a ``MAKE_...`` token.
    :param is_lambda: true when a ``lambda`` rather than a ``def`` header
        is being written.
    :param nested: accepted for interface compatibility; not used here.
    :param code_node: node whose ``attr`` holds the code object; used
        whenever the code is not taken from a LOAD_LAMBDA child.
    :param annotate_last: accepted for interface compatibility; the value
        is recomputed below by scanning ``node`` for its 'annotate_tuple'
        child.

    If building the parse tree of the function body fails, the parser
    error text is written to the output, ``self.ERROR`` is set (unless
    ``self.tolerate_errors``), and the function returns early.
    """
    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith('MAKE_')

    # Find the right-most 'annotate_tuple' child. If there is none the
    # loop runs to completion and annotate_last ends up as 0.
    annotate_tuple = None
    for annotate_last in range(len(node) - 1, -1, -1):
        if node[annotate_last] == 'annotate_tuple':
            annotate_tuple = node[annotate_last]
            break
    annotate_args = {}

    if (annotate_tuple == 'annotate_tuple'
            and annotate_tuple[0] in ('LOAD_CONST', 'LOAD_NAME')
            and isinstance(annotate_tuple[0].attr, tuple)):
        # Pair annotated names (read from the end of the tuple) with the
        # annotation nodes that sit just before the annotate_tuple child.
        annotate_tup = annotate_tuple[0].attr
        i = -1
        j = annotate_last - 1
        lower_bound = -len(node)
        while (j >= lower_bound
               and node[j].kind in ('annotate_arg', 'annotate_tuple')):
            annotate_args[annotate_tup[i]] = node[j][0]
            i -= 1
            j -= 1

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are before kwargs
        defparams = node[:args_node.attr[0]]
    else:
        defparams = node[:args_node.attr]

    # Source text of every annotation, keyed by parameter name
    # (or 'return').
    annotate_dict = {}
    for name in annotate_args:
        annotate_dict[name] = self.traverse(annotate_args[name], indent='')

    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(code.co_varnames[:argc])
    if kwonlyargcount > 0:
        kwargs = list(code.co_varnames[argc:argc + kwonlyargcount])

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             is_lambda=is_lambda,
                             noneInNames=('None' in code.co_names))
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    if is_lambda:
        self.write("lambda ")
    else:
        self.write("(")

    # Continuation lines of the parameter list are aligned under the
    # column that follows what has just been written.
    last_line = self.f.getvalue().split("\n")[-1]
    indent = ' ' * len(last_line)
    line_number = self.line_number

    # Parameters without defaults come first.
    i = len(paramnames) - len(defparams)
    suffix = ''

    # Consulted when deciding whether the return annotation is wrapped
    # onto its own line.
    no_paramnames = not paramnames[:i]

    for param in paramnames[:i]:
        self.write(suffix, param)
        suffix = ', '
        if param in annotate_dict:
            self.write(': %s' % annotate_dict[param])
            if (line_number != self.line_number):
                suffix = ",\n" + indent
                line_number = self.line_number
            # value, string = annotate_args[param]
            # if string:
            #     self.write(': "%s"' % value)
            # else:
            #     self.write(': %s' % value)

    # Parameters with defaults: one 'pos_arg' child per default value.
    suffix = ', ' if i > 0 else ''
    for n in node:
        if n == 'pos_arg':
            self.write(suffix)
            param = paramnames[i]
            self.write(param)
            if param in annotate_args:
                aa = annotate_args[param]
                if isinstance(aa, tuple):
                    aa = aa[0]
                    self.write(': "%s"' % aa)
                elif isinstance(aa, SyntaxTree):
                    self.write(': ')
                    self.preorder(aa)

            self.write('=')
            i += 1
            self.preorder(n)
            if (line_number != self.line_number):
                suffix = ",\n" + indent
                line_number = self.line_number
            else:
                suffix = ', '

    if code_has_star_arg(code):
        star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_arg in annotate_dict:
            self.write(suffix, '*%s: %s' % (star_arg, annotate_dict[star_arg]))
        else:
            self.write(suffix, '*%s' % star_arg)
        argc += 1

    # self.println(indent, '#flags:\t', int(code.co_flags))
    ends_in_comma = False
    if kwonlyargcount > 0:
        if not code_has_star_arg(code):
            # A bare '*' separates positional from keyword-only names.
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            ends_in_comma = True
        else:
            if argc > 0:
                self.write(", ")
                ends_in_comma = True

        kw_args = [None] * kwonlyargcount

        # Keyword-only parameters that have a default value.
        for n in node:
            if n == 'kwargs':
                n = n[0]
            if n == 'kwarg':
                # NOTE(review): eval() of token text; presumably pattr is
                # the repr of a string constant taken from the bytecode.
                # Do not feed this untrusted text without confirming.
                name = eval(n[0].pattr)
                idx = kwargs.index(name)
                default = self.traverse(n[1], indent='')
                if annotate_dict and name in annotate_dict:
                    kw_args[idx] = '%s: %s=%s' % (name, annotate_dict[name],
                                                  default)
                else:
                    kw_args[idx] = '%s=%s' % (name, default)

        # handling other args: keyword-only names with no default
        for idx, name in enumerate(kwargs):
            if kw_args[idx] is None:
                if name in annotate_dict:
                    kw_args[idx] = "%s: %s" % (name, annotate_dict[name])
                else:
                    kw_args[idx] = "%s" % name

        self.write(', '.join(kw_args))
        ends_in_comma = False

    else:
        if argc == 0:
            ends_in_comma = True

    if code_has_star_star_arg(code):
        if not ends_in_comma:
            self.write(', ')
        star_star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_star_arg in annotate_dict:
            self.write('**%s: %s' %
                       (star_star_arg, annotate_dict[star_star_arg]))
        else:
            self.write('**%s' % star_star_arg)

    if is_lambda:
        self.write(": ")
    else:
        self.write(')')
        # annotate_tuple is None when no annotate_tuple child was found;
        # in that case there is no return annotation to write.
        if (annotate_tuple is not None
                and 'return' in annotate_tuple[0].attr):
            if (line_number != self.line_number) and not no_paramnames:
                self.write("\n" + indent)
                line_number = self.line_number
            self.write(' -> ')
            # value, string = annotate_args['return']
            # if string:
            #     self.write(' -> "%s"' % value)
            # else:
            #     self.write(' -> %s' % value)
            self.preorder(node[annotate_last - 1])

        self.println(":")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None
            and not is_lambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    globals, nonlocals = find_globals_and_nonlocals(ast, set(), set(), code,
                                                    self.version)
    for g in sorted((all_globals & self.mod_globs) | globals):
        self.println(self.indent, 'global ', g)
    for nl in sorted(nonlocals):
        self.println(self.indent, 'nonlocal ', nl)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    code._customize,
                    is_lambda=is_lambda,
                    returnNone=rn)
    code._tokens = code._customize = None  # save memory
Example #34
0
    def n_classdef3(node):
        """Write a Python 3 class definition ('class X(A,B,C):') and its body.

        ``self`` is taken from the enclosing scope.  The shape of the
        parse tree differs between Python 3.0-3.2, 3.3-3.5 and 3.6+, so
        the class name, the node describing the base classes and the
        code object of the class body are each dug out per version.

        :param node: a class-definition parse-tree node; the decorated
            form is recognised by comparing equal to 'classdefdeco2'.
        :raises RuntimeError: when the class body or the superclass
            information cannot be located in a closure-style class.

        Ends by calling ``self.prune()``.
        """
        # class definition ('class X(A,B,C):')
        cclass = self.currentclass

        # Pick out various needed bits of information
        # * class_name - the name of the class
        # * subclass_info - the parameters to the class  e.g.
        #      class Foo(bar, baz)
        #               ----------
        # * subclass_code - the code for the subclass body
        subclass_info = None
        if node == 'classdefdeco2':
            if self.version >= 3.6:
                class_name = node[1][1].pattr
            elif self.version <= 3.3:
                class_name = node[2][0].pattr
            else:
                class_name = node[1][2].pattr
            build_class = node
        else:
            build_class = node[0]
            if self.version >= 3.6:
                if build_class == 'build_class_kw':
                    mkfunc = build_class[1]
                    assert mkfunc == 'mkfunc'
                    subclass_info = build_class
                    if hasattr(mkfunc[0], 'attr') and iscode(mkfunc[0].attr):
                        subclass_code = mkfunc[0].attr
                    else:
                        assert mkfunc[0] == 'load_closure'
                        subclass_code = mkfunc[1].attr
                        assert iscode(subclass_code)
                if build_class[1][0] == 'load_closure':
                    code_node = build_class[1][1]
                else:
                    code_node = build_class[1][0]
                class_name = code_node.attr.co_name
            else:
                class_name = node[1][0].pattr

        assert 'mkfunc' == build_class[1]
        mkfunc = build_class[1]
        if mkfunc[0] in ('kwargs', 'no_kwargs'):
            if 3.0 <= self.version <= 3.2:
                for n in mkfunc:
                    if hasattr(n, 'attr') and iscode(n.attr):
                        subclass_code = n.attr
                        break
                    elif n == 'expr':
                        subclass_code = n[0].attr
            else:
                for n in mkfunc:
                    if hasattr(n, 'attr') and iscode(n.attr):
                        subclass_code = n.attr
                        break
            if node == 'classdefdeco2':
                subclass_info = node
            else:
                subclass_info = node[0]
        elif build_class[1][0] == 'load_closure':
            # Python 3 with closures not functions
            load_closure = build_class[1]
            if hasattr(load_closure[-3], 'attr'):
                # Python 3.3 classes with closures work like this.
                # Note have to test before 3.2 case because
                # index -2 also has an attr.
                subclass_code = load_closure[-3].attr
            elif hasattr(load_closure[-2], 'attr'):
                # Python 3.2 works like this
                subclass_code = load_closure[-2].attr
            else:
                # Raising a plain string is itself a TypeError in
                # Python 3, so raise a real exception with the message.
                raise RuntimeError(
                    'Internal Error n_classdef: cannot find class body')
            if hasattr(build_class[3], '__len__'):
                if not subclass_info:
                    subclass_info = build_class[3]
            elif hasattr(build_class[2], '__len__'):
                subclass_info = build_class[2]
            else:
                raise RuntimeError(
                    'Internal Error n_classdef: cannot superclass name')
        elif self.version >= 3.6 and node == 'classdefdeco2':
            subclass_info = node
            subclass_code = build_class[1][0].attr
        elif not subclass_info:
            # mkfunc[0] is neither 'kwargs' nor 'no_kwargs' here: that
            # case was taken by the first branch of this chain.
            subclass_code = mkfunc[0].attr
            if node == 'classdefdeco2':
                subclass_info = node
            else:
                subclass_info = node[0]

        if (node == 'classdefdeco2'):
            self.write('\n')
        else:
            self.write('\n\n')

        self.currentclass = str(class_name)
        self.write(self.indent, 'class ', self.currentclass)

        self.print_super_classes3(subclass_info)
        self.println(':')

        # class body
        self.indent_more()
        self.build_class(subclass_code)
        self.indent_less()

        # Restore the enclosing class name saved on entry.
        self.currentclass = cclass
        if len(self.param_stack) > 1:
            self.write('\n\n')
        else:
            self.write('\n\n\n')

        self.prune()
Example #35
0
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
    """
    Compare two code-objects.

    This is the main part of this module.

    Every ``co_*`` member of the two code objects is compared, with
    special handling for ``co_code`` (token-by-token comparison that
    tolerates a set of known-equivalent instruction sequences),
    ``co_consts`` (only nested code objects are compared, recursively)
    and ``co_flags`` (some flag bits are masked off first).

    :param version: Python version passed on to ``get_scanner``.
    :param is_pypy: true when the bytecode is PyPy bytecode.
    :param code_obj1: first code object.
    :param code_obj2: second code object.
    :param verify: verification level string; compared below against
        "verify" and "strong".
    :param name: dotted name of the enclosing scope, used in error
        reports.
    :raises CmpErrorCodeLen: when the first token stream is longer than
        the second and the difference is not a trailing
        ``LOAD_CONST None; RETURN_VALUE``.
    :raises CmpErrorCode: on an instruction mismatch.
    :raises CmpErrorMember: when another member differs.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(
        code_obj1
    ), "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(
        code_obj2
    ), "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new style classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    if name == "__main__":
        name = code_obj1.co_name
    else:
        name = "%s.%s" % (name, code_obj1.co_name)
        if name == ".?":
            name = "__main__"

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is not equal
        # if this compare succeeds, simply return
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith("co_")]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        # NOTE(review): as written, the branches below are only reached
        # when verify == "verify", and the co_code branch then skips
        # itself because verify != "strong".  Confirm the intended
        # verify levels before relying on the co_code comparison.
        if member in __IGNORE_CODE_MEMBERS__ or verify != "verify":
            pass
        elif member == "co_code":
            if verify != "strong":
                continue
            scanner = get_scanner(version, is_pypy, show_asm=False)

            global JUMP_OPS
            JUMP_OPS = list(scan.JUMP_OPS) + ["JUMP_BACK"]

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"]
            tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"]

            i1 = 0
            i2 = 0
            # offset in code_obj1 -> offset of the matching token in
            # code_obj2, filled in as the two streams are walked.
            offset_map = {}
            # forward-jump target offset in code_obj1 ->
            # [(index1, index2, expected target offset in code_obj2)]
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    if (len(tokens1) == len(tokens2) + 2
                            and tokens1[-1].kind == "RETURN_VALUE"
                            and tokens1[-2].kind == "LOAD_CONST"
                            and tokens1[-2].pattr is None
                            and tokens1[-3].kind == "RETURN_VALUE"):
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Any forward jump recorded earlier that lands here must
                # land on the corresponding token of the second stream.
                for idx1, idx2, offset2 in check_jumps.get(
                        tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(
                            name,
                            tokens1[idx1].offset,
                            tokens1[idx1],
                            tokens2[idx2],
                            tokens1,
                            tokens2,
                        )

                if tokens1[i1].kind != tokens2[i2].kind:
                    # NOTE(review): the chained comparison below needs
                    # both kinds to be "LOAD_CONST", which looks
                    # unreachable inside a branch where the kinds differ
                    # - confirm what was intended.
                    if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind:
                        i = 1
                        while tokens1[i1 + i].kind == "LOAD_CONST":
                            i += 1
                        if tokens1[i1 + i].kind.startswith(
                            ("BUILD_TUPLE", "BUILD_LIST")) and i == int(
                                tokens1[i1 + i].kind.split("_")[-1]):
                            t = tuple(
                                [elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(
                                    name,
                                    tokens1[i1].offset,
                                    tokens1[i1],
                                    tokens2[i2],
                                    tokens1,
                                    tokens2,
                                )
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif (i == 2 and tokens1[i1 + i].kind == "ROT_TWO"
                              and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2"):
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
                            f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
                            if (f(tokens1[i1].pattr,
                                  tokens1[i1 + 1].pattr) == tokens2[i2].pattr):
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == "UNARY_NOT":
                        if tokens2[i2].kind == "POP_JUMP_IF_TRUE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE":
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == "POP_JUMP_IF_FALSE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE":
                                i1 += 2
                                i2 += 1
                                continue
                    elif (tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK")
                          and tokens1[i1 - 1].kind == "RETURN_VALUE"
                          and tokens2[i2 - 1].kind
                          in ("RETURN_VALUE", "RETURN_END_IF")
                          and int(tokens1[i1].offset) not in targets1):
                        i1 += 1
                        continue
                    elif (tokens1[i1].kind == "JUMP_BACK"
                          and tokens2[i2].kind == "CONTINUE"):
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif (tokens1[i1].kind == "JUMP_FORWARD"
                          and tokens2[i2].kind == "JUMP_BACK"
                          and tokens1[i1 + 1].kind == "JUMP_BACK"
                          and tokens2[i2 + 1].kind == "JUMP_BACK"
                          and int(tokens1[i1].pattr)
                          == int(tokens1[i1].offset) + 3):
                        if int(tokens1[i1].pattr) == int(tokens1[i1 +
                                                                 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    elif (tokens1[i1].kind == "LOAD_NAME"
                          and tokens2[i2].kind == "LOAD_CONST"
                          and tokens1[i1].pattr == "None"
                          and tokens2[i2].pattr is None):
                        pass
                    elif (tokens1[i1].kind == "LOAD_GLOBAL"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "LOAD_ASSERT"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "RETURN_VALUE"
                          and tokens2[i2].kind == "RETURN_END_IF"):
                        pass
                    elif (tokens1[i1].kind == "BUILD_TUPLE_0"
                          and tokens2[i2].pattr == ()):
                        pass
                    else:
                        raise CmpErrorCode(
                            name,
                            tokens1[i1].offset,
                            tokens1[i1],
                            tokens2[i2],
                            tokens1,
                            tokens2,
                        )
                elif (tokens1[i1].kind in JUMP_OPS
                      and tokens1[i1].pattr != tokens2[i2].pattr):
                    if tokens1[i1].kind == "JUMP_BACK":
                        # Backward jump: the target was already walked,
                        # so it can be checked against offset_map now.
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(
                                name,
                                tokens1[i1].offset,
                                tokens1[i1],
                                tokens2[i2],
                                tokens1,
                                tokens2,
                            )
                    else:
                        # Forward jump: remember the pair of targets and
                        # check them once the target is reached.  Both
                        # targets are read from the current tokens; a
                        # pattr that is not an integer is skipped.
                        # import pdb; pdb.set_trace()
                        try:
                            dest1 = int(tokens1[i1].pattr)
                            dest2 = int(tokens2[i2].pattr)
                        except (TypeError, ValueError):
                            pass
                        else:
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == "co_consts":
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts
                      if hasattr(c, "co_consts"))
            codes2 = (c for c in code_obj2.co_consts
                      if hasattr(c, "co_consts"))

            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, verify, name=name)
        elif member == "co_flags":
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
            # We also don't care about COROUTINE or GENERATOR for now
            flags1 &= ~0x000000A0
            flags2 &= ~0x000000A0
            if flags1 != flags2:
                raise CmpErrorMember(name, "co_flags", pretty_flags(flags1),
                                     pretty_flags(flags2))
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member, getattr(code_obj1, member),
                                     getattr(code_obj2, member))
def make_function3(self, node, isLambda, nested=1, codeNode=None):
    """Dump function definition, doc string, and function body.

    The header (``lambda ...: `` or ``(...):``), an optional docstring,
    any ``global`` declarations and the body are emitted through
    ``self.write``/``self.println``/``self.gen_source``; nothing is
    returned.

    :param node: parse-tree node for the function; its last child must be
        a ``MAKE_...`` token.
    :param isLambda: true when a ``lambda`` rather than a ``def`` header
        is being written.
    :param nested: accepted for interface compatibility; not used here.
    :param codeNode: node whose ``attr`` holds the code object; used
        whenever the code is not taken from a LOAD_LAMBDA child.

    If building the parse tree of the function body fails, the parser
    error text is written to the output, ``self.ERROR`` is set, and the
    function returns early.
    """

    # FIXME: call make_function3 if we are self.version >= 3.0
    # and then simplify the below.

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if default:
            value = self.traverse(default, indent='')
            maybe_show_ast_param_default(self.showast, name, value)
            result = '%s=%s' % (name, value)
            if result[-2:] == '= ':  # default was 'LOAD_CONST None'
                result += 'None'
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        if self.version <= 3.3 and len(node) > 2 and node[-3] != 'LOAD_LAMBDA':
            # positional args are after kwargs
            defparams = node[1:args_node.attr[0] + 1]
        else:
            # positional args are before kwargs
            defparams = node[:args_node.attr[0]]
        # attr is a 3-tuple; only the keyword count is needed below.
        _, kw_args, _ = args_node.attr
    else:
        defparams = node[:args_node.attr]
        kw_args = 0

    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    if not 3.0 <= self.version <= 3.2:
        paramnames.reverse()
        defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    # A non-tuple attr carries no keyword-pair count (kw_args is 0 for
    # it above), so treat it as 0 here rather than indexing it.
    if self.version >= 3.0 and isinstance(args_node.attr, tuple):
        kw_pairs = args_node.attr[1]
    else:
        kw_pairs = 0

    # build parameters
    if self.version != 3.2:
        params = [
            build_param(ast, name, default) for name, default in zip_longest(
                paramnames, defparams, fillvalue=None)
        ]
        params.reverse()  # back to correct order

        if code_has_star_arg(code):
            if self.version > 3.0:
                params.append('*%s' % code.co_varnames[argc + kw_pairs])
            else:
                params.append('*%s' % code.co_varnames[argc])
            argc += 1

        # dump parameter list (with default values)
        if isLambda:
            self.write("lambda ", ", ".join(params))
        else:
            self.write("(", ", ".join(params))
        # self.println(indent, '#flags:\t', int(code.co_flags))

    else:
        if isLambda:
            self.write("lambda ")
        else:
            self.write("(")

        # Continuation lines of the parameter list are aligned under the
        # column that follows what has just been written.
        last_line = self.f.getvalue().split("\n")[-1]
        indent = ' ' * len(last_line)
        line_number = self.line_number

        if code_has_star_arg(code):
            self.write('*%s' % code.co_varnames[argc + kw_pairs])
            argc += 1

        i = len(paramnames) - len(defparams)
        self.write(", ".join(paramnames[:i]))
        suffix = ', ' if i > 0 else ''
        for n in node:
            if n == 'pos_arg':
                self.write(suffix)
                self.write(paramnames[i] + '=')
                i += 1
                self.preorder(n)
                if (line_number != self.line_number):
                    suffix = ",\n" + indent
                    line_number = self.line_number
                else:
                    suffix = ', '

    if kw_args > 0:
        # Bit 4 of co_flags is tested here in place of
        # code_has_star_arg(); without it a bare '*' separator is
        # written before the keyword parameters.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
        else:
            self.write(", ")

        if not 3.0 <= self.version <= 3.2:
            # Write the first child that qualifies, then stop.
            for n in node:
                if n == 'pos_arg':
                    continue
                elif self.version >= 3.4 and n.type != 'kwargs':
                    continue
                else:
                    self.preorder(n)
                break
        else:
            kwargs = node[0]
            last = len(kwargs) - 1
            i = 0
            for n in node[0]:
                if n == 'kwarg':
                    self.write('%s=' % n[0].pattr)
                    self.preorder(n[1])
                    if i < last:
                        self.write(', ')
                    i += 1

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None
            and not isLambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    # Sorted so that the emitted declarations do not depend on set
    # iteration order.
    for g in sorted((all_globals & self.mod_globs)
                    | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    code._customize,
                    isLambda=isLambda,
                    returnNone=rn)
    code._tokens = None
    code._customize = None  # save memory
Example #37
0
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, name=''):
    """
    Compare two code-objects.

    This is the main part of this module.

    The members of code_obj1 are visited in sorted order and each one is
    checked against the same member of code_obj2. Members listed in
    __IGNORE_CODE_MEMBERS__ are skipped. The remaining members are handled
    as follows:

      - 'co_code': both code objects are disassembled with a scanner chosen
        from 'version' (and 'is_pypy'), 'COME_FROM' tokens are dropped, and
        the two token lists are walked in parallel. A fixed set of differing
        instruction sequences is accepted; any other difference is an error.
      - 'co_consts': only the constants that themselves have a 'co_consts'
        attribute are compared, by calling this function recursively on
        each pair.
      - anything else: the two values must compare equal.

    Parameters:
      version    -- bytecode version as a float; 2.3-2.7 and 3.2-3.6 have a
                    scanner branch below.
      is_pypy    -- selects the PyPy scanner for versions 2.7 and 3.2.
      code_obj1  -- first code object; must satisfy iscode().
      code_obj2  -- second code object; must satisfy iscode().
      name       -- name of the enclosing scope; it is combined with
                    code_obj1.co_name and passed to the raised errors.

    Raises CmpErrorCodeLen, CmpErrorCode or CmpErrorMember at the first
    difference that is not accepted. Returns None if nothing is raised.

    Side effect: rebinds the module-level global JUMP_OPs while handling
    'co_code'.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(code_obj1), \
      "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(code_obj2), \
      "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    # NOTE(review): isinstance(x, object) is true for every value, so the
    # 'else' branches keyed on this test look unreachable -- confirm before
    # relying on them.
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new style classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    # Build the dotted name used to label errors and passed down on recursion.
    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?': name = '__main__'

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is not equal
        # if this compare succeeds, simply return
        # return
        # The early return above is commented out, so the member-by-member
        # comparison below always runs, even when code_equal() is true.
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith('co_')]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__:
            pass
        elif member == 'co_code':
            # Pick the scanner matching the bytecode version.
            # NOTE(review): there is no final 'else', so for any other
            # version 'scan' and 'scanner' stay unbound and the code below
            # fails with a NameError/UnboundLocalError.
            if version == 2.3:
                import uncompyle6.scanners.scanner23 as scan
                # NOTE(review): class name Scanner26 does not match the
                # scanner23 module -- confirm this is intended.
                scanner = scan.Scanner26()
            elif version == 2.4:
                import uncompyle6.scanners.scanner24 as scan
                # NOTE(review): class name Scanner25 does not match the
                # scanner24 module -- confirm this is intended.
                scanner = scan.Scanner25()
            elif version == 2.5:
                import uncompyle6.scanners.scanner25 as scan
                scanner = scan.Scanner25()
            elif version == 2.6:
                import uncompyle6.scanners.scanner26 as scan
                scanner = scan.Scanner26()
            elif version == 2.7:
                if is_pypy:
                    import uncompyle6.scanners.pypy27 as scan
                    scanner = scan.ScannerPyPy27(show_asm=False)
                else:
                    import uncompyle6.scanners.scanner27 as scan
                    scanner = scan.Scanner27()
            elif version == 3.2:
                if is_pypy:
                    import uncompyle6.scanners.pypy32 as scan
                    scanner = scan.ScannerPyPy32()
                else:
                    import uncompyle6.scanners.scanner32 as scan
                    scanner = scan.Scanner32()
            elif version == 3.3:
                import uncompyle6.scanners.scanner33 as scan
                scanner = scan.Scanner33()
            elif version == 3.4:
                import uncompyle6.scanners.scanner34 as scan
                scanner = scan.Scanner34()
            elif version == 3.5:
                import uncompyle6.scanners.scanner35 as scan
                scanner = scan.Scanner35()
            elif version == 3.6:
                import uncompyle6.scanners.scanner36 as scan
                scanner = scan.Scanner36()

            # The jump-op names of the chosen scanner module, plus the
            # 'JUMP_BACK' pseudo-op, are published as a module global.
            global JUMP_OPs
            JUMP_OPs = list(scan.JUMP_OPs) + ['JUMP_BACK']

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # disassemble both code-objects
                tokens1, customize = scanner.disassemble(code_obj1)
                del customize # save memory
                tokens2, customize = scanner.disassemble(code_obj2)
                del customize # save memory
            finally:
                scanner.resetTokenClass() # restore Token class

            # Jump targets of the first code object; used below to decide
            # whether a jump following a RETURN_VALUE can be skipped.
            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.type != 'COME_FROM']
            tokens2 = [t for t in tokens2 if t.type != 'COME_FROM']

            # i1/i2 index tokens1/tokens2 and may advance by different steps.
            # offset_map: offset in tokens1 -> matching offset in tokens2.
            # check_jumps: forward-jump destination in tokens1 -> list of
            #   (i1, i2, dest2) to verify once that destination is reached.
            i1 = 0; i2 = 0
            offset_map = {}; check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # tokens2 is exhausted: accept only when tokens1 has
                    # exactly two extra tokens, 'LOAD_CONST None' and
                    # 'RETURN_VALUE', following an earlier RETURN_VALUE.
                    if len(tokens1) == len(tokens2) + 2 \
                          and tokens1[-1].type == 'RETURN_VALUE' \
                          and tokens1[-2].type == 'LOAD_CONST' \
                          and tokens1[-2].pattr is None \
                          and tokens1[-3].type == 'RETURN_VALUE':
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Verify forward jumps that were recorded as landing here.
                for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1],
                                   tokens2[idx2], tokens1, tokens2)

                if tokens1[i1].type != tokens2[i2].type:
                    # NOTE(review): this chained comparison requires both
                    # types to equal 'LOAD_CONST', which contradicts the
                    # enclosing '!=' test, so this branch looks unreachable.
                    # Perhaps only tokens2 was meant to be LOAD_CONST --
                    # confirm.
                    if tokens1[i1].type == 'LOAD_CONST' == tokens2[i2].type:
                        # Count the run of consecutive LOAD_CONSTs in tokens1.
                        i = 1
                        while tokens1[i1+i].type == 'LOAD_CONST':
                            i += 1
                        if tokens1[i1+i].type.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \
                              and i == int(tokens1[i1+i].type.split('_')[-1]):
                            # i constants followed by BUILD_TUPLE_i/BUILD_LIST_i
                            # versus a single constant holding the same tuple.
                            t = tuple([ elem.pattr for elem in tokens1[i1:i1+i] ])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                           tokens2[i2], tokens1, tokens2)
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif i == 2 and tokens1[i1+i].type == 'ROT_TWO' and tokens2[i2+1].type == 'UNPACK_SEQUENCE_2':
                            # two constants + ROT_TWO versus
                            # one constant + UNPACK_SEQUENCE_2
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1+i].type in BIN_OP_FUNCS:
                            # two constants + binary op versus one constant
                            # equal to the result of applying that op.
                            f = BIN_OP_FUNCS[tokens1[i1+i].type]
                            if f(tokens1[i1].pattr, tokens1[i1+1].pattr) == tokens2[i2].pattr:
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].type == 'UNARY_NOT':
                        # UNARY_NOT + conditional jump versus the opposite
                        # conditional jump without the UNARY_NOT.
                        if tokens2[i2].type == 'POP_JUMP_IF_TRUE':
                            if tokens1[i1+1].type == 'POP_JUMP_IF_FALSE':
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].type == 'POP_JUMP_IF_FALSE':
                            if tokens1[i1+1].type == 'POP_JUMP_IF_TRUE':
                                i1 += 2
                                i2 += 1
                                continue
                    elif tokens1[i1].type in ('JUMP_FORWARD', 'JUMP_BACK') \
                          and tokens1[i1-1].type == 'RETURN_VALUE' \
                          and tokens2[i2-1].type in ('RETURN_VALUE', 'RETURN_END_IF') \
                          and int(tokens1[i1].offset) not in targets1:
                        # A jump right after a return that is not itself a
                        # jump target: skip it in tokens1 only.
                        i1 += 1
                        continue
                    elif tokens1[i1].type == 'JUMP_FORWARD' and tokens2[i2].type == 'JUMP_BACK' \
                          and tokens1[i1+1].type == 'JUMP_BACK' and tokens2[i2+1].type == 'JUMP_BACK' \
                          and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3:
                        # JUMP_FORWARD whose destination is the JUMP_BACK
                        # immediately following it, versus two JUMP_BACKs.
                        if int(tokens1[i1].pattr) == int(tokens1[i1+1].offset):
                            i1 += 2
                            i2 += 2
                            continue

                    # No accepted pattern matched: report the mismatch.
                    raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                               tokens2[i2], tokens1, tokens2)
                elif tokens1[i1].type in JUMP_OPs and tokens1[i1].pattr != tokens2[i2].pattr:
                    # Same jump instruction, different printed destination.
                    dest1 = int(tokens1[i1].pattr)
                    dest2 = int(tokens2[i2].pattr)
                    if tokens1[i1].type == 'JUMP_BACK':
                        # Backward destination: looked up in offset_map, so it
                        # must already have been recorded there.
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
                                       tokens2[i2], tokens1, tokens2)
                    else:
                        # import pdb; pdb.set_trace()
                        # Any other jump: remember it and verify it when the
                        # walk reaches dest1 (see check_jumps lookup above).
                        if dest1 in check_jumps:
                            check_jumps[dest1].append((i1, i2, dest2))
                        else:
                            check_jumps[dest1] = [(i1, i2, dest2)]

                i1 += 1
                i2 += 1
            del tokens1, tokens2 # save memory
        elif member == 'co_consts':
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = ( c for c in code_obj1.co_consts if hasattr(c, 'co_consts') )
            codes2 = ( c for c in code_obj2.co_consts if hasattr(c, 'co_consts') )

            # zip() stops at the shorter sequence, so surplus nested code
            # objects on either side are not compared.
            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, name=name)
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member,
                             getattr(code_obj1, member),
                             getattr(code_obj2, member))
Example #38
0
    def disassemble(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Convert the Python 2 code object 'co' into a list of 'Token's.

        Returns the pair (tokens, customize). 'customize' maps each
        specialized variable-argument opname (e.g. 'BUILD_LIST_3') to its
        argument count.

        Several transformations are applied on the way to help the
        deparsing grammar:
           -  LOAD_CONSTs that load a code object are renamed according to
              what they load (lambda, genexpr, dictcomp, setcomp)
           -  COME_FROM pseudo-instructions are inserted at jump targets
           -  variable-argument opcodes get their argument count appended
              to their name
           -  backward JUMP_ABSOLUTEs become CONTINUE or JUMP_BACK
        The overall structure follows dis.disassemble().

        When 'classname' is given, names carrying that class's private-name
        prefix are unmangled. 'show_asm' ('before', 'after' or 'both')
        falls back to self.show_asm when it is not truthy.
        """

        if not show_asm:
            show_asm = self.show_asm
        # show_asm = 'before'
        if show_asm in ('both', 'before'):
            from xdis.bytecode import Bytecode
            for instruction in Bytecode(co, self.opc).get_instructions(co):
                print(instruction._disassemble())

        # Result containers
        tokens = []
        customize = {}
        new_token = self.Token  # shortcut

        code_len = self.setup_code(co)

        self.build_lines_data(co, code_len)
        self.build_prev_op(code_len)

        # self.lines contains (block,addrLastInstr)
        if classname:
            mangle_prefix = '_' + classname.lstrip('_') + '__'
            # Keep the two leading underscores of the private name.
            strip_len = len(mangle_prefix) - 2

            def unmangle(ident):
                if ident.startswith(mangle_prefix) and not ident.endswith('__'):
                    return ident[strip_len:]
                return ident

            free = list(map(unmangle, co.co_cellvars + co.co_freevars))
            names = list(map(unmangle, co.co_names))
            varnames = list(map(unmangle, co.co_varnames))
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
        self.names = names

        # Find the LOAD_GLOBALs that belong to an "assert" statement (as
        # opposed to an explicit "raise AssertionError"); they are emitted
        # as 'LOAD_ASSERT' further down.
        self.load_asserts = set()
        for pos in self.op_range(0, code_len):
            if self.code[pos] != self.opc.PJIT:
                continue
            if self.code[pos+3] != self.opc.LOAD_GLOBAL:
                continue
            if names[self.get_argument(pos+3)] == 'AssertionError':
                self.load_asserts.add(pos+3)

        come_froms = self.find_jump_targets()
        # contains (code, [addrRefToCode])

        # Mark PRINT_ITEM / PRINT_NEWLINE statements that continue a
        # preceding PRINT_ITEM statement on the same line block.
        replace = {}
        prev_stmt = self.next_stmt[0]
        stmt = self.next_stmt[prev_stmt]
        while stmt < code_len-1:
            if (self.lines[prev_stmt].next > stmt
                    and self.code[prev_stmt] == self.opc.PRINT_ITEM):
                if self.code[stmt] == self.opc.PRINT_ITEM:
                    replace[stmt] = 'PRINT_ITEM_CONT'
                elif self.code[stmt] == self.opc.PRINT_NEWLINE:
                    replace[stmt] = 'PRINT_NEWLINE_CONT'
            prev_stmt = stmt
            stmt = self.next_stmt[stmt]

        # Specialized opnames for LOAD_CONSTs of code objects, by co_name.
        code_load_names = {
            '<lambda>': 'LOAD_LAMBDA',
            '<genexpr>': 'LOAD_GENEXPR',
            '<dictcomp>': 'LOAD_DICTCOMP',
            '<setcomp>': 'LOAD_SETCOMP',
        }

        extended_arg = 0
        for offset in self.op_range(0, code_len):
            if offset in come_froms:
                for k, source in enumerate(come_froms[offset]):
                    tokens.append(new_token(
                        'COME_FROM', None, repr(source),
                        offset="%s_%d" % (offset, k),
                        has_arg = True))

            op = self.code[offset]
            opname = self.opc.opname[op]

            oparg = None
            pattr = None
            has_arg = (op >= self.opc.HAVE_ARGUMENT)
            if has_arg:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == self.opc.EXTENDED_ARG:
                    # Carry the high bits over to the next instruction and
                    # emit no token for EXTENDED_ARG itself.
                    extended_arg = oparg * scan.L65536
                    continue
                if op in self.opc.hasconst:
                    const = co.co_consts[oparg]
                    if iscode(const):
                        oparg = const
                        code_name = const.co_name
                        if code_name == '<lambda>':
                            assert opname == 'LOAD_CONST'
                        opname = code_load_names.get(code_name, opname)
                        # verify() uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
                        pattr = '<code_object ' + code_name + '>'
                    else:
                        pattr = const
                elif op in self.opc.hasname:
                    pattr = names[oparg]
                elif op in self.opc.hasjrel:
                    pattr = repr(offset + 3 + oparg)
                elif op in self.opc.hasjabs:
                    pattr = repr(oparg)
                elif op in self.opc.haslocal:
                    pattr = varnames[oparg]
                elif op in self.opc.hascompare:
                    pattr = self.opc.cmp_op[oparg]
                elif op in self.opc.hasfree:
                    pattr = free[oparg]

            if op in self.varargs_ops:
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if (op == self.opc.BUILD_TUPLE
                        and self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE):
                    continue
                opname = '%s_%d' % (opname, oparg)
                if op != self.opc.BUILD_SLICE:
                    customize[opname] = oparg
            elif op == self.opc.JUMP_ABSOLUTE:
                target = self.get_target(offset)
                if target < offset:
                    is_continue = (
                        offset in self.stmts
                        and self.code[offset+3] not in (self.opc.END_FINALLY,
                                                        self.opc.POP_BLOCK)
                        and offset not in self.not_continue)
                    opname = 'CONTINUE' if is_continue else 'JUMP_BACK'
            elif op == self.opc.LOAD_GLOBAL:
                if offset in self.load_asserts:
                    opname = 'LOAD_ASSERT'
            elif op == self.opc.RETURN_VALUE:
                if offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'

            linestart = (self.linestartoffsets[offset]
                         if offset in self.linestartoffsets else None)

            # A *_CONT replacement recorded above overrides the opname.
            tokens.append(new_token(
                replace.get(offset, opname), oparg, pattr, offset,
                linestart, op, has_arg))

        if show_asm in ('both', 'after'):
            for tok in tokens:
                print(tok.format())
            print()
        return tokens, customize
def make_function3_annotate(self,
                            node,
                            isLambda,
                            nested=1,
                            codeNode=None,
                            annotate_last=-1):
    """
    Dump function definition, doc string, and function
    body. This code is specialized for Python 3 functions that carry
    annotations.

    Parameters:
      node          -- parse-tree node of the function definition; its last
                       child must be a MAKE_FUNCTION_.../MAKE_CLOSURE_...
                       token.
      isLambda      -- true when emitting a lambda ("lambda ...: ") instead
                       of a parenthesized parameter list.
      nested        -- accepted for interface compatibility; not used here.
      codeNode      -- node whose .attr is the function's code object; used
                       unless the code object is taken from a LOAD_LAMBDA
                       child of 'node'.
      annotate_last -- overwritten by the search for the 'annotate_tuple'
                       child below; the passed-in value is not used.

    Output is produced through self.write()/self.println()/self.preorder().
    On a ParserError the error text is written, self.ERROR is set and the
    function returns early.
    """
    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    # Search backwards for the child holding the tuple of annotated names.
    annotate_tuple = None
    for annotate_last in range(len(node) - 1, -1, -1):
        if node[annotate_last] == 'annotate_tuple':
            annotate_tuple = node[annotate_last]
            break
    annotate_args = {}

    if (annotate_tuple == 'annotate_tuple'
            and annotate_tuple[0] in ('LOAD_CONST', 'LOAD_NAME')
            and isinstance(annotate_tuple[0].attr, tuple)):
        annotate_tup = annotate_tuple[0].attr
        i = -1
        j = annotate_last - 1
        l = -len(node)
        # Pair annotation names (taken from the end of the tuple) with the
        # annotation nodes that precede annotate_tuple.
        # The membership test must be against a real tuple: without the
        # comma the two literals concatenate into a single string and 'in'
        # degrades to a substring test.
        while j >= l and node[j].type in ('annotate_arg', 'annotate_tuple'):
            annotate_args[annotate_tup[i]] = node[j][0]
            i -= 1
            j -= 1

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are before kwargs
        defparams = node[:args_node.attr[0]]
        _, kw_args, annotate_argc = args_node.attr
        if 'return' in annotate_args:
            annotate_argc = len(annotate_args) - 1
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        annotate_argc = 0

    # Position of the LOAD_LAMBDA child relative to the end of 'node'.
    # NOTE(review): 3.03 is kept as-is; it behaves like 3.3 for every
    # version that reaches this branch, but may have been meant as 3.3.
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    # Same value as args_node.attr[1] when attr is a tuple, and 0 when attr
    # is a plain int (indexing the int would raise TypeError).
    kw_pairs = kw_args

    if isLambda:
        self.write("lambda ")
    else:
        self.write("(")

    # Continuation lines are aligned under the text written so far on the
    # current output line.
    last_line = self.f.getvalue().split("\n")[-1]
    indent = ' ' * len(last_line)
    line_number = self.line_number

    if code_has_star_arg(code):
        self.write('*%s' % code.co_varnames[argc + kw_pairs])
        argc += 1

    # Index of the first parameter that has a default value.
    i = len(paramnames) - len(defparams)
    suffix = ''

    no_paramnames = len(paramnames[:i]) == 0

    # Parameters without defaults.
    for param in paramnames[:i]:
        self.write(suffix, param)
        suffix = ', '
        # annotate_tuple stays None when no such child was found; skip the
        # annotation lookup in that case instead of failing on None[0].
        if annotate_tuple is not None and param in annotate_tuple[0].attr:
            p = annotate_tuple[0].attr.index(param)
            self.write(': ')
            self.preorder(node[p])
            if line_number != self.line_number:
                suffix = ",\n" + indent
                line_number = self.line_number
            # value, string = annotate_args[param]
            # if string:
            #     self.write(': "%s"' % value)
            # else:
            #     self.write(': %s' % value)

    # Parameters with defaults: one 'pos_arg' child per default value.
    suffix = ', ' if i > 0 else ''
    for n in node:
        if n == 'pos_arg':
            no_paramnames = False
            self.write(suffix)
            param = paramnames[i]
            self.write(param)
            if param in annotate_args:
                aa = annotate_args[param]
                if isinstance(aa, tuple):
                    aa = aa[0]
                    self.write(': "%s"' % aa)
                elif isinstance(aa, AST):
                    self.write(': ')
                    self.preorder(aa)

            self.write('=')
            i += 1
            self.preorder(n)
            if line_number != self.line_number:
                suffix = ",\n" + indent
                line_number = self.line_number
            else:
                suffix = ', '

    # self.println(indent, '#flags:\t', int(code.co_flags))
    if kw_args + annotate_argc > 0:
        if no_paramnames:
            if not code_has_star_arg(code):
                if argc > 0:
                    self.write(", *, ")
                else:
                    self.write("*, ")
            else:
                self.write(", ")

            # Keyword-only arguments with their defaults.
            kwargs = node[0]
            last = len(kwargs) - 1
            i = 0
            for n in node[0]:
                if n == 'kwarg':
                    if line_number != self.line_number:
                        self.write("\n" + indent)
                        line_number = self.line_number
                    self.write('%s=' % n[0].pattr)
                    self.preorder(n[1])
                    if i < last:
                        self.write(', ')
                    i += 1

            # Annotated parameters: gather the annotation nodes seen before
            # annotate_tuple, then write them as "name: annotation".
            annotation_nodes = []
            for n in node:
                if n == 'annotate_arg':
                    annotation_nodes.append(n[0])
                elif n == 'annotate_tuple':
                    t = n[0].attr
                    if t[-1] == 'return':
                        # The return annotation is written after ')'.
                        t = t[0:-1]
                        annotation_nodes = annotation_nodes[:-1]
                    last = len(annotation_nodes) - 1
                    for idx, annotation in enumerate(annotation_nodes):
                        self.write("%s: " % (t[idx]))
                        self.preorder(annotation)
                        if idx < last:
                            self.write(', ')
                    break

        if code_has_star_star_arg(code):
            if argc > 0:
                self.write(', ')
            self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.write(')')
        if annotate_tuple is not None and 'return' in annotate_tuple[0].attr:
            if (line_number != self.line_number) and not no_paramnames:
                self.write("\n" + indent)
                line_number = self.line_number
            self.write(' -> ')
            # value, string = annotate_args['return']
            # if string:
            #     self.write(' -> "%s"' % value)
            # else:
            #     self.write(' -> %s' % value)
            self.preorder(node[annotate_last - 1])

        self.println(":")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None
            and not isLambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    code._customize,
                    isLambda=isLambda,
                    returnNone=rn)
    code._tokens = code._customize = None  # save memory
def make_function2(self, node, is_lambda, nested=1, code_node=None):
    """
    Dump function definition, doc string, and function body.
    This code is specialized for Python 2.

    Parameters:
      node      -- parse-tree node of the function definition; its last
                   child must be a MAKE_FUNCTION_.../MAKE_CLOSURE_... token.
      is_lambda -- true when emitting a lambda ("lambda ...: ") instead of
                   a parenthesized parameter list.
      nested    -- accepted for interface compatibility; not used here.
      code_node -- node whose .attr is the function's code object.

    Output is produced through self.write()/self.println()/self.preorder().
    On a parser error the error text is written and the function returns
    early; self.ERROR is set unless self.tolerate_errors is true.
    """

    # FIXME: call make_function3 if we are self.version >= 3.0
    # and then simplify the below.

    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        # if formal parameter is a tuple, the parameter name
        # starts with a dot (eg. '.1', '.2')
        if name.startswith('.'):
            # replace the name with the tuple-string
            name = self.get_tuple_parameter(ast, name)

        if not default:
            return name

        value = self.traverse(default, indent='')
        maybe_show_tree_param_default(self.showast, name, value)
        result = '%s=%s' % (name, value)
        if result[-2:] == '= ':  # default was 'LOAD_CONST None'
            result += 'None'
        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith('MAKE_')

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are after kwargs
        defparams = node[1:args_node.attr[0] + 1]
        # Unpacking (rather than indexing) keeps the implicit check that
        # attr has exactly three elements; only kw_args is needed here.
        _, kw_args, _ = args_node.attr
    else:
        defparams = node[:args_node.attr]
        kw_args = 0

    # The code object always comes from code_node: the former LOAD_LAMBDA
    # lookup was guarded by an index that was always None, so it never ran.
    code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             is_lambda=is_lambda,
                             noneInNames=('None' in code.co_names))
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    # Captured before any body text is generated; used for the docstring.
    indent = self.indent

    # build parameters
    params = [
        build_param(ast, name, default)
        for name, default in zip_longest(paramnames, defparams, fillvalue=None)
    ]
    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        params.append('*%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda ", ", ".join(params))
        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.
        # FIXME: this is a bit hoaky and not general
        if (len(ast) > 1 and self.traverse(ast[-1]) == 'None'
                and self.traverse(ast[-2]).strip().startswith('yield')):
            del ast[-1]
            # Now pick out the expr part of the last statement
            ast_expr = ast[-1]
            while ast_expr.kind != 'expr':
                ast_expr = ast_expr[0]
            ast[-1] = ast_expr
    else:
        self.write("(", ", ".join(params))

    if kw_args > 0:
        # 4 is the code-flag bit tested here for a *args parameter.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
        else:
            self.write(", ")

        # Emit the first child that is not a 'pos_arg', then stop.
        for n in node:
            if n == 'pos_arg':
                continue
            self.preorder(n)
            break

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        # The ** parameter follows the positional (and * if any) parameters.
        self.write('**%s' % code.co_varnames[argc])

    if is_lambda:
        self.write(": ")
    else:
        self.println("):")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None
            and not is_lambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, indent, code.co_consts[0])

    code._tokens = None  # save memory
    if not is_lambda:
        assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())

    # Named global_names to avoid shadowing the builtin globals().
    global_names, nonlocals = find_globals_and_nonlocals(ast, set(), set(),
                                                         code, self.version)

    # Python 2 doesn't support the "nonlocal" statement
    assert self.version >= 3.0 or not nonlocals

    for g in sorted((all_globals & self.mod_globs) | global_names):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    code._customize,
                    is_lambda=is_lambda,
                    returnNone=rn)
    code._tokens = None
    code._customize = None  # save memory
def make_function3_annotate(self, node, isLambda, nested=1,
                            codeNode=None, annotate_last=-1):
    """
    Dump function definition, doc string, and function
    body. This code is specialized for Python 3 functions whose
    MAKE_FUNCTION/MAKE_CLOSURE carries annotations.

    node           parse-tree node of the function; its last child must be a
                   MAKE_... token.
    isLambda       true when a lambda is being written; "lambda " and ": "
                   are emitted instead of "(" and "):".
    nested         not used in this function.
    codeNode       node whose .attr is the code object; used unless the code
                   object is taken from a LOAD_LAMBDA child of node.
    annotate_last  the passed-in value is not used: it is rebound below to
                   the index of the 'annotate_tuple' child (or 0 if none).

    Output is written through self.write()/self.println(); nothing is
    returned. On a parse error the error text is written, self.ERROR is set
    and the function returns early.
    """

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    # Scan backwards for the node holding the tuple of annotated names.
    annotate_tuple = None
    for annotate_last in range(len(node)-1, -1, -1):
        if node[annotate_last] == 'annotate_tuple':
            annotate_tuple = node[annotate_last]
            break
    annotate_args = {}

    if (annotate_tuple == 'annotate_tuple'
        and annotate_tuple[0] in ('LOAD_CONST', 'LOAD_NAME')
        and isinstance(annotate_tuple[0].attr, tuple)):
        annotate_tup = annotate_tuple[0].attr
        i = -1
        j = annotate_last-1
        l = -len(node)
        # Pair names (taken from the end of the tuple) with the annotation
        # nodes that precede the tuple. The comma matters here: without it
        # the two literals concatenate and "in" becomes a substring test.
        while j >= l and node[j].type in ('annotate_arg', 'annotate_tuple'):
            annotate_args[annotate_tup[i]] = node[j][0]
            i -= 1
            j -= 1

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are before kwargs
        defparams = node[:args_node.attr[0]]
        pos_args, kw_args, annotate_argc  = args_node.attr
        if 'return' in annotate_args.keys():
            annotate_argc = len(annotate_args) - 1
    else:
        defparams = node[:args_node.attr]
        kw_args  = 0
        annotate_argc = 0
        pass

    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif self.version > 3.2:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda = isLambda,
                             noneInNames = ('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    # kw_args equals args_node.attr[1] when attr is a tuple and is 0 when
    # attr is a plain count, so this never subscripts an int.
    kw_pairs = kw_args

    if isLambda:
        self.write("lambda ")
    else:
        self.write("(")

    # Continuation lines of the parameter list are aligned with the column
    # just after what has been written on the current output line.
    last_line = self.f.getvalue().split("\n")[-1]
    l = len(last_line)
    indent = ' ' * l
    line_number = self.line_number

    if code_has_star_arg(code):
        self.write('*%s' % code.co_varnames[argc + kw_pairs])
        argc += 1

    # Parameters without default values come first.
    i = len(paramnames) - len(defparams)
    suffix = ''
    for param in paramnames[:i]:
        self.write(suffix, param)
        suffix = ', '
        # Guard against the case where no annotate_tuple node was found.
        if annotate_tuple is not None and param in annotate_tuple[0].attr:
            p = annotate_tuple[0].attr.index(param)
            self.write(': ')
            self.preorder(node[p])
            if (line_number != self.line_number):
                suffix = ",\n" + indent
                line_number = self.line_number
            # value, string = annotate_args[param]
            # if string:
            #     self.write(': "%s"' % value)
            # else:
            #     self.write(': %s' % value)

    # Parameters with default values: one 'pos_arg' child per default.
    suffix = ', ' if i > 0 else ''
    for n in node:
        if n == 'pos_arg':
            self.write(suffix)
            param = paramnames[i]
            self.write(param)
            if param in annotate_args:
                aa = annotate_args[param]
                if isinstance(aa, tuple):
                    aa = aa[0]
                self.write(': "%s"' % aa)
            self.write('=')
            i += 1
            self.preorder(n)
            if (line_number != self.line_number):
                suffix = ",\n" + indent
                line_number = self.line_number
            else:
                suffix = ', '


    # self.println(indent, '#flags:\t', int(code.co_flags))
    if kw_args + annotate_argc > 0:
        if not code_has_star_arg(code):
            if argc > 0:

                self.write(", *, ")
            else:
                self.write("*, ")
            pass
        else:
            self.write(", ")

        kwargs = node[0]
        last = len(kwargs)-1
        kw_index = 0
        for n in kwargs:
            if n == 'kwarg':
                if (line_number != self.line_number):
                    self.write("\n" + indent)
                    line_number = self.line_number
                self.write('%s=' % n[0].pattr)
                self.preorder(n[1])
                if kw_index < last:
                    self.write(', ')
                kw_index += 1
                pass
            pass

        # Collect annotation value nodes up to the names tuple, then write
        # "name: annotation" pairs (the 'return' annotation is handled later).
        annotate_nodes = []
        for n in node:
            if n == 'annotate_arg':
                annotate_nodes.append(n[0])
            elif n == 'annotate_tuple':
                t = n[0].attr
                if t[-1] == 'return':
                    t = t[0:-1]
                    annotate_nodes = annotate_nodes[:-1]
                    pass
                last = len(annotate_nodes) - 1
                for ann_index, ann_node in enumerate(annotate_nodes):
                    self.write("%s: " % (t[ann_index]))
                    self.preorder(ann_node)
                    if ann_index < last:
                        self.write(', ')
                        pass
                    pass
                break
            pass
        pass

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.write(')')
        if annotate_tuple is not None and 'return' in annotate_tuple[0].attr:
            if (line_number != self.line_number):
                self.write("\n" + indent)
                line_number = self.line_number
            self.write(' -> ')
            # value, string = annotate_args['return']
            # if string:
            #     self.write(' -> "%s"' % value)
            # else:
            #     self.write(' -> %s' % value)
            self.preorder(node[annotate_last-1])

        self.println(":")

    if (len(code.co_consts) > 0 and
        code.co_consts[0] is not None and not isLambda): # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    # Sorted so the emitted "global" statements come out in a stable order.
    for g in sorted((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, isLambda=isLambda,
                    returnNone=rn)
    code._tokens = code._customize = None # save memory
def make_function3(self, node, is_lambda, nested=1, code_node=None):
    """Dump function definition, doc string, and function body in
      Python version 3.0 and above

    node       parse-tree node of the function; its last child must be a
               MAKE_FUNCTION_.../MAKE_CLOSURE_... token whose .attr holds the
               argument counts/flags.
    is_lambda  true when a lambda is being written; "lambda " and ": " are
               emitted instead of "(" and "):".
    nested     not used in this function.
    code_node  node whose .attr is the code object; used unless the code
               object is taken from a LOAD_LAMBDA child of node.

    Output is written through self.write()/self.println(); nothing is
    returned. On a parse error the error text is written, self.ERROR is set
    (unless self.tolerate_errors) and the function returns early.
    """

    # For Python 3.3, the evaluation stack in MAKE_FUNCTION is:

    # * default argument objects in positional order
    # * pairs of name and default argument, with the name just below
    #   the object on the stack, for keyword-only parameters
    # * parameter annotation objects
    # * a tuple listing the parameter names for the annotations
    #   (only if there are any annotation objects)
    # * the code associated with the function (at TOS1)
    # * the qualified name of the function (at TOS)

    # For Python 3.0 .. 3.2 the evaluation stack is:
    # The function object is defined to have argc default parameters,
    # which are found below TOS.
    # * first come positional args in the order they are given in the source,
    # * next come the keyword args in the order they are given in the source,
    # * finally is the code associated with the function (at TOS)
    #
    # Note: There is no qualified name at TOS

    # MAKE_CLOSURE adds an additional closure slot

    # In Python 3.6 stack entries change again. I understand
    # 3.7 changes some of those changes. Yes, it is hard to follow
    # and I am sure I haven't been able to keep up.

    # Thank you, Python.

    def build_param(ast, name, default, annotation=None):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        if self.version >= 3.6:
            # For 3.6+ the defaults were already turned into text.
            value = default
        else:
            value = self.traverse(default, indent='')
        maybe_show_tree_param_default(self.showast, name, value)
        if annotation:
            result = '%s: %s=%s' % (name, annotation, value)
        else:
            result = '%s=%s' % (name, value)

        # The below can probably be removed. This is probably
        # a holdover from days when LOAD_CONST erroneously
        # didn't handle LOAD_CONST None properly
        if result[-2:] == '= ':  # default was 'LOAD_CONST None'
            result += 'None'

        return result

    def default_params_36():
        """Return the source text of each positional default value (3.6+),
        read from the first child of node. Returns [] when that expression
        is neither a constant tuple nor a collection display."""
        expr_node = node[0]
        if node[0] == 'pos_arg':
            expr_node = expr_node[0]
        assert expr_node == 'expr', "expecting mkfunc default node to be an expr"
        if (expr_node[0] == 'LOAD_CONST'
                and isinstance(expr_node[0].attr, tuple)):
            return [repr(a) for a in expr_node[0].attr]
        if expr_node[0] in frozenset(('list', 'tuple', 'dict', 'set')):
            return [self.traverse(n, indent='') for n in expr_node[0][:-1]]
        return []

    def collect_annotations(annotate_node):
        """Fill annotate_dict (name -> annotation source text) from a
        constant-key dict expression; any other node shape is ignored."""
        if annotate_node == 'expr':
            annotate_node = annotate_node[0]
            annotate_name_node = annotate_node[-1]
            if annotate_node == 'dict' and annotate_name_node.kind.startswith(
                    'BUILD_CONST_KEY_MAP'):
                types = [
                    self.traverse(n, indent='')
                    for n in annotate_node[:-2]
                ]
                names = annotate_node[-2].attr
                assert len(types) == len(names)
                annotate_dict.update(zip(names, types))

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith('MAKE_')

    # Python 3.3+ adds a qualified name at TOS (-1)
    # moving down the LOAD_LAMBDA instruction
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif self.version > 3.2:
        lambda_index = -3
    else:
        lambda_index = None

    args_node = node[-1]

    annotate_dict = {}

    # Get a list of tree nodes that constitute the values for the "default
    # parameters"; these are default values that appear before any *, and are
    # not to be confused with keyword parameters which may appear after *.
    args_attr = args_node.attr

    if isinstance(args_attr, tuple) or (self.version >= 3.6
                                        and isinstance(args_attr, list)):
        if len(args_attr) == 3:
            pos_args, kw_args, annotate_argc = args_attr
        else:
            pos_args, kw_args, annotate_argc, closure = args_attr

            # Walk down from just below the code object / qualified name.
            stack_index = -4
            if closure:
                # FIXME: fill in
                stack_index -= 1
            if annotate_argc:
                collect_annotations(node[stack_index])

        # FIXME: there is probably a better way to classify this.
        have_kwargs = node[0].kind.startswith(
            'kwarg') or node[0] == 'no_kwargs'
        if len(node) >= 4:
            lc_index = -4
        else:
            lc_index = -3
            pass

        if (3.0 <= self.version <= 3.3 and len(node) > 2
                and node[lambda_index] != 'LOAD_LAMBDA'
                and (have_kwargs or node[lc_index].kind != 'load_closure')):

            # Find the index in "node" where the first default
            # parameter value is located. Note this is in contrast to
            # key-word arguments, pairs of (name, value), which appear after "*".
            # "default_values_start" is this location.
            default_values_start = 0
            if node[0] == 'no_kwargs':
                default_values_start += 1
            # args are after kwargs; kwargs are bundled as one node
            if node[default_values_start] == 'kwargs':
                default_values_start += 1
            defparams = node[default_values_start:default_values_start +
                             args_node.attr[0]]
        else:
            if self.version < 3.6:
                defparams = node[:args_node.attr[0]]
                kw_args = 0
            else:
                default, kw_args, annotate_argc = args_node.attr[0:3]
                defparams = default_params_36() if default else []
                pass
    else:
        if self.version < 3.6:
            defparams = node[:args_node.attr]
            kw_args = 0
        else:
            default, kw_args, annotate_argc, closure = args_node.attr
            defparams = default_params_36() if default else []

            stack_index = -4
            if closure:
                # FIXME: fill in
                stack_index -= 1
            if annotate_argc:
                collect_annotations(node[stack_index])
        pass

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    scanner_code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(scanner_code.co_varnames[:argc])
    if kwonlyargcount > 0:
        kwargs = list(scanner_code.co_varnames[argc:argc + kwonlyargcount])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(scanner_code._tokens,
                             scanner_code._customize,
                             is_lambda=is_lambda,
                             noneInNames=('None' in code.co_names))
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    # build parameters (both lists are reversed at this point, so the
    # parameters that have defaults come first)
    params = [
        build_param(ast, paramnames[i], defparam,
                    annotate_dict.get(paramnames[i]))
        for i, defparam in enumerate(defparams)
    ]
    for param in paramnames[len(defparams):]:
        if param in annotate_dict:
            params.append("%s: %s" % (param, annotate_dict[param]))
        else:
            params.append(param)

    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        if self.version > 3.0:
            star_arg = code.co_varnames[argc + kwonlyargcount]
            if annotate_dict and star_arg in annotate_dict:
                params.append('*%s: %s' % (star_arg, annotate_dict[star_arg]))
            else:
                params.append('*%s' % star_arg)
        else:
            params.append('*%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda ", ", ".join(params))
        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.
        # FIXME: this is a bit hoaky and not general
        if (len(ast) > 1 and self.traverse(ast[-1]) == 'None'
                and self.traverse(ast[-2]).strip().startswith('yield')):
            del ast[-1]
            # Now pick out the expr part of the last statement
            ast_expr = ast[-1]
            while ast_expr.kind != 'expr':
                ast_expr = ast_expr[0]
            ast[-1] = ast_expr
            pass
    else:
        # FIXME: add annotations here
        self.write("(", ", ".join(params))
    # self.println(indent, '#flags:\t', int(code.co_flags))

    # FIXME: Could we remove ends_in_comma and its tests if we just
    # created a parameter list and at the very end did a join on that?
    # Unless careful, We might lose line breaks though.
    ends_in_comma = False
    if kwonlyargcount > 0:
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            pass
            ends_in_comma = True
        else:
            if argc > 0:
                self.write(", ")
                ends_in_comma = True

        if 3.0 <= self.version <= 3.5:
            kw_args = [None] * kwonlyargcount
            kw_nodes = node[0]
            if kw_nodes == "kwargs":
                for n in kw_nodes:
                    # NOTE(security): eval() of text that comes from the
                    # bytecode being decompiled; review before running this
                    # on untrusted input.
                    name = eval(n[0].pattr)
                    default = self.traverse(n[1], indent='')
                    idx = kwargs.index(name)
                    kw_args[idx] = "%s=%s" % (name, default)

            # Keyword-only parameters that have no default value
            for i, text in enumerate(kw_args):
                if text is None:
                    kw_args[i] = "%s" % kwargs[i]
            self.write(', '.join(kw_args))
            ends_in_comma = False
        elif self.version >= 3.6:
            # argc = node[-1].attr
            # co = node[-3].attr
            # argcount = co.co_argcount
            # kwonlyargcount = co.co_kwonlyargcount

            free_tup = ann_dict = kw_dict = default_tup = None
            fn_bits = node[-1].attr
            index = -4  # Skip over:
            #  MAKE_FUNCTION,
            #  LOAD_CONST qualified name,
            #  LOAD_CONST code object
            if fn_bits[-1]:
                free_tup = node[index]
                index -= 1
            if fn_bits[-2]:
                ann_dict = node[index]
                index -= 1
            if fn_bits[-3]:
                kw_dict = node[index]
                index -= 1
            if fn_bits[-4]:
                default_tup = node[index]

            if kw_dict == 'expr':
                kw_dict = kw_dict[0]

            # FIXME: handle free_tup, annotate_dict, and default_tup
            kw_args = [None] * kwonlyargcount

            if kw_dict:
                assert kw_dict == 'dict'
                defaults = [self.traverse(n, indent='') for n in kw_dict[:-2]]
                # NOTE(security): eval() of text that comes from the
                # bytecode being decompiled; review before running this
                # on untrusted input.
                names = eval(self.traverse(kw_dict[-2]))
                assert len(defaults) == len(names)
                # FIXME: possibly handle line breaks
                for i, n in enumerate(names):
                    idx = kwargs.index(n)
                    if n in annotate_dict:
                        t = "%s: %s=%s" % (n, annotate_dict[n], defaults[i])
                    else:
                        t = "%s=%s" % (n, defaults[i])
                    kw_args[idx] = t
                    pass
                pass

            # handle others: keyword-only parameters without a default value
            for i, text in enumerate(kw_args):
                if text is None:
                    n = kwargs[i]
                    if n in annotate_dict:
                        kw_args[i] = "%s: %s" % (n, annotate_dict[n])
                    else:
                        kw_args[i] = "%s" % n

            self.write(', '.join(kw_args))
            ends_in_comma = False

        pass
    else:
        if argc == 0:
            ends_in_comma = True

    if code_has_star_star_arg(code):
        if not ends_in_comma:
            self.write(', ')
        star_star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_star_arg in annotate_dict:
            self.write('**%s: %s' %
                       (star_star_arg, annotate_dict[star_star_arg]))
        else:
            self.write('**%s' % star_star_arg)

    if is_lambda:
        self.write(": ")
    else:
        self.write(')')
        if annotate_dict and 'return' in annotate_dict:
            self.write(' -> %s' % annotate_dict['return'])
        self.println(":")

    if len(code.co_consts
           ) > 0 and code.co_consts[0] is not None and not is_lambda:  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    scanner_code._tokens = None  # save memory
    assert ast == 'stmts'

    all_globals = find_all_globals(ast, set())
    globals, nonlocals = find_globals_and_nonlocals(ast, set(), set(), code,
                                                    self.version)

    for g in sorted((all_globals & self.mod_globs) | globals):
        self.println(self.indent, 'global ', g)

    for nl in sorted(nonlocals):
        self.println(self.indent, 'nonlocal ', nl)

    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    scanner_code._customize,
                    is_lambda=is_lambda,
                    returnNone=rn)
    scanner_code._tokens = None
    scanner_code._customize = None  # save memory
Example #43
0
def disco_loop_asm_format(opc, version, co, real_out,
                          fn_name_map, all_fns):
    """Produces disassembly in a format more conducive to
    automatic assembly by producing inner modules before they are
    used by outer ones. Since this is recursive, we'll
    use more stack space at runtime.

    opc          opcode information passed through to Bytecode().
    version      Python version of the bytecode; values below 3.0 select
                 code2compat(), everything else code3compat().
    co           code object to disassemble.
    real_out     stream the disassembly text is written to.
    fn_name_map  dict from uniquified code name to original name; updated
                 in place.
    all_fns      set of code names already emitted; updated in place.
    """

    if version < 3.0:
        co = code2compat(co)
    else:
        co = code3compat(co)

    co_name = co.co_name
    mapped_name = fn_name_map.get(co_name, co_name)

    new_consts = []
    for c in co.co_consts:
        if iscode(c):
            if version < 3.0:
                c_compat = code2compat(c)
            else:
                c_compat = code3compat(c)
            # Emit nested code objects first so they precede their user.
            disco_loop_asm_format(opc, version, c_compat, real_out,
                                  fn_name_map, all_fns)

            m = re.match(".* object <(.+)> at", str(c))
            if m:
                basename = m.group(1)
                if basename != 'module':
                    # Use the nested object's own name here; do not rebind
                    # "mapped_name", which is the name reported below for
                    # the enclosing code object "co".
                    c_compat.co_name = code_uniquify(basename, c.co_code)
            # NOTE(review): the result of freeze() is discarded here while
            # "co = co.freeze()" below uses it; confirm whether the frozen
            # object is what should be appended to new_consts.
            c_compat.freeze()
            new_consts.append(c_compat)
        else:
            new_consts.append(c)
        pass
    co.co_consts = new_consts

    m = re.match("^<(.+)>$", co.co_name)
    if m or co_name in all_fns:
        if co_name in all_fns:
            basename = co_name
        else:
            basename = m.group(1)
        if basename != 'module':
            mapped_name = code_uniquify(basename, co.co_code)
            co_name = mapped_name
            assert mapped_name not in fn_name_map
        fn_name_map[mapped_name] = basename
        co.co_name = mapped_name
        pass
    elif co_name in fn_name_map:
        # FIXME: better would be a hash of the co_code
        mapped_name = code_uniquify(co_name, co.co_code)
        fn_name_map[mapped_name] = co_name
        co.co_name = mapped_name
        pass

    co = co.freeze()
    all_fns.add(co_name)
    if co.co_name != '<module>' or co.co_filename:
        real_out.write("\n" + format_code_info(co, version, mapped_name) + "\n")

    bytecode = Bytecode(co, opc, dup_lines=True)
    real_out.write(bytecode.dis(asm_format=True) + "\n")
Example #44
0
    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an uncompyle6 code object, and transform them,
        returning a list of uncompyle6 'Token's.

        The transformations are made to assist the deparsing grammar.
        Specificially:
           -  various types of LOAD_CONST's are categorized in terms of what they load
           -  COME_FROM instructions are added to assist parsing control structures
           -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments

        Also, when we encounter certain tokens, we add them to a set which will cause custom
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.
        """

        show_asm = self.show_asm if not show_asm else show_asm
        # show_asm = 'after'
        if show_asm in ('both', 'before'):
            bytecode = Bytecode(co, self.opc)
            for instr in bytecode.get_instructions(co):
                print(instr._disassemble())

        # Container for tokens
        tokens = []

        customize = {}
        if self.is_pypy:
            customize['PyPy'] = 1

        self.code = array('B', co.co_code)
        self.build_lines_data(co)
        self.build_prev_op()

        bytecode = Bytecode(co, self.opc)

        # FIXME: put as its own method?
        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()
        bs = list(bytecode)
        n = len(bs)
        for i in range(n):
            inst = bs[i]

            # We need to detect the difference between
            # "raise AssertionError" and "assert"
            # If we have a JUMP_FORWARD after the
            # RAISE_VARARGS then we have a "raise" statement
            # else we have an "assert" statement.
            if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
                next_inst = bs[i+1]
                if (next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                    for j in range(i+2, n):
                        raise_inst = bs[j]
                        if raise_inst.opname.startswith('RAISE_VARARGS'):
                            if j+1 >= n or bs[j+1].opname != 'JUMP_FORWARD':
                                self.load_asserts.add(next_inst.offset)
                                pass
                            break
                    pass
                pass

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets(show_asm)

        for inst in bytecode:

            argval = inst.argval
            if inst.offset in jump_targets:
                jump_idx = 0
                # We want to process COME_FROMs to the same offset to be in *descending*
                # offset order so we have the larger range or biggest instruction interval
                # last. (I think they are sorted in increasing order, but for safety
                # we sort them). That way, specific COME_FROM tags will match up
                # properly. For example, a "loop" with an "if" nested in it should have the
                # "loop" tag last so the grammar rule matches that properly.
                for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                    come_from_name = 'COME_FROM'
                    opname = self.opName(jump_offset)
                    if opname.startswith('SETUP_'):
                        come_from_type = opname[len('SETUP_'):]
                        come_from_name = 'COME_FROM_%s' % come_from_type
                        pass
                    tokens.append(Token(come_from_name,
                                        None, repr(jump_offset),
                                        offset='%s_%s' % (inst.offset, jump_idx),
                                        has_arg = True, opc=self.opc))
                    jump_idx += 1
                    pass
                pass
            elif inst.offset in self.else_start:
                end_offset = self.else_start[inst.offset]
                tokens.append(Token('ELSE',
                                    None, repr(end_offset),
                                    offset='%s' % (inst.offset),
                                    has_arg = True, opc=self.opc))

                pass

            pattr =  inst.argrepr
            opname = inst.opname
            op = inst.opcode

            if opname in ['LOAD_CONST']:
                const = inst.argval
                if iscode(const):
                    if const.co_name == '<lambda>':
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    elif const.co_name == '<listcomp>':
                        opname = 'LOAD_LISTCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
                    pass
            elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'):
                pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval)
                if name_pair_args > 0:
                    opname = '%s_N%d' % (opname, name_pair_args)
                    pass
                if annotate_args > 0:
                    opname = '%s_A_%d' % (opname, annotate_args)
                    pass
                opname = '%s_%d' % (opname, pos_args)
                pattr = ("%d positional, %d keyword pair, %d annotated" %
                             (pos_args, name_pair_args, annotate_args))
                tokens.append(
                    Token(
                        type_ = opname,
                        attr = (pos_args, name_pair_args, annotate_args),
                        pattr = pattr,
                        offset = inst.offset,
                        linestart = inst.starts_line,
                        op = op,
                        has_arg = op_has_argument(op, op3),
                        opc = self.opc
                    )
                )
                continue
            elif op in self.varargs_ops:
                pos_args = inst.argval
                if self.is_pypy and not pos_args and opname == 'BUILD_MAP':
                    opname = 'BUILD_MAP_n'
                else:
                    opname = '%s_%d' % (opname, pos_args)
            elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'):
                # The value in the dict is in special cases in semantic actions, such
                # as CALL_FUNCTION. The value is not used in these cases, so we put
                # in arbitrary value 0.
                customize[opname] = 0
            elif opname == 'UNPACK_EX':
                # FIXME: try with scanner and parser by
                # changing inst.argval
                before_args = inst.argval & 0xFF
                after_args = (inst.argval >> 8) & 0xff
                pattr = "%d before vararg, %d after" % (before_args, after_args)
                argval = (before_args, after_args)
                opname = '%s_%d+%d' % (opname, before_args, after_args)

            elif op == self.opc.JUMP_ABSOLUTE:
                # Further classify JUMP_ABSOLUTE into backward jumps
                # which are used in loops, and "CONTINUE" jumps which
                # may appear in a "continue" statement.  The loop-type
                # and continue-type jumps will help us classify loop
                # boundaries The continue-type jumps help us get
                # "continue" statements with would otherwise be turned
                # into a "pass" statement because JUMPs are sometimes
                # ignored in rules as just boundary overhead. In
                # comprehensions we might sometimes classify JUMP_BACK
                # as CONTINUE, but that's okay since we add a grammar
                # rule for that.
                pattr = inst.argval
                target = self.get_target(inst.offset)
                if target <= inst.offset:
                    next_opname = self.opname[self.code[inst.offset+3]]
                    if (inst.offset in self.stmts and
                        next_opname not in ('END_FINALLY', 'POP_BLOCK',
                                            # Python 3.0 only uses POP_TOP
                                            'POP_TOP')
                        and inst.offset not in self.not_continue):
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        # There are other situations where we don't catch
                        # CONTINUE as well.
                        if tokens[-1].type == 'JUMP_BACK' and tokens[-1].attr <= argval:
                            # intern is used because we are changing the *previous* token
                            tokens[-1].type = intern('CONTINUE')

            elif op == self.opc.RETURN_VALUE:
                if inst.offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'
            elif inst.offset in self.load_asserts:
                opname = 'LOAD_ASSERT'

            tokens.append(
                Token(
                    type_ = opname,
                    attr = argval,
                    pattr = pattr,
                    offset = inst.offset,
                    linestart = inst.starts_line,
                    op = op,
                    has_arg = (op >= op3.HAVE_ARGUMENT),
                    opc = self.opc
                    )
                )
            pass

        if show_asm in ('both', 'after'):
            for t in tokens:
                print(t)
            print()
        return tokens, customize
def make_function2(self, node, isLambda, nested=1, codeNode=None):
    """
    Dump function definition, doc string, and function body.
    This code is specialized for Python 2.

    Parameters:
      node     -- parse-tree node whose last child is the MAKE_FUNCTION_...
                  or MAKE_CLOSURE_... token; the children before it hold
                  the default-value expressions.
      isLambda -- true when emitting a lambda expression rather than a
                  "def" body; selects "lambda ...: " over "(...):" and
                  suppresses the docstring.
      nested   -- unused here; kept for call compatibility.
      codeNode -- node whose .attr is the function's code object.

    Everything is emitted through self.write()/self.println(); nothing is
    returned.  If building the function's parse tree fails with
    ParserError, the error text is written, stored in self.ERROR, and the
    function returns early.
    """

    # FIXME: call make_function3 if we are self.version >= 3.0
    # and then simplify the below.

    def build_param(ast, name, default):
        """Build the text for one parameter:
            - handle defaults
            - handle formal tuple parameters
        """
        # if the formal parameter is a tuple, the parameter name
        # starts with a dot (eg. '.1', '.2')
        if name.startswith('.'):
            # replace the name with the tuple-string
            name = self.get_tuple_parameter(ast, name)

        if not default:
            return name

        value = self.traverse(default, indent='')
        maybe_show_ast_param_default(self.showast, name, value)
        result = '%s=%s' % (name, value)
        if result[-2:] == '= ':  # default was 'LOAD_CONST None'
            result += 'None'
        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are after kwargs
        defparams = node[1:args_node.attr[0]+1]
        # attr is (positional, keyword-pair, annotation) counts; only the
        # keyword-pair count is needed below.
        _, kw_args, _ = args_node.attr
    else:
        defparams = node[:args_node.attr]
        kw_args = 0

    # The code object always comes from codeNode here: the LOAD_LAMBDA
    # lookup used by the Python 3 variant was never enabled in this
    # function (its index was hard-wired to None).
    code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda = isLambda,
                             noneInNames = ('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    kw_pairs = args_node.attr[1] if self.version >= 3.0 else 0
    indent = self.indent

    # build parameters; names without a matching default get None
    params = [build_param(ast, name, default) for
              name, default in zip_longest(paramnames, defparams, fillvalue=None)]
    params.reverse() # back to correct order

    if code_has_star_arg(code):
        params.append('*%s' % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if isLambda:
        self.write("lambda ", ", ".join(params))
    else:
        self.write("(", ", ".join(params))

    if kw_args > 0:
        # 0x04 is the CO_VARARGS bit of co_flags: without a *args
        # parameter a bare "*" must introduce the keyword-only section.
        co_varargs = 4
        if not (co_varargs & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
        else:
            self.write(", ")

        # Emit the first child that is not a 'pos_arg', then stop.
        for n in node:
            if n == 'pos_arg':
                continue
            self.preorder(n)
            break

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if len(code.co_consts) > 0 and code.co_consts[0] is not None and not isLambda: # ugly
        # docstring exists, dump it
        print_docstring(self, indent, code.co_consts[0])

    code._tokens = None # save memory
    assert ast == 'stmts'

    # Declare globals: module-level globals referenced here plus names
    # reported by find_globals() for this body.
    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, isLambda=isLambda,
                    returnNone=rn)
    # save memory
    code._tokens = None
    code._customize = None
Example #46
0
def decompile(bytecode_version,
              co,
              out=None,
              showasm=None,
              showast=False,
              timestamp=None,
              showgrammar=False,
              code_objects={},
              source_size=None,
              is_pypy=None,
              magic_int=None,
              mapstream=None,
              do_fragments=False):
    """
    Ingest and deparse the code object `co`, returning the deparsed object.

    A comment header (tool version, bytecode version, running interpreter
    and, when available, embedded file name, compile time and source size)
    is written to `out`, or to sys.stdout when `out` is not given.

    If `bytecode_version` is None, the version of the running Python
    interpreter is used.

    When `mapstream` is given (a stream, or a string handed to
    _get_outstream()), a source line map is written to it as well.
    Otherwise `do_fragments` chooses between the fragment deparser and
    the plain one.

    Caller is responsible for closing `out` and `mapstream`.

    A pysource.SourceWalkerError from deparsing is re-raised as a fresh
    pysource.SourceWalkerError carrying the same message.
    """
    if bytecode_version is None:
        bytecode_version = sysinfo2float()

    # Header lines always go to a real stream, even when `out` is None.
    real_out = out or sys.stdout

    def write(s):
        real_out.write(s + '\n')

    assert iscode(co)

    bytecode_flavor = 'PyPy ' if is_pypy else ''
    runtime_flavor = 'PyPy ' if IS_PYPY else ''
    sys_version_lines = sys.version.split('\n')
    header_args = (
        VERSION,
        bytecode_flavor,
        bytecode_version,
        " (%s)" % str(magic_int) if magic_int else "",
        runtime_flavor,
        '\n# '.join(sys_version_lines),
    )
    write('# decompyle3 version %s\n'
          '# %sPython bytecode %s%s\n# Decompiled from: %sPython %s' %
          header_args)

    if co.co_filename:
        write('# Embedded file name: %s' % co.co_filename)
    if timestamp:
        write('# Compiled at: %s' % datetime.datetime.fromtimestamp(timestamp))
    if source_size:
        write('# Size of source mod 2**32: %d bytes' % source_size)

    debug_opts = {'asm': showasm, 'ast': showast, 'grammar': showgrammar}

    try:
        if not mapstream:
            # Plain deparse: choose the deparser flavor and run it.
            deparse_fn = code_deparse_fragments if do_fragments else code_deparse
            return deparse_fn(co,
                              out,
                              bytecode_version,
                              debug_opts=debug_opts,
                              is_pypy=is_pypy)

        # Deparse with a line map.
        if isinstance(mapstream, str):
            mapstream = _get_outstream(mapstream)

        deparsed = deparse_code_with_map(
            bytecode_version,
            co,
            out,
            showasm,
            showast,
            showgrammar,
            code_objects=code_objects,
            is_pypy=is_pypy,
        )
        # Shift mapped line numbers past the header written above.
        header_count = 3 + len(sys_version_lines)
        linemap = []
        for line_no in sorted(deparsed.source_linemap.keys()):
            linemap.append(
                (line_no, deparsed.source_linemap[line_no] + header_count))
        mapstream.write("\n\n# %s\n" % linemap)
        return deparsed
    except pysource.SourceWalkerError as e:
        # deparsing failed
        raise pysource.SourceWalkerError(str(e))
Example #47
0
def cmp_code_objects(version,
                     is_pypy,
                     code_obj1,
                     code_obj2,
                     name='',
                     ignore_code=False):
    """
    Compare two code-objects.

    This is the main part of this module.

    Parameters:
      version     -- bytecode version as a float (2.3 ... 3.6); selects
                     the scanner used to tokenize co_code.
      is_pypy     -- selects the PyPy scanner for 2.7/3.2 and masks a
                     PyPy-only bit out of the second object's co_flags.
      code_obj1,
      code_obj2   -- the two code objects; both must satisfy iscode().
      name        -- dotted name of the enclosing scope, used to build
                     the name reported in errors.
      ignore_code -- when true, every member is skipped (see the first
                     test in the member loop), so nothing is compared.

    Each sorted 'co_*' member of code_obj1 is compared in turn:
      * co_code   -- both objects are tokenized and walked in lockstep,
                     tolerating a fixed set of known differences.
      * co_consts -- only nested code objects are compared, recursively.
      * co_flags  -- compared after masking out ignored bits.
      * others    -- must compare equal.

    Raises CmpErrorCodeLen, CmpErrorCode or CmpErrorMember on a
    mismatch; returns None otherwise.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(code_obj1), \
      "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(code_obj2), \
      "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new style classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    # Build the qualified name used in error reports.
    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?': name = '__main__'

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is not equal
        # if this compare succeeds, simply return
        # return
        # NOTE: the early return is commented out, so the member-by-member
        # comparison below runs even when code_equal() reports equality.
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith('co_')]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__ or ignore_code:
            # Skipped member; with ignore_code set this matches every member.
            pass
        elif member == 'co_code' and not ignore_code:
            # Pick the scanner matching the bytecode version.
            # NOTE(review): there is no fallback branch, so for any other
            # version `scan`/`scanner` stay unbound and the code below
            # would fail with a NameError -- confirm callers only pass the
            # versions listed here.
            if version == 2.3:
                import uncompyle6.scanners.scanner23 as scan
                scanner = scan.Scanner23(show_asm=False)
            elif version == 2.4:
                import uncompyle6.scanners.scanner24 as scan
                scanner = scan.Scanner24(show_asm=False)
            elif version == 2.5:
                import uncompyle6.scanners.scanner25 as scan
                scanner = scan.Scanner25(show_asm=False)
            elif version == 2.6:
                import uncompyle6.scanners.scanner26 as scan
                scanner = scan.Scanner26(show_asm=False)
            elif version == 2.7:
                if is_pypy:
                    import uncompyle6.scanners.pypy27 as scan
                    scanner = scan.ScannerPyPy27(show_asm=False)
                else:
                    import uncompyle6.scanners.scanner27 as scan
                    scanner = scan.Scanner27()
            elif version == 3.0:
                import uncompyle6.scanners.scanner30 as scan
                scanner = scan.Scanner30()
            elif version == 3.1:
                # 3.1 reuses the 3.2 scanner.
                import uncompyle6.scanners.scanner32 as scan
                scanner = scan.Scanner32()
            elif version == 3.2:
                if is_pypy:
                    import uncompyle6.scanners.pypy32 as scan
                    scanner = scan.ScannerPyPy32()
                else:
                    import uncompyle6.scanners.scanner32 as scan
                    scanner = scan.Scanner32()
            elif version == 3.3:
                import uncompyle6.scanners.scanner33 as scan
                scanner = scan.Scanner33()
            elif version == 3.4:
                import uncompyle6.scanners.scanner34 as scan
                scanner = scan.Scanner34()
            elif version == 3.5:
                import uncompyle6.scanners.scanner35 as scan
                scanner = scan.Scanner35()
            elif version == 3.6:
                import uncompyle6.scanners.scanner36 as scan
                scanner = scan.Scanner36()

            # Module-level list of jump opcode names for this version,
            # extended with the scanner's synthetic JUMP_BACK.
            global JUMP_OPS
            JUMP_OPS = list(scan.JUMP_OPS) + ['JUMP_BACK']

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            # Jump targets of the first object, then drop the plain
            # COME_FROM pseudo-tokens from both streams.
            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.kind != 'COME_FROM']
            tokens2 = [t for t in tokens2 if t.kind != 'COME_FROM']

            # i1/i2 index tokens1/tokens2; offset_map maps an offset in
            # the first stream to the matching offset in the second;
            # check_jumps maps a target offset in the first stream to the
            # list of (idx1, idx2, expected offset2) checks to run when
            # that target is reached.
            i1 = 0
            i2 = 0
            offset_map = {}
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # Second stream exhausted: accept only a trailing
                    # "LOAD_CONST None; RETURN_VALUE" following a
                    # RETURN_VALUE in the first stream.
                    if len(tokens1) == len(tokens2) + 2 \
                          and tokens1[-1].kind == 'RETURN_VALUE' \
                          and tokens1[-2].kind == 'LOAD_CONST' \
                          and tokens1[-2].pattr is None \
                          and tokens1[-3].kind == 'RETURN_VALUE':
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Run the jump checks that were deferred until this offset.
                for idx1, idx2, offset2 in check_jumps.get(
                        tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(name, tokens1[idx1].offset,
                                           tokens1[idx1], tokens2[idx2],
                                           tokens1, tokens2)

                if tokens1[i1].kind != tokens2[i2].kind:
                    # Token kinds differ: each branch below accepts one
                    # known-equivalent pattern; anything else is an error.
                    # NOTE(review): the next test needs both kinds to equal
                    # 'LOAD_CONST', yet we are inside the branch where the
                    # kinds differ -- it looks unreachable if `kind` is a
                    # plain string; confirm before relying on it.
                    if tokens1[i1].kind == 'LOAD_CONST' == tokens2[i2].kind:
                        # Count the run of consecutive LOAD_CONSTs.
                        i = 1
                        while tokens1[i1 + i].kind == 'LOAD_CONST':
                            i += 1
                        if tokens1[i1+i].kind.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \
                              and i == int(tokens1[i1+i].kind.split('_')[-1]):
                            # Constants + BUILD_TUPLE/LIST_n vs. one
                            # constant tuple in the second stream.
                            t = tuple(
                                [elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(name, tokens1[i1].offset,
                                                   tokens1[i1], tokens2[i2],
                                                   tokens1, tokens2)
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif i == 2 and tokens1[
                                i1 + i].kind == 'ROT_TWO' and tokens2[
                                    i2 + 1].kind == 'UNPACK_SEQUENCE_2':
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
                            # Two constants + binary op vs. the result
                            # as a single constant in the second stream.
                            f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
                            if f(tokens1[i1].pattr,
                                 tokens1[i1 + 1].pattr) == tokens2[i2].pattr:
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == 'UNARY_NOT':
                        # "UNARY_NOT + conditional jump" vs. the
                        # opposite conditional jump alone.
                        if tokens2[i2].kind == 'POP_JUMP_IF_TRUE':
                            if tokens1[i1 + 1].kind == 'POP_JUMP_IF_FALSE':
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == 'POP_JUMP_IF_FALSE':
                            if tokens1[i1 + 1].kind == 'POP_JUMP_IF_TRUE':
                                i1 += 2
                                i2 += 1
                                continue
                    elif tokens1[i1].kind in ('JUMP_FORWARD', 'JUMP_BACK') \
                          and tokens1[i1-1].kind == 'RETURN_VALUE' \
                          and tokens2[i2-1].kind in ('RETURN_VALUE', 'RETURN_END_IF') \
                          and int(tokens1[i1].offset) not in targets1:
                        # A jump right after a return that is not itself a
                        # jump target: skip it in the first stream only.
                        i1 += 1
                        continue
                    elif tokens1[i1].kind == 'JUMP_BACK' and tokens2[
                            i2].kind == 'CONTINUE':
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif tokens1[i1].kind == 'JUMP_FORWARD' and tokens2[i2].kind == 'JUMP_BACK' \
                          and tokens1[i1+1].kind == 'JUMP_BACK' and tokens2[i2+1].kind == 'JUMP_BACK' \
                          and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3:
                        # JUMP_FORWARD whose target is the JUMP_BACK that
                        # immediately follows it.
                        if int(tokens1[i1].pattr) == int(tokens1[i1 +
                                                                 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    # The remaining accepted pairs consume one token from
                    # each stream via the common increment at the bottom.
                    elif tokens1[i1].kind == 'LOAD_NAME' and tokens2[i2].kind == 'LOAD_CONST' \
                         and tokens1[i1].pattr == 'None' and tokens2[i2].pattr is None:
                        pass
                    elif tokens1[i1].kind == 'LOAD_GLOBAL' and tokens2[i2].kind == 'LOAD_NAME' \
                         and tokens1[i1].pattr == tokens2[i2].pattr:
                        pass
                    elif tokens1[i1].kind == 'LOAD_ASSERT' and tokens2[i2].kind == 'LOAD_NAME' \
                         and tokens1[i1].pattr == tokens2[i2].pattr:
                        pass
                    elif (tokens1[i1].kind == 'RETURN_VALUE'
                          and tokens2[i2].kind == 'RETURN_END_IF'):
                        pass
                    elif (tokens1[i1].kind == 'BUILD_TUPLE_0'
                          and tokens2[i2].pattr == ()):
                        pass
                    else:
                        raise CmpErrorCode(name, tokens1[i1].offset,
                                           tokens1[i1], tokens2[i2], tokens1,
                                           tokens2)
                elif tokens1[i1].kind in JUMP_OPS and tokens1[
                        i1].pattr != tokens2[i2].pattr:
                    # Same jump opcode but different printed targets.
                    if tokens1[i1].kind == 'JUMP_BACK':
                        # Backward jump: the target has already been seen,
                        # so it can be checked through offset_map now.
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(name, tokens1[i1].offset,
                                               tokens1[i1], tokens2[i2],
                                               tokens1, tokens2)
                    else:
                        # Other jump: defer the check until the target
                        # offset is reached (see check_jumps above).
                        # NOTE(review): `dest2` is not assigned in this
                        # branch; it is either left over from an earlier
                        # JUMP_BACK or undefined, in which case the bare
                        # `except` below swallows the NameError and no
                        # check is recorded -- confirm this is intended.
                        # import pdb; pdb.set_trace()
                        try:
                            dest1 = int(tokens1[i1].pattr)
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]
                        except:
                            pass

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == 'co_consts':
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts
                      if hasattr(c, 'co_consts'))
            codes2 = (c for c in code_obj2.co_consts
                      if hasattr(c, 'co_consts'))

            # Nested code objects are paired positionally; zip() stops at
            # the shorter sequence.  ignore_code is not passed down, so the
            # recursive calls use its default.
            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, name=name)
        elif member == 'co_flags':
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
            # We also don't care about COROUTINE or GENERATOR for now
            flags1 &= ~0x000000a0
            flags2 &= ~0x000000a0
            if flags1 != flags2:
                raise CmpErrorMember(name, 'co_flags', pretty_flags(flags1),
                                     pretty_flags(flags2))
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member, getattr(code_obj1, member),
                                     getattr(code_obj2, member))
Example #48
0
    def n_classdef3(node):
        """Handle the "classdef" nonterminal for 3.0 <= version <= 3.5.

        Writes the "class NAME(...):" header, deparses the class body one
        indentation level deeper, restores self.currentclass and then
        prunes the traversal of `node`.

        Raises RuntimeError when the class body or the superclass
        information cannot be located in a "load_closure" tree.
        """

        assert 3.0 <= self.version <= 3.5

        # class definition ('class X(A,B,C):')
        # Remember the enclosing class name so it can be restored below.
        cclass = self.currentclass

        # Pick out various needed bits of information
        # * class_name - the name of the class
        # * subclass_info - the parameters to the class  e.g.
        #      class Foo(bar, baz)
        #               ----------
        # * subclass_code - the code for the subclass body
        subclass_info = None
        if node == "classdefdeco2":
            if self.version <= 3.3:
                class_name = node[2][0].attr
            else:
                class_name = node[1][2].attr
            build_class = node
        else:
            build_class = node[0]
            class_name = node[1][0].attr

        assert "mkfunc" == build_class[1]
        mkfunc = build_class[1]
        if mkfunc[0] in ("kwargs", "no_kwargs"):
            if 3.0 <= self.version <= 3.2:
                for n in mkfunc:
                    if hasattr(n, "attr") and iscode(n.attr):
                        subclass_code = n.attr
                        break
                    elif n == "expr":
                        # Tentative value; a later child holding a real
                        # code object still overrides it.
                        subclass_code = n[0].attr
            else:
                for n in mkfunc:
                    if hasattr(n, "attr") and iscode(n.attr):
                        subclass_code = n.attr
                        break
            if node == "classdefdeco2":
                subclass_info = node
            else:
                subclass_info = node[0]
        elif build_class[1][0] == "load_closure":
            # Python 3 with closures not functions
            load_closure = build_class[1]
            if hasattr(load_closure[-3], "attr"):
                # Python 3.3 classes with closures work like this.
                # Note have to test before 3.2 case because
                # index -2 also has an attr.
                subclass_code = find_code_node(load_closure, -3).attr
            elif hasattr(load_closure[-2], "attr"):
                # Python 3.2 works like this
                subclass_code = find_code_node(load_closure, -2).attr
            else:
                # Raising a plain string is itself a TypeError; raise a
                # real exception so the message reaches the caller.
                raise RuntimeError(
                    "Internal Error n_classdef: cannot find class body")
            if hasattr(build_class[3], "__len__"):
                if not subclass_info:
                    subclass_info = build_class[3]
            elif hasattr(build_class[2], "__len__"):
                subclass_info = build_class[2]
            else:
                raise RuntimeError(
                    "Internal Error n_classdef: cannot superclass name")
        elif not subclass_info:
            if mkfunc[0] in ("no_kwargs", "kwargs"):
                subclass_code = mkfunc[1].attr
            else:
                subclass_code = mkfunc[0].attr
            if node == "classdefdeco2":
                subclass_info = node
            else:
                subclass_info = node[0]

        # A decorated class gets one leading blank line, others two.
        if node == "classdefdeco2":
            self.write("\n")
        else:
            self.write("\n\n")

        self.currentclass = str(class_name)
        self.write(self.indent, "class ", self.currentclass)

        self.print_super_classes3(subclass_info)
        self.println(":")

        # class body
        self.indent_more()
        self.build_class(subclass_code)
        self.indent_less()

        self.currentclass = cclass
        # Fewer trailing blank lines when more than one entry is on
        # self.param_stack.
        if len(self.param_stack) > 1:
            self.write("\n\n")
        else:
            self.write("\n\n\n")

        self.prune()
def make_function3(self, node, isLambda, nested=1, codeNode=None):
    """Dump function definition, doc string, and function body.

    Everything is emitted through ``self.write``/``self.println``: the
    parameter list (introduced by ``lambda `` or ``(``), keyword-only
    arguments, ``*args``/``**kwargs``, the closing ``: `` or ``):``, the
    docstring (non-lambdas only), any ``global`` declarations and finally
    the deparsed body via ``self.gen_source``.

    Parameters:
        node: parse-tree node whose last child is a ``MAKE_...`` token.
            That token's ``attr`` is either a 3-tuple whose first two items
            are used here as the positional-default count and the
            keyword-only count, or a bare count of positional defaults.
        isLambda: true when deparsing a lambda expression rather than a
            ``def``.
        nested: not used in this function.
        codeNode: node whose ``attr`` holds the code object.  Used whenever
            the code object is not taken from a ``LOAD_LAMBDA`` child, so it
            must be supplied in that case.

    Returns:
        None.  If ``self.build_ast`` raises ``ParserError`` the error text is
        written to the output, stored in ``self.ERROR``, and the function
        returns before emitting a parameter list or body.
    """

    def build_param(name, default):
        """Return the text for one parameter, with ``=default`` if present."""
        if not default:
            return name
        value = self.traverse(default, indent='')
        maybe_show_ast_param_default(self.showast, name, value)
        result = '%s=%s' % (name, value)
        if result[-2:] == '= ':  # default was 'LOAD_CONST None'
            result += 'None'
        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].type.startswith('MAKE_')

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        if self.version <= 3.3 and len(node) > 2 and node[-3] != 'LOAD_LAMBDA':
            # positional args are after kwargs
            defparams = node[1:args_node.attr[0]+1]
        else:
            # positional args are before kwargs
            defparams = node[:args_node.attr[0]]
        # Only the keyword-only count is needed below; the 3-way unpack is
        # kept so that a malformed tuple still fails here.
        _pos_args, kw_args, _annotate_argc = args_node.attr
    else:
        defparams = node[:args_node.attr]
        kw_args = 0

    # Position (from the end of node) where a LOAD_LAMBDA child is expected.
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif self.version > 3.2:
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and isLambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].type == 'LOAD_LAMBDA'
        code = node[lambda_index].attr
    else:
        code = codeNode.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    if not 3.0 <= self.version <= 3.2:
        paramnames.reverse()
        defparams.reverse()

    try:
        ast = self.build_ast(code._tokens,
                             code._customize,
                             isLambda=isLambda,
                             noneInNames=('None' in code.co_names))
    except ParserError as p:
        self.write(str(p))
        self.ERROR = p
        return

    # Number of keyword-only arguments; they sit between the positional
    # parameters and the */** names in co_varnames.  Reuse kw_args so that a
    # non-tuple MAKE_... attr (handled above) does not fail on subscripting.
    kw_pairs = kw_args if self.version >= 3.0 else 0

    # build parameters
    if self.version != 3.2:
        params = [build_param(name, default) for
                  name, default in zip_longest(paramnames, defparams, fillvalue=None)]
        params.reverse()  # back to correct order

        if code_has_star_arg(code):
            if self.version > 3.0:
                params.append('*%s' % code.co_varnames[argc + kw_pairs])
            else:
                params.append('*%s' % code.co_varnames[argc])
            argc += 1

        # dump parameter list (with default values)
        if isLambda:
            self.write("lambda ", ", ".join(params))
        else:
            self.write("(", ", ".join(params))

    else:
        if isLambda:
            self.write("lambda ")
        else:
            self.write("(")

        # Continuation lines of the parameter list are indented to the width
        # of the text already written on the current output line.
        last_line = self.f.getvalue().split("\n")[-1]
        continuation_indent = ' ' * len(last_line)
        line_number = self.line_number

        if code_has_star_arg(code):
            self.write('*%s' % code.co_varnames[argc + kw_pairs])
            argc += 1

        # Parameters without defaults come first and are written in one go;
        # the ones with defaults are written as their pos_arg nodes are met.
        i = len(paramnames) - len(defparams)
        self.write(", ".join(paramnames[:i]))
        suffix = ', ' if i > 0 else ''
        for n in node:
            if n == 'pos_arg':
                self.write(suffix)
                self.write(paramnames[i] + '=')
                i += 1
                self.preorder(n)
                if line_number != self.line_number:
                    # The default spanned lines: break before the next one.
                    suffix = ",\n" + continuation_indent
                    line_number = self.line_number
                else:
                    suffix = ', '

    if kw_args > 0:
        # 4 is the CO_VARARGS bit: without *args a bare "*" has to introduce
        # the keyword-only section.
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
        else:
            self.write(", ")

        if not 3.0 <= self.version <= 3.2:
            # Emit the first child that is not a positional default (and, for
            # 3.4+, that is a kwargs/kwarg node), then stop.
            for n in node:
                if n == 'pos_arg':
                    continue
                if self.version >= 3.4 and n.type not in ('kwargs', 'kwarg'):
                    continue
                self.preorder(n)
                break
        else:
            kwargs = node[0]
            last = len(kwargs) - 1
            kwarg_index = 0
            for n in kwargs:
                if n == 'kwarg':
                    self.write('%s=' % n[0].pattr)
                    self.preorder(n[1])
                    if kwarg_index < last:
                        self.write(', ')
                    kwarg_index += 1

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(', ')
        self.write('**%s' % code.co_varnames[argc + kw_pairs])

    if isLambda:
        self.write(": ")
    else:
        self.println("):")

    if len(code.co_consts) > 0 and code.co_consts[0] is not None and not isLambda:  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == 'stmts'

    # Declare globals: module-level globals referenced in this body plus
    # whatever find_globals reports for the body.
    all_globals = find_all_globals(ast, set())
    for g in ((all_globals & self.mod_globs) | find_globals(ast, set())):
        self.println(self.indent, 'global ', g)
    self.mod_globs -= all_globals
    has_none = 'None' in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast, code.co_name, code._customize, isLambda=isLambda,
                    returnNone=rn)
    # save memory
    code._tokens = None
    code._customize = None