Code example #1
def python_parser(
    version: str,
    co,
    out=sys.stdout,
    showasm=False,
    parser_debug=PARSER_DEFAULT_DEBUG,
    is_pypy=False,
    is_lambda=False,
):
    """
    Parse a code object to an abstract syntax tree representation.

    :param version:         The Python version this code is from, for
                            example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, etc.
    :param co:              The code object to parse.
    :param out:             File like object to write the output to.
    :param showasm:         Flag which determines whether the disassembled and
                            ingested code is written to sys.stdout or not.
    :param parser_debug:    dict containing debug flags for the spark parser.

    :return: Abstract syntax tree representation of the code object.
    """

    assert iscode(co)
    from decompyle3.scanner import get_scanner

    scanner = get_scanner(version, is_pypy)
    tokens, customize = scanner.ingest(co)
    maybe_show_asm(showasm, tokens)

    # For heavy grammar debugging
    # parser_debug = {'rules': True, 'transition': True, 'reduce' : True,
    #                 'showstack': 'full'}
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize, is_lambda)
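Below is a minimal usage sketch, not taken from the project itself, showing how the python_parser above might be driven on a freshly compiled code object. It assumes decompyle3 is installed and that python_parser is importable from its defining module, which the snippet does not show; the compiled source string and file name are illustrative only.

import sys

# Sketch only: compile a tiny snippet and parse it into decompyle3's tree.
co = compile("x = 1\nprint(x + 1)\n", "<example>", "exec")
version = float(sys.version[0:3])   # same version trick the other examples on this page use
tree = python_parser(version, co, out=sys.stdout, showasm=False)
print(type(tree))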
Code example #2
def test_template_engine():
    s = StringIO()
    sys_version = float(sys.version[0:3])
    scanner = get_scanner(sys_version, is_pypy=False)
    scanner.insts = []
    sw = SourceWalker(3.7, s, scanner)
    sw.ast = NONE
    sw.template_engine(('--%c--', 0), NONE)
    print(sw.f.getvalue())
    assert sw.f.getvalue() == '--None--'
Code example #3
File: test_fjt.py, Project: zbx911/python-decompile3
def test_if_in_for():
    code = bug.__code__
    scan = get_scanner(PYTHON_VERSION)
    if 2.7 <= PYTHON_VERSION <= 3.0 and not IS_PYPY:
        scan.build_instructions(code)
        fjt = scan.find_jump_targets(False)

        ## FIXME: the data below is wrong.
        ## We also get different results currently.
        ## We probably need to fix both the code
        ## and the test below.
        # assert {15: [3], 69: [66], 63: [18]} == fjt
        # assert scan.structs == \
        #   [{'start': 0, 'end': 72, 'type': 'root'},
        #    {'start': 15, 'end': 66, 'type': 'if-then'},
        #    {'start': 31, 'end': 59, 'type': 'for-loop'},
        #    {'start': 62, 'end': 63, 'type': 'for-else'}]

        code = bug_loop.__code__
        scan.build_instructions(code)
        fjt = scan.find_jump_targets(False)
        assert {64: [42], 67: [42, 42], 42: [16, 41], 19: [6]} == fjt
        assert scan.structs == [
            {"start": 0, "end": 80, "type": "root"},
            {"start": 3, "end": 64, "type": "if-then"},
            {"start": 6, "end": 15, "type": "try"},
            {"start": 19, "end": 38, "type": "except"},
            {"start": 45, "end": 67, "type": "while-loop"},
            {"start": 70, "end": 64, "type": "while-else"},
            # previous bug was not mistaking while-loop for if-then
            {"start": 48, "end": 67, "type": "while-loop"},
        ]

    elif 3.2 < PYTHON_VERSION <= 3.4:
        scan.build_instructions(code)
        fjt = scan.find_jump_targets(False)
        assert {69: [66], 63: [18]} == fjt
        assert scan.structs == [
            {"end": 72, "type": "root", "start": 0},
            {"end": 66, "type": "if-then", "start": 6},
            {"end": 63, "type": "if-then", "start": 18},
            {"end": 59, "type": "for-loop", "start": 31},
            {"end": 63, "type": "for-else", "start": 62},
        ]
    else:
        print("FIXME: should fix for %s" % PYTHON_VERSION)
        assert True
    return
Code example #4
def disco(version, co, out=None, is_pypy=False):
    """
    Disassembles and deparses a given code block 'co'.
    """

    assert iscode(co)

    # store the final output stream in case of error
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)

    scanner = get_scanner(version, is_pypy=is_pypy)

    queue = deque([co])
    disco_loop(scanner.ingest, queue, real_out)
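A hedged sketch of calling the disco function above; it assumes the module-level imports of the original file (deque, iscode, get_scanner, disco_loop) are in place, which this excerpt does not show, and the compiled snippet is illustrative only.

import sys

# Sketch only: print the '# Python <version>' header and the ingested dump to stdout.
co = compile("for i in range(3):\n    print(i)\n", "<example>", "exec")
disco(float(sys.version[0:3]), co)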
Code example #5
def dump_and_check(p, version: float, modified_tokens: set) -> None:

    p.dump_grammar()
    print("=" * 50, "\n")

    p.check_grammar()
    from decompyle3 import PYTHON_VERSION, IS_PYPY

    if PYTHON_VERSION == version:
        lhs, rhs, tokens, right_recursive, dup_rhs = p.check_sets()
        from decompyle3.scanner import get_scanner

        s = get_scanner(PYTHON_VERSION, IS_PYPY)
        modified_tokens = set("""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
               LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME
               LAMBDA_MARKER RETURN_LAST
            """.split())
        print("\nModified opcodes:", modified_tokens)
        opcode_set = set(s.opc.opname).union(modified_tokens)

        pseudo_tokens = set(tokens) - opcode_set
        import re

        pseudo_tokens = set([re.sub(r"_\d+$", "", t) for t in pseudo_tokens])
        pseudo_tokens = set([re.sub("_CONT$", "", t) for t in pseudo_tokens])
        pseudo_tokens = set(pseudo_tokens) - opcode_set

        print("\nPseudo tokens:")
        print(pseudo_tokens)
        import sys

        if len(sys.argv) > 1:
            from spark_parser.spark import rule2str

            for rule in sorted(p.rule2name.items()):
                print(rule2str(rule[0]))
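A hedged sketch of driving dump_and_check above, reusing get_python_parser exactly as code example #6 does; it assumes both functions are importable together, which these excerpts do not show.

from decompyle3 import PYTHON_VERSION, IS_PYPY

# Sketch only: dump and sanity-check the grammar for the running interpreter.
p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY)
dump_and_check(p, PYTHON_VERSION, set())   # the modified_tokens argument is rebuilt inside anyway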
Code example #6
def test_grammar():
    def check_tokens(tokens, opcode_set):
        remain_tokens = set(tokens) - opcode_set
        remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens])
        remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens])
        remain_tokens = set(
            [re.sub("LOAD_CODE$", "", t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        assert remain_tokens == set([]), "Remaining tokens %s\n====\n%s" % (
            remain_tokens,
            p.dump_grammar(),
        )

    p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY)
    (lhs, rhs, tokens, right_recursive, dup_rhs) = p.check_sets()

    # We have custom rules that create the below
    expect_lhs = set(["pos_arg"])

    if PYTHON_VERSION < 3.8:
        expect_lhs.add("get_iter")

    unused_rhs = set(["list", "mkfunc", "mklambda", "unpack"])

    expect_right_recursive = set([("designList", ("store", "DUP_TOP",
                                                  "designList"))])

    expect_lhs.add("load_genexpr")
    expect_lhs.add("kvlist")
    expect_lhs.add("kv3")

    unused_rhs = unused_rhs.union(
        set("""
    except_pop_except generator_exp
    """.split()))
    unused_rhs.add("dict_comp")
    unused_rhs.add("classdefdeco1")
    unused_rhs.add("tryelsestmtl")
    unused_rhs.add("dict")

    expect_right_recursive.add(
        (("l_stmts", ("lastl_stmt", "come_froms", "l_stmts"))))
    pass

    assert expect_lhs == set(lhs)

    # FIXME
    if PYTHON_VERSION != 3.8:
        assert unused_rhs == set(rhs)

    assert expect_right_recursive == right_recursive

    expect_dup_rhs = frozenset([
        ("COME_FROM", ),
        ("CONTINUE", ),
        ("JUMP_ABSOLUTE", ),
        ("LOAD_CONST", ),
        ("JUMP_BACK", ),
        ("JUMP_FORWARD", ),
    ])
    reduced_dup_rhs = dict(
        (k, dup_rhs[k]) for k in dup_rhs if k not in expect_dup_rhs)
    if reduced_dup_rhs:
        print(
            "\nPossible duplicate RHS that might be folded, into one of the LHS symbols"
        )
        for k in reduced_dup_rhs:
            print(k, reduced_dup_rhs[k])
    # assert not reduced_dup_rhs, reduced_dup_rhs

    s = get_scanner(PYTHON_VERSION, IS_PYPY)
    ignore_set = set("""
            JUMP_BACK CONTINUE
            COME_FROM COME_FROM_EXCEPT
            COME_FROM_EXCEPT_CLAUSE
            COME_FROM_LOOP COME_FROM_WITH
            COME_FROM_FINALLY ELSE
            LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_STR LOAD_CODE
            LAMBDA_MARKER
            RETURN_END_IF RETURN_END_IF_LAMBDA RETURN_VALUE_LAMBDA RETURN_LAST
            """.split())
Code example #7
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
    """
    Compare two code-objects.

    This is the main part of this module.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(
        code_obj1
    ), "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(
        code_obj2
    ), "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new style classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    if name == "__main__":
        name = code_obj1.co_name
    else:
        name = "%s.%s" % (name, code_obj1.co_name)
        if name == ".?":
            name = "__main__"

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out what exactly is not equal
        # if this compare succeeds, simply return
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith("co_")]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__ or verify != "verify":
            pass
        elif member == "co_code":
            if verify != "strong":
                continue
            scanner = get_scanner(version, is_pypy, show_asm=False)

            global JUMP_OPS
            JUMP_OPS = list(scan.JUMP_OPS) + ["JUMP_BACK"]

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"]
            tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"]

            i1 = 0
            i2 = 0
            offset_map = {}
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    if (len(tokens1) == len(tokens2) + 2
                            and tokens1[-1].kind == "RETURN_VALUE"
                            and tokens1[-2].kind == "LOAD_CONST"
                            and tokens1[-2].pattr is None
                            and tokens1[-3].kind == "RETURN_VALUE"):
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                for idx1, idx2, offset2 in check_jumps.get(
                        tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(
                            name,
                            tokens1[idx1].offset,
                            tokens1[idx1],
                            tokens2[idx2],
                            tokens1,
                            tokens2,
                        )

                if tokens1[i1].kind != tokens2[i2].kind:
                    if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind:
                        i = 1
                        while tokens1[i1 + i].kind == "LOAD_CONST":
                            i += 1
                        if tokens1[i1 + i].kind.startswith(
                            ("BUILD_TUPLE", "BUILD_LIST")) and i == int(
                                tokens1[i1 + i].kind.split("_")[-1]):
                            t = tuple(
                                [elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(
                                    name,
                                    tokens1[i1].offset,
                                    tokens1[i1],
                                    tokens2[i2],
                                    tokens1,
                                    tokens2,
                                )
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif (i == 2 and tokens1[i1 + i].kind == "ROT_TWO"
                              and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2"):
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
                            f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
                            if (f(tokens1[i1].pattr,
                                  tokens1[i1 + 1].pattr) == tokens2[i2].pattr):
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == "UNARY_NOT":
                        if tokens2[i2].kind == "POP_JUMP_IF_TRUE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE":
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == "POP_JUMP_IF_FALSE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE":
                                i1 += 2
                                i2 += 1
                                continue
                    elif (tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK")
                          and tokens1[i1 - 1].kind == "RETURN_VALUE"
                          and tokens2[i2 - 1].kind
                          in ("RETURN_VALUE", "RETURN_END_IF")
                          and int(tokens1[i1].offset) not in targets1):
                        i1 += 1
                        continue
                    elif (tokens1[i1].kind == "JUMP_BACK"
                          and tokens2[i2].kind == "CONTINUE"):
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif (tokens1[i1].kind == "JUMP_FORWARD"
                          and tokens2[i2].kind == "JUMP_BACK"
                          and tokens1[i1 + 1].kind == "JUMP_BACK"
                          and tokens2[i2 + 1].kind == "JUMP_BACK"
                          and int(tokens1[i1].pattr)
                          == int(tokens1[i1].offset) + 3):
                        if int(tokens1[i1].pattr) == int(tokens1[i1 +
                                                                 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    elif (tokens1[i1].kind == "LOAD_NAME"
                          and tokens2[i2].kind == "LOAD_CONST"
                          and tokens1[i1].pattr == "None"
                          and tokens2[i2].pattr is None):
                        pass
                    elif (tokens1[i1].kind == "LOAD_GLOBAL"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "LOAD_ASSERT"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "RETURN_VALUE"
                          and tokens2[i2].kind == "RETURN_END_IF"):
                        pass
                    elif (tokens1[i1].kind == "BUILD_TUPLE_0"
                          and tokens2[i2].pattr == ()):
                        pass
                    else:
                        raise CmpErrorCode(
                            name,
                            tokens1[i1].offset,
                            tokens1[i1],
                            tokens2[i2],
                            tokens1,
                            tokens2,
                        )
                elif (tokens1[i1].kind in JUMP_OPS
                      and tokens1[i1].pattr != tokens2[i2].pattr):
                    if tokens1[i1].kind == "JUMP_BACK":
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(
                                name,
                                tokens1[i1].offset,
                                tokens1[i1],
                                tokens2[i2],
                                tokens1,
                                tokens2,
                            )
                    else:
                        # Record the expected jump-target pairing; it is checked
                        # via check_jumps when tokens1 later reaches that offset.
                        try:
                            dest1 = int(tokens1[i1].pattr)
                            dest2 = int(tokens2[i2].pattr)
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]
                        except:
                            pass

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == "co_consts":
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts
                      if hasattr(c, "co_consts"))
            codes2 = (c for c in code_obj2.co_consts
                      if hasattr(c, "co_consts"))

            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, verify, name=name)
        elif member == "co_flags":
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
            # We also don't care about COROUTINE or GENERATOR for now
            flags1 &= ~0x000000A0
            flags2 &= ~0x000000A0
            if flags1 != flags2:
                raise CmpErrorMember(name, "co_flags", pretty_flags(flags1),
                                     pretty_flags(flags2))
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member, getattr(code_obj1, member),
                                     getattr(code_obj2, member))
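A hedged sketch of cmp_code_objects above: comparing a code object against itself under the ordinary "verify" mode should simply return, while a mismatch raises one of the CmpError* exceptions used in the function. It assumes the original module's globals (__IGNORE_CODE_MEMBERS__ and the CmpError* classes) are available; the sample function is illustrative only.

import sys

def sample(x):
    return x + 1

# Sketch only: a code object always verifies against itself.
version = float(sys.version[0:3])
cmp_code_objects(version, False, sample.__code__, sample.__code__, "verify")
print("code objects match")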
Code example #8
def test_get_scanner():
    # See that we can retrieve a scanner using a full version number
    assert get_scanner("3.7.3")
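A hedged sketch expanding on the test above: get_scanner accepts a full version string as well as the float/IS_PYPY form used elsewhere on this page, and the returned scanner can ingest a code object into a token stream, as in code example #1. It assumes the running interpreter is close enough to the requested version for ingest to work.

from decompyle3.scanner import get_scanner

# Sketch only: fetch a scanner and tokenize a small code object.
scanner = get_scanner("3.7.3", is_pypy=False)
tokens, customize = scanner.ingest(compile("x = 1", "<example>", "exec"))
for t in tokens:
    print(t)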
Code example #9

class Python38ParserSingle(Python38Parser, PythonParserSingle):
    pass


if __name__ == "__main__":
    # Check grammar
    p = Python38Parser()
    p.check_grammar()
    from decompyle3 import PYTHON_VERSION, IS_PYPY

    if PYTHON_VERSION == 3.8:
        lhs, rhs, tokens, right_recursive = p.check_sets()
        from decompyle3.scanner import get_scanner

        s = get_scanner(PYTHON_VERSION, IS_PYPY)
        opcode_set = set(s.opc.opname).union(
            set("""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
               LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME
               LAMBDA_MARKER RETURN_LAST
            """.split()))
        remain_tokens = set(tokens) - opcode_set
        import re

        remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens])
        remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        print(remain_tokens)
        # print(sorted(p.rule2name.items()))
Code example #10
File: aligner.py, Project: x0ret/python-decompile3
def code_deparse_align(co, out=sys.stderr, version=None, is_pypy=None,
                       debug_opts=DEFAULT_DEBUG_OPTS,
                       code_objects={}, compile_mode='exec'):
    """
    Ingests and deparses a given code block 'co'.
    """

    assert iscode(co)

    if version is None:
        version = float(sys.version[0:3])
    if is_pypy is None:
        is_pypy = IS_PYPY


    # store the final output stream in case of error
    scanner = get_scanner(version, is_pypy=is_pypy)

    tokens, customize = scanner.ingest(co, code_objects=code_objects)
    show_asm = debug_opts.get('asm', None)
    maybe_show_asm(show_asm, tokens)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    show_grammar = debug_opts.get('grammar', None)
    if show_grammar:
        debug_parser['reduce'] = show_grammar
        debug_parser['errorstack'] = True

    #  Build a parse tree from tokenized and massaged disassembly.
    show_ast = debug_opts.get('ast', None)
    deparsed = AligningWalker(
        version,
        scanner,
        out,
        showast=show_ast,
        debug_parser=debug_parser,
        compile_mode=compile_mode,
        is_pypy=is_pypy,
    )

    isTopLevel = co.co_name == '<module>'
    deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel)

    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens # save memory

    deparsed.mod_globs = find_globals(deparsed.ast, set())

    # convert a leading __doc__ = "..." assignment into a docstring
    try:
        if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
            deparsed.print_docstring('', co.co_consts[0])
            del deparsed.ast[0]
        if deparsed.ast[-1] == RETURN_NONE:
            deparsed.ast.pop() # remove last node
            # todo: if empty, add 'pass'
    except:
        pass

    # What we've been waiting for: Generate Python source from the parse tree!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    for g in sorted(deparsed.mod_globs):
        deparsed.write('# global %s ## Warning: Unused global\n' % g)

    if deparsed.ERROR:
        raise SourceWalkerError("Deparsing stopped due to parse error")
    return deparsed
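Finally, a hedged sketch of calling code_deparse_align above on a small function's code object; it assumes the original aligner.py imports (AligningWalker, DEFAULT_DEBUG_OPTS, find_globals, and friends) are in place, which the excerpt does not show. The deparsed source is written to the out stream, and the sample function is illustrative only.

import sys

def sample():
    y = 0
    for i in range(3):
        y += i
    return y

# Sketch only: deparse with alignment info, writing the recovered source to stdout.
deparsed = code_deparse_align(sample.__code__, out=sys.stdout)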