Exemple #1
0
def test_if_in_for():
    code = bug.__code__
    scan = get_scanner(PYTHON_VERSION)
    print(PYTHON_VERSION)
    if 2.7 <= PYTHON_VERSION <= 3.0 and not IS_PYPY:
        n = scan.setup_code(code)
        scan.build_lines_data(code, n)
        scan.build_prev_op(n)
        fjt = scan.find_jump_targets()
        assert {15: [3], 69: [66], 63: [18]} == fjt
        assert scan.structs == \
          [{'start': 0, 'end': 72, 'type': 'root'},
           {'start': 18, 'end': 66, 'type': 'if-then'},
           {'start': 31, 'end': 59, 'type': 'for-loop'},
           {'start': 62, 'end': 63, 'type': 'for-else'}]
    elif 3.2 < PYTHON_VERSION <= 3.4:
        scan.code = array('B', code.co_code)
        scan.build_lines_data(code)
        scan.build_prev_op()
        fjt  = scan.find_jump_targets()
        assert {69: [66], 63: [18]} == fjt
        assert scan.structs == \
          [{'end': 72, 'type': 'root', 'start': 0},
           {'end': 66, 'type': 'if-then', 'start': 6},
           {'end': 63, 'type': 'if-then', 'start': 18},
           {'end': 59, 'type': 'for-loop', 'start': 31},
           {'end': 63, 'type': 'for-else', 'start': 62}]
    else:
        assert True, "FIXME: should note fixed"
    return
Exemple #2
0
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG, is_pypy=False):
    """
    Parse a code object to an abstract syntax tree representation.

    :param version:         The python version this code is from as a float, for
                            example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
    :param co:              The code object to parse.
    :param out:             File like object to write the output to.
    :param showasm:         Flag which determines whether the disassembled and
                            ingested code is written to sys.stdout or not.
    :param parser_debug:    dict containing debug flags for the spark parser.

    :return: Abstract syntax tree representation of the code object.
    """

    assert iscode(co)
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version, is_pypy)
    tokens, customize = scanner.ingest(co)
    maybe_show_asm(showasm, tokens)

    # For heavy grammar debugging
    # parser_debug = {'rules': True, 'transition': True, 'reduce' : True,
    #                 'showstack': 'full'}
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
Exemple #3
0
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG, is_pypy=False):
    """
    Parse a code object to an abstract syntax tree representation.

    :param version:         The python version this code is from as a float, for
                            example 2.6, 2.7, 3.2, 3.3, 3.4, 3.5 etc.
    :param co:              The code object to parse.
    :param out:             File like object to write the output to.
    :param showasm:         Flag which determines whether the disassembled and
                            ingested code is written to sys.stdout or not.
    :param parser_debug:    dict containing debug flags for the spark parser.

    :return: Abstract syntax tree representation of the code object.
    """

    assert iscode(co)
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version, is_pypy)
    tokens, customize = scanner.ingest(co)
    maybe_show_asm(showasm, tokens)

    # For heavy grammar debugging
    # parser_debug = {'rules': True, 'transition': True, 'reduce' : True,
    #                 'showstack': 'full'}
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
Exemple #4
0
def test_template_engine():
    s = StringIO()
    sys_version = float(sys.version[0:3])
    scanner = get_scanner(sys_version, is_pypy=False)
    scanner.insts = []
    sw = SourceWalker(2.7, s, scanner)
    sw.ast = NONE
    sw.template_engine(('--%c--', 0), NONE)
    print(sw.f.getvalue())
    assert sw.f.getvalue() == '--None--'
Exemple #5
0
def test_grammar():
    def check_tokens(tokens, opcode_set):
        remain_tokens = set(tokens) - opcode_set
        remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens])
        remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        assert remain_tokens == set([]), \
            "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dump_grammar())

    p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY)
    lhs, rhs, tokens, right_recursive = p.check_sets()
    expect_lhs = set(['expr1024', 'pos_arg'])
    unused_rhs = set([
        'build_list', 'call_function', 'mkfunc', 'mklambda', 'unpack',
        'unpack_list'
    ])
    expect_right_recursive = frozenset([
        ('designList', ('designator', 'DUP_TOP', 'designList'))
    ])
    if PYTHON3:
        expect_lhs.add('load_genexpr')

        unused_rhs = unused_rhs.union(
            set("""
        except_pop_except genexpr classdefdeco2 listcomp
        """.split()))
        if 3.0 <= PYTHON_VERSION:
            expect_lhs.add("annotate_arg")
            expect_lhs.add("annotate_tuple")
            unused_rhs.add("mkfunc_annotate")
            pass
    else:
        expect_lhs.add('kwarg')
    assert expect_lhs == set(lhs)
    assert unused_rhs == set(rhs)
    assert expect_right_recursive == right_recursive
    s = get_scanner(PYTHON_VERSION, IS_PYPY)
    ignore_set = set("""
            JUMP_BACK CONTINUE
            COME_FROM COME_FROM_EXCEPT
            COME_FROM_EXCEPT_CLAUSE
            COME_FROM_LOOP COME_FROM_WITH
            COME_FROM_FINALLY ELSE
            LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
            LAMBDA_MARKER
            RETURN_END_IF RETURN_END_IF_LAMBDA RETURN_VALUE_LAMBDA RETURN_LAST
            """.split())
    if 2.6 <= PYTHON_VERSION <= 2.7:
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
    elif PYTHON_VERSION == 3.4:
        ignore_set.add('LOAD_CLASSNAME')
        ignore_set.add('STORE_LOCALS')
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
Exemple #6
0
def test_if_in_for():
    code = bug.__code__
    scan = get_scanner(PYTHON_VERSION)
    print(PYTHON_VERSION)
    if 2.7 <= PYTHON_VERSION <= 3.0 and not IS_PYPY:
        n = scan.setup_code(code)
        scan.build_lines_data(code, n)
        scan.build_prev_op(n)
        fjt = scan.find_jump_targets(False)

        ## FIXME: the data below is wrong.
        ## we get different results currenty as well.
        ## We need to probably fix both the code
        ## and the test below
        # assert {15: [3], 69: [66], 63: [18]} == fjt
        # assert scan.structs == \
        #   [{'start': 0, 'end': 72, 'type': 'root'},
        #    {'start': 15, 'end': 66, 'type': 'if-then'},
        #    {'start': 31, 'end': 59, 'type': 'for-loop'},
        #    {'start': 62, 'end': 63, 'type': 'for-else'}]

        code = bug_loop.__code__
        n = scan.setup_code(code)
        scan.build_lines_data(code, n)
        scan.build_prev_op(n)
        fjt = scan.find_jump_targets(False)
        assert{64: [42], 67: [42, 42], 42: [16, 41], 19: [6]} == fjt
        assert scan.structs == [
            {'start': 0, 'end': 80, 'type': 'root'},
            {'start': 3, 'end': 64, 'type': 'if-then'},
            {'start': 6, 'end': 15, 'type': 'try'},
            {'start': 19, 'end': 38, 'type': 'except'},
            {'start': 45, 'end': 67, 'type': 'while-loop'},
            {'start': 70, 'end': 64, 'type': 'while-else'},
            # previous bug was not mistaking while-loop for if-then
            {'start': 48, 'end': 67, 'type': 'while-loop'}]

    elif 3.2 < PYTHON_VERSION <= 3.4:
        bytecode = Bytecode(code, scan.opc)
        scan.code = array('B', code.co_code)
        scan.build_lines_data(code)
        scan.build_prev_op()
        scan.insts = list(bytecode)
        fjt  = scan.find_jump_targets(False)
        assert {69: [66], 63: [18]} == fjt
        assert scan.structs == \
          [{'end': 72, 'type': 'root', 'start': 0},
           {'end': 66, 'type': 'if-then', 'start': 6},
           {'end': 63, 'type': 'if-then', 'start': 18},
           {'end': 59, 'type': 'for-loop', 'start': 31},
           {'end': 63, 'type': 'for-else', 'start': 62}]
    else:
        assert True, "FIXME: should note fixed"
    return
Exemple #7
0
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    assert iscode(co)
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
Exemple #8
0
def align_deparse_code(version, co, out=sys.stderr, showasm=False, showast=False,
                 showgrammar=False, code_objects={}, compile_mode='exec', is_pypy=False):
    """
    ingests and deparses a given code block 'co'
    """

    assert iscode(co)
    # store final output stream for case of error
    scanner = get_scanner(version, is_pypy=is_pypy)

    tokens, customize = scanner.ingest(co, code_objects=code_objects)
    maybe_show_asm(showasm, tokens)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    if showgrammar:
        debug_parser['reduce'] = showgrammar
        debug_parser['errorstack'] = True

    #  Build AST from disassembly.
    deparsed = AligningWalker(version, scanner, out, showast=showast,
                            debug_parser=debug_parser, compile_mode=compile_mode,
                            is_pypy = is_pypy)

    isTopLevel = co.co_name == '<module>'
    deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel)

    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens # save memory

    deparsed.mod_globs = find_globals(deparsed.ast, set())

    # convert leading '__doc__ = "..." into doc string
    try:
        if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
            deparsed.print_docstring('', co.co_consts[0])
            del deparsed.ast[0]
        if deparsed.ast[-1] == RETURN_NONE:
            deparsed.ast.pop() # remove last node
            # todo: if empty, add 'pass'
    except:
        pass

    # What we've been waiting for: Generate source from AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)

    if deparsed.ERROR:
        raise SourceWalkerError("Deparsing stopped due to parse error")
    return deparsed
def test_grammar():

    def check_tokens(tokens, opcode_set):
        remain_tokens = set(tokens) - opcode_set
        remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens])
        remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        assert remain_tokens == set([]), \
            "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dumpGrammar())

    p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY)
    lhs, rhs, tokens, right_recursive = p.checkSets()
    expect_lhs = set(['expr1024', 'pos_arg'])
    unused_rhs = set(['build_list', 'call_function', 'mkfunc',
                      'mklambda',
                      'unpack', 'unpack_list'])
    expect_right_recursive = [['designList', ('designator', 'DUP_TOP', 'designList')]]
    if PYTHON3:
        expect_lhs.add('load_genexpr')

        unused_rhs = unused_rhs.union(set("""
        except_pop_except genexpr classdefdeco2 listcomp
        """.split()))
        if 3.0 <= PYTHON_VERSION:
            expect_lhs.add("annotate_arg")
            expect_lhs.add("annotate_tuple")
            unused_rhs.add("mkfunc_annotate")
            pass
    else:
        expect_lhs.add('kwarg')
    assert expect_lhs == set(lhs)
    assert unused_rhs == set(rhs)
    assert expect_right_recursive == right_recursive
    s = get_scanner(PYTHON_VERSION, IS_PYPY)
    ignore_set = set(
            """
            JUMP_BACK CONTINUE RETURN_END_IF
            COME_FROM COME_FROM_EXCEPT
            COME_FROM_EXCEPT_CLAUSE
            COME_FROM_LOOP COME_FROM_WITH
            COME_FROM_FINALLY ELSE
            LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
            LAMBDA_MARKER RETURN_LAST
            """.split())
    if 2.6 <= PYTHON_VERSION <= 2.7:
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
    elif PYTHON_VERSION == 3.4:
        ignore_set.add('LOAD_CLASSNAME')
        ignore_set.add('STORE_LOCALS')
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    import inspect
    assert hasattr(co, 'co_name')
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
Exemple #11
0
def deparse_code(version,
                 co,
                 out=StringIO(),
                 showasm=False,
                 showast=False,
                 showgrammar=False):

    assert iscode(co)
    # store final output stream for case of error
    scanner = get_scanner(version)

    tokens, customize = scanner.disassemble(co)

    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    debug_parser['reduce'] = showgrammar

    #  Build AST from disassembly.
    # deparsed = pysource.FragmentsWalker(out, scanner, showast=showast)
    deparsed = FragmentsWalker(version,
                               scanner,
                               showast=showast,
                               debug_parser=debug_parser)

    deparsed.ast = deparsed.build_ast(tokens, customize)

    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens  # save memory

    # convert leading '__doc__ = "..." into doc string
    assert deparsed.ast == 'stmts'
    deparsed.mod_globs = pysource.find_globals(deparsed.ast, set())

    # Just when you think we've forgotten about what we
    # were supposed to to: Generate source from AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text))
    deparsed.fixup_parents(deparsed.ast, None)

    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)
    if deparsed.ERROR:
        raise deparsed.ERROR

    return deparsed
Exemple #12
0
def python_parser(version, co, out=sys.stdout, showasm=False,
                  parser_debug=PARSER_DEFAULT_DEBUG):
    assert iscode(co)
    from uncompyle6.scanner import get_scanner
    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    # For heavy grammar debugging
    parser_debug = {'rules': True, 'transition': True, 'reduce' : True}
    p = get_python_parser(version, parser_debug)
    return parse(p, tokens, customize)
Exemple #13
0
def test_if_in_for():
    code = bug.__code__
    scan = get_scanner(PYTHON_VERSION)
    print(PYTHON_VERSION)
    if 2.7 <= PYTHON_VERSION <= 3.0 and not IS_PYPY:
        n = scan.setup_code(code)
        scan.build_lines_data(code, n)
        scan.build_prev_op(n)
        fjt = scan.find_jump_targets(False)
        assert {15: [3], 69: [66], 63: [18]} == fjt
        assert scan.structs == \
          [{'start': 0, 'end': 72, 'type': 'root'},
           {'start': 15, 'end': 66, 'type': 'if-then'},
           {'start': 31, 'end': 59, 'type': 'for-loop'},
           {'start': 62, 'end': 63, 'type': 'for-else'}]

        code = bug_loop.__code__
        n = scan.setup_code(code)
        scan.build_lines_data(code, n)
        scan.build_prev_op(n)
        fjt = scan.find_jump_targets(False)
        assert{64: [42], 67: [42, 42], 42: [16, 41], 19: [6]} == fjt
        assert scan.structs == [
            {'start': 0, 'end': 80, 'type': 'root'},
            {'start': 3, 'end': 64, 'type': 'if-then'},
            {'start': 6, 'end': 15, 'type': 'try'},
            {'start': 19, 'end': 38, 'type': 'except'},
            {'start': 45, 'end': 67, 'type': 'while-loop'},
            {'start': 70, 'end': 64, 'type': 'while-else'},
            # previous bug was not mistaking while-loop for if-then
            {'start': 48, 'end': 67, 'type': 'while-loop'}]

    elif 3.2 < PYTHON_VERSION <= 3.4:
        scan.code = array('B', code.co_code)
        scan.build_lines_data(code)
        scan.build_prev_op()
        fjt  = scan.find_jump_targets(False)
        assert {69: [66], 63: [18]} == fjt
        assert scan.structs == \
          [{'end': 72, 'type': 'root', 'start': 0},
           {'end': 66, 'type': 'if-then', 'start': 6},
           {'end': 63, 'type': 'if-then', 'start': 18},
           {'end': 59, 'type': 'for-loop', 'start': 31},
           {'end': 63, 'type': 'for-else', 'start': 62}]
    else:
        assert True, "FIXME: should note fixed"
    return
Exemple #14
0
def disco(version, co, out=None, is_pypy=False):
    """
    diassembles and deparses a given code block 'co'
    """

    assert iscode(co)

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)

    scanner = get_scanner(version, is_pypy=is_pypy)

    queue = deque([co])
    disco_loop(scanner.ingest, queue, real_out)
Exemple #15
0
def deparse_code(version, co, out=StringIO(), showasm=False, showast=False,
                 showgrammar=False):

    assert iscode(co)
    # store final output stream for case of error
    scanner = get_scanner(version)

    tokens, customize = scanner.disassemble(co)

    tokens, customize = scanner.disassemble(co)
    if showasm:
        for t in tokens:
            print(t)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    if showgrammar:
        debug_parser['reduce'] = showgrammar
        debug_parser['errorstack'] = True

    #  Build AST from disassembly.
    # deparsed = pysource.FragmentsWalker(out, scanner, showast=showast)
    deparsed = FragmentsWalker(version, scanner, showast=showast, debug_parser=debug_parser)

    deparsed.ast = deparsed.build_ast(tokens, customize)

    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens # save memory

    # convert leading '__doc__ = "..." into doc string
    assert deparsed.ast == 'stmts'
    deparsed.mod_globs = pysource.find_globals(deparsed.ast, set())

    # Just when you think we've forgotten about what we
    # were supposed to to: Generate source from AST!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    deparsed.set_pos_info(deparsed.ast, 0, len(deparsed.text))
    deparsed.fixup_parents(deparsed.ast, None)

    for g in deparsed.mod_globs:
        deparsed.write('# global %s ## Warning: Unused global' % g)
    if deparsed.ERROR:
        raise deparsed.ERROR

    return deparsed
Exemple #16
0
def disco(version, co, out=None, is_pypy=False):
    """
    diassembles and deparses a given code block 'co'
    """

    assert iscode(co)

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename,
              file=real_out)

    scanner = get_scanner(version, is_pypy=is_pypy)

    queue = deque([co])
    disco_loop(scanner.disassemble, queue, real_out)
def disco(version, co, out=None):
    """
    diassembles and deparses a given code block 'co'
    """

    assert hasattr(co, 'co_name')

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename, file=real_out)

    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)

    for t in tokens:
        print(t, file=real_out)
    print(file=out)
Exemple #18
0
def disco(version, co, out=None):
    """
    diassembles and deparses a given code block 'co'
    """

    assert iscode(co)

    # store final output stream for case of error
    real_out = out or sys.stdout
    print('# Python %s' % version, file=real_out)
    if co.co_filename:
        print('# Embedded file name: %s' % co.co_filename,
              file=real_out)

    scanner = get_scanner(version)
    tokens, customize = scanner.disassemble(co)

    for t in tokens:
        print(t, file=real_out)
    print(file=out)
Exemple #19
0
def test_get_scanner():
    # See that we can retrieve a scanner using a full version number
    assert get_scanner('2.7.13')
Exemple #20
0
def test_if_in_for():
    code = bug.__code__
    scan = get_scanner(PYTHON_VERSION_TRIPLE)
    if (2, 7) <= PYTHON_VERSION_TRIPLE < (3, 1) and not IS_PYPY:
        scan.build_instructions(code)
        fjt = scan.find_jump_targets(False)

        ## FIXME: the data below is wrong.
        ## we get different results currenty as well.
        ## We need to probably fix both the code
        ## and the test below
        # assert {15: [3], 69: [66], 63: [18]} == fjt
        # assert scan.structs == \
        #   [{'start': 0, 'end': 72, 'type': 'root'},
        #    {'start': 15, 'end': 66, 'type': 'if-then'},
        #    {'start': 31, 'end': 59, 'type': 'for-loop'},
        #    {'start': 62, 'end': 63, 'type': 'for-else'}]

        code = bug_loop.__code__
        scan.build_instructions(code)
        fjt = scan.find_jump_targets(False)
        assert {64: [42], 67: [42, 42], 42: [16, 41], 19: [6]} == fjt
        assert scan.structs == [
            {
                'start': 0,
                'end': 80,
                'type': 'root'
            },
            {
                'start': 3,
                'end': 64,
                'type': 'if-then'
            },
            {
                'start': 6,
                'end': 15,
                'type': 'try'
            },
            {
                'start': 19,
                'end': 38,
                'type': 'except'
            },
            {
                'start': 45,
                'end': 67,
                'type': 'while-loop'
            },
            {
                'start': 70,
                'end': 64,
                'type': 'while-else'
            },
            # previous bug was not mistaking while-loop for if-then
            {
                'start': 48,
                'end': 67,
                'type': 'while-loop'
            }
        ]

    elif (3, 2) < PYTHON_VERSION_TRIPLE <= (3, 4):
        scan.build_instructions(code)
        fjt = scan.find_jump_targets(False)
        assert {69: [66], 63: [18]} == fjt
        assert scan.structs == \
          [{'end': 72, 'type': 'root', 'start': 0},
           {'end': 66, 'type': 'if-then', 'start': 6},
           {'end': 63, 'type': 'if-then', 'start': 18},
           {'end': 59, 'type': 'for-loop', 'start': 31},
           {'end': 63, 'type': 'for-else', 'start': 62}]
    else:
        print("FIXME: should fix for %s" % version_tuple_to_str())
        assert True
    return
Exemple #21
0
def test_grammar():
    def check_tokens(tokens, opcode_set):
        remain_tokens = set(tokens) - opcode_set
        remain_tokens = set([re.sub('_\d+$', '', t) for t in remain_tokens])
        remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        assert remain_tokens == set([]), \
            "Remaining tokens %s\n====\n%s" % (remain_tokens, p.dump_grammar())

    p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY)
    (lhs, rhs, tokens, right_recursive, dup_rhs) = p.check_sets()

    # We have custom rules that create the below
    expect_lhs = set(['pos_arg', 'get_iter', 'attribute'])

    unused_rhs = set([
        'list',
        'mkfunc',
        'mklambda',
        'unpack',
    ])
    expect_right_recursive = set([('designList', ('store', 'DUP_TOP',
                                                  'designList'))])

    if PYTHON_VERSION != 3.7:
        unused_rhs.add('call')

    if PYTHON_VERSION > 2.6:
        expect_lhs.add('kvlist')
        expect_lhs.add('kv3')
        unused_rhs.add('dict')

    if PYTHON3:
        expect_lhs.add('load_genexpr')

        unused_rhs = unused_rhs.union(
            set("""
        except_pop_except generator_exp
        """.split()))
        if PYTHON_VERSION >= 3.0:
            expect_lhs.add("annotate_arg")
            expect_lhs.add("annotate_tuple")
            unused_rhs.add("mkfunc_annotate")
            unused_rhs.add("dict_comp")
            unused_rhs.add("classdefdeco1")
            if PYTHON_VERSION >= 3.5:
                expect_right_recursive.add(
                    (('l_stmts', ('lastl_stmt', 'come_froms', 'l_stmts'))))
                pass
            elif 3.0 < PYTHON_VERSION < 3.3:
                expect_right_recursive.add(
                    (('l_stmts', ('lastl_stmt', 'COME_FROM', 'l_stmts'))))
                pass
            pass
        pass
    else:
        expect_lhs.add('kwarg')

    assert expect_lhs == set(lhs)
    assert unused_rhs == set(rhs)
    assert expect_right_recursive == right_recursive

    expect_dup_rhs = frozenset([('COME_FROM', ), ('CONTINUE', ),
                                ('JUMP_ABSOLUTE', ), ('LOAD_CONST', ),
                                ('JUMP_BACK', ), ('JUMP_FORWARD', )])
    reduced_dup_rhs = dict(
        (k, dup_rhs[k]) for k in dup_rhs if k not in expect_dup_rhs)
    for k in reduced_dup_rhs:
        print(k, reduced_dup_rhs[k])
    # assert not reduced_dup_rhs, reduced_dup_rhs

    s = get_scanner(PYTHON_VERSION, IS_PYPY)
    ignore_set = set("""
            JUMP_BACK CONTINUE
            COME_FROM COME_FROM_EXCEPT
            COME_FROM_EXCEPT_CLAUSE
            COME_FROM_LOOP COME_FROM_WITH
            COME_FROM_FINALLY ELSE
            LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
            LAMBDA_MARKER
            RETURN_END_IF RETURN_END_IF_LAMBDA RETURN_VALUE_LAMBDA RETURN_LAST
            """.split())
    if 2.6 <= PYTHON_VERSION <= 2.7:
        opcode_set = set(s.opc.opname).union(ignore_set)
        if PYTHON_VERSION == 2.6:
            opcode_set.add("THEN")
        check_tokens(tokens, opcode_set)
    elif PYTHON_VERSION == 3.4:
        ignore_set.add('LOAD_CLASSNAME')
        ignore_set.add('STORE_LOCALS')
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
def test_grammar():
    def check_tokens(tokens, opcode_set):
        remain_tokens = set(tokens) - opcode_set
        remain_tokens = set([re.sub(r"_\d+$", "", t) for t in remain_tokens])
        remain_tokens = set([re.sub("_CONT$", "", t) for t in remain_tokens])
        remain_tokens = set(
            [re.sub("LOAD_CODE$", "", t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        assert remain_tokens == set([]), "Remaining tokens %s\n====\n%s" % (
            remain_tokens,
            p.dump_grammar(),
        )

    p = get_python_parser(PYTHON_VERSION, is_pypy=IS_PYPY)
    (lhs, rhs, tokens, right_recursive, dup_rhs) = p.check_sets()

    # We have custom rules that create the below
    expect_lhs = set(["pos_arg"])

    if PYTHON_VERSION < 3.8:
        if PYTHON_VERSION < 3.7:
            expect_lhs.add("attribute")

        expect_lhs.add("get_iter")
    else:
        expect_lhs.add("async_with_as_stmt")
        expect_lhs.add("async_with_stmt")

    unused_rhs = set(["list", "mkfunc", "mklambda", "unpack"])

    expect_right_recursive = set([("designList", ("store", "DUP_TOP",
                                                  "designList"))])

    if PYTHON_VERSION <= 3.7:
        unused_rhs.add("call")

    if PYTHON_VERSION > 2.6:
        expect_lhs.add("kvlist")
        expect_lhs.add("kv3")
        unused_rhs.add("dict")

    if PYTHON3:
        expect_lhs.add("load_genexpr")

        unused_rhs = unused_rhs.union(
            set("""
        except_pop_except generator_exp
        """.split()))
        if PYTHON_VERSION >= 3.0:
            if PYTHON_VERSION < 3.7:
                expect_lhs.add("annotate_arg")
                expect_lhs.add("annotate_tuple")
                unused_rhs.add("mkfunc_annotate")

            unused_rhs.add("dict_comp")
            unused_rhs.add("classdefdeco1")
            unused_rhs.add("tryelsestmtl")
            if PYTHON_VERSION >= 3.5:
                expect_right_recursive.add(
                    (("l_stmts", ("lastl_stmt", "come_froms", "l_stmts"))))
                pass
            elif 3.0 < PYTHON_VERSION < 3.3:
                expect_right_recursive.add(
                    (("l_stmts", ("lastl_stmt", "COME_FROM", "l_stmts"))))
                pass
            pass
        pass
    else:
        expect_lhs.add("kwarg")

    assert expect_lhs == set(lhs)

    # FIXME
    if PYTHON_VERSION != 3.8:
        assert unused_rhs == set(rhs)

    assert expect_right_recursive == right_recursive

    expect_dup_rhs = frozenset([
        ("COME_FROM", ),
        ("CONTINUE", ),
        ("JUMP_ABSOLUTE", ),
        ("LOAD_CONST", ),
        ("JUMP_BACK", ),
        ("JUMP_FORWARD", ),
    ])
    reduced_dup_rhs = dict(
        (k, dup_rhs[k]) for k in dup_rhs if k not in expect_dup_rhs)
    for k in reduced_dup_rhs:
        print(k, reduced_dup_rhs[k])
    # assert not reduced_dup_rhs, reduced_dup_rhs

    s = get_scanner(PYTHON_VERSION, IS_PYPY)
    ignore_set = set("""
            JUMP_BACK CONTINUE
            COME_FROM COME_FROM_EXCEPT
            COME_FROM_EXCEPT_CLAUSE
            COME_FROM_LOOP COME_FROM_WITH
            COME_FROM_FINALLY ELSE
            LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_STR LOAD_CODE
            LAMBDA_MARKER
            RETURN_END_IF RETURN_END_IF_LAMBDA RETURN_VALUE_LAMBDA RETURN_LAST
            """.split())
    if 2.6 <= PYTHON_VERSION <= 2.7:
        opcode_set = set(s.opc.opname).union(ignore_set)
        if PYTHON_VERSION == 2.6:
            opcode_set.add("THEN")
        check_tokens(tokens, opcode_set)
    elif PYTHON_VERSION == 3.4:
        ignore_set.add("LOAD_CLASSNAME")
        ignore_set.add("STORE_LOCALS")
        opcode_set = set(s.opc.opname).union(ignore_set)
        check_tokens(tokens, opcode_set)
Exemple #23
0
def code_deparse_align(co,
                       out=sys.stderr,
                       version=None,
                       is_pypy=None,
                       debug_opts=DEFAULT_DEBUG_OPTS,
                       code_objects={},
                       compile_mode='exec'):
    """
    ingests and deparses a given code block 'co'
    """

    assert iscode(co)

    if version is None:
        version = float(sys.version[0:3])
    if is_pypy is None:
        is_pypy = IS_PYPY

    # store final output stream for case of error
    scanner = get_scanner(version, is_pypy=is_pypy)

    tokens, customize = scanner.ingest(co, code_objects=code_objects)
    show_asm = debug_opts.get('asm', None)
    maybe_show_asm(show_asm, tokens)

    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    show_grammar = debug_opts.get('grammar', None)
    show_grammar = debug_opts.get('grammar', None)
    if show_grammar:
        debug_parser['reduce'] = show_grammar
        debug_parser['errorstack'] = True

    #  Build a parse tree from tokenized and massaged disassembly.
    show_ast = debug_opts.get('ast', None)
    deparsed = AligningWalker(version,
                              scanner,
                              out,
                              showast=show_ast,
                              debug_parser=debug_parser,
                              compile_mode=compile_mode,
                              is_pypy=is_pypy)

    isTopLevel = co.co_name == '<module>'
    deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=isTopLevel)

    assert deparsed.ast == 'stmts', 'Should have parsed grammar start'

    del tokens  # save memory

    deparsed.mod_globs = find_globals(deparsed.ast, set())

    # convert leading '__doc__ = "..." into doc string
    try:
        if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
            deparsed.print_docstring('', co.co_consts[0])
            del deparsed.ast[0]
        if deparsed.ast[-1] == RETURN_NONE:
            deparsed.ast.pop()  # remove last node
            # todo: if empty, add 'pass'
    except:
        pass

    # What we've been waiting for: Generate Python source from the parse tree!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    for g in sorted(deparsed.mod_globs):
        deparsed.write('# global %s ## Warning: Unused global\n' % g)

    if deparsed.ERROR:
        raise SourceWalkerError("Deparsing stopped due to parse error")
    return deparsed
Exemple #24
0
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
    """
    Compare two code-objects.

    This is the main part of this module.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(
        code_obj1
    ), "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(
        code_obj2
    ), "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new stle classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    if name == "__main__":
        name = code_obj1.co_name
    else:
        name = "%s.%s" % (name, code_obj1.co_name)
        if name == ".?":
            name = "__main__"

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is nor equal
        # if this compare succeds, simply return
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith("co_")]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__ or verify != "verify":
            pass
        elif member == "co_code":
            if verify != "strong":
                continue
            scanner = get_scanner(version, is_pypy, show_asm=False)

            global JUMP_OPS
            JUMP_OPS = list(scan.JUMP_OPS) + ["JUMP_BACK"]

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"]
            tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"]

            i1 = 0
            i2 = 0
            offset_map = {}
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    if (len(tokens1) == len(tokens2) + 2
                            and tokens1[-1].kind == "RETURN_VALUE"
                            and tokens1[-2].kind == "LOAD_CONST"
                            and tokens1[-2].pattr is None
                            and tokens1[-3].kind == "RETURN_VALUE"):
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                for idx1, idx2, offset2 in check_jumps.get(
                        tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(
                            name,
                            tokens1[idx1].offset,
                            tokens1[idx1],
                            tokens2[idx2],
                            tokens1,
                            tokens2,
                        )

                if tokens1[i1].kind != tokens2[i2].kind:
                    if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind:
                        i = 1
                        while tokens1[i1 + i].kind == "LOAD_CONST":
                            i += 1
                        if tokens1[i1 + i].kind.startswith(
                            ("BUILD_TUPLE", "BUILD_LIST")) and i == int(
                                tokens1[i1 + i].kind.split("_")[-1]):
                            t = tuple(
                                [elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(
                                    name,
                                    tokens1[i1].offset,
                                    tokens1[i1],
                                    tokens2[i2],
                                    tokens1,
                                    tokens2,
                                )
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif (i == 2 and tokens1[i1 + i].kind == "ROT_TWO"
                              and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2"):
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
                            f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
                            if (f(tokens1[i1].pattr,
                                  tokens1[i1 + 1].pattr) == tokens2[i2].pattr):
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == "UNARY_NOT":
                        if tokens2[i2].kind == "POP_JUMP_IF_TRUE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE":
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == "POP_JUMP_IF_FALSE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE":
                                i1 += 2
                                i2 += 1
                                continue
                    elif (tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK")
                          and tokens1[i1 - 1].kind == "RETURN_VALUE"
                          and tokens2[i2 - 1].kind
                          in ("RETURN_VALUE", "RETURN_END_IF")
                          and int(tokens1[i1].offset) not in targets1):
                        i1 += 1
                        continue
                    elif (tokens1[i1].kind == "JUMP_BACK"
                          and tokens2[i2].kind == "CONTINUE"):
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif (tokens1[i1].kind == "JUMP_FORWARD"
                          and tokens2[i2].kind == "JUMP_BACK"
                          and tokens1[i1 + 1].kind == "JUMP_BACK"
                          and tokens2[i2 + 1].kind == "JUMP_BACK"
                          and int(tokens1[i1].pattr)
                          == int(tokens1[i1].offset) + 3):
                        if int(tokens1[i1].pattr) == int(tokens1[i1 +
                                                                 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    elif (tokens1[i1].kind == "LOAD_NAME"
                          and tokens2[i2].kind == "LOAD_CONST"
                          and tokens1[i1].pattr == "None"
                          and tokens2[i2].pattr is None):
                        pass
                    elif (tokens1[i1].kind == "LOAD_GLOBAL"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "LOAD_ASSERT"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "RETURN_VALUE"
                          and tokens2[i2].kind == "RETURN_END_IF"):
                        pass
                    elif (tokens1[i1].kind == "BUILD_TUPLE_0"
                          and tokens2[i2].pattr == ()):
                        pass
                    else:
                        raise CmpErrorCode(
                            name,
                            tokens1[i1].offset,
                            tokens1[i1],
                            tokens2[i2],
                            tokens1,
                            tokens2,
                        )
                elif (tokens1[i1].kind in JUMP_OPS
                      and tokens1[i1].pattr != tokens2[i2].pattr):
                    if tokens1[i1].kind == "JUMP_BACK":
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(
                                name,
                                tokens1[i1].offset,
                                tokens1[i1],
                                tokens2[i2],
                                tokens1,
                                tokens2,
                            )
                    else:
                        # import pdb; pdb.set_trace()
                        try:
                            dest1 = int(tokens1[i1].pattr)
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]
                        except:
                            pass

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == "co_consts":
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts
                      if hasattr(c, "co_consts"))
            codes2 = (c for c in code_obj2.co_consts
                      if hasattr(c, "co_consts"))

            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, verify, name=name)
        elif member == "co_flags":
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
            # We also don't care about COROUTINE or GENERATOR for now
            flags1 &= ~0x000000A0
            flags2 &= ~0x000000A0
            if flags1 != flags2:
                raise CmpErrorMember(
                    name,
                    "co_flags",
                    pretty_code_flags(flags1),
                    pretty_code_flags(flags2),
                )
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member, getattr(code_obj1, member),
                                     getattr(code_obj2, member))
Exemple #25
0
                pass
            pass
        return False


class Python36ParserSingle(Python36Parser, PythonParserSingle):
    pass


if __name__ == '__main__':
    # Check grammar
    p = Python36Parser()
    p.check_grammar()
    from uncompyle6 import PYTHON_VERSION, IS_PYPY
    if PYTHON_VERSION == 3.6:
        lhs, rhs, tokens, right_recursive = p.check_sets()
        from uncompyle6.scanner import get_scanner
        s = get_scanner(PYTHON_VERSION, IS_PYPY)
        opcode_set = set(s.opc.opname).union(
            set("""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
               LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME
               LAMBDA_MARKER RETURN_LAST
            """.split()))
        remain_tokens = set(tokens) - opcode_set
        import re
        remain_tokens = set([re.sub(r'_\d+$', '', t) for t in remain_tokens])
        remain_tokens = set([re.sub('_CONT$', '', t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        print(remain_tokens)
        # print(sorted(p.rule2name.items()))
Exemple #26
0
        # Python 3.4+ has more loop optimization that removes
        # JUMP_FORWARD in some cases, and hence we also don't
        # see COME_FROM
        _ifstmts_jump ::= c_stmts_opt
        """
class Python35ParserSingle(Python35Parser, PythonParserSingle):
    pass

if __name__ == '__main__':
    # Check grammar
    p = Python35Parser()
    p.checkGrammar()
    from uncompyle6 import PYTHON_VERSION, IS_PYPY
    if PYTHON_VERSION == 3.5:
        lhs, rhs, tokens, right_recursive = p.checkSets()
        from uncompyle6.scanner import get_scanner
        s = get_scanner(PYTHON_VERSION, IS_PYPY)
        opcode_set = set(s.opc.opname).union(set(
            """JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
               LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP LOAD_CLASSNAME
               LAMBDA_MARKER RETURN_LAST
            """.split()))
        remain_tokens = set(tokens) - opcode_set
        import re
        remain_tokens = set([re.sub('_\d+$','', t) for t in remain_tokens])
        remain_tokens = set([re.sub('_CONT$','', t) for t in remain_tokens])
        remain_tokens = set(remain_tokens) - opcode_set
        print(remain_tokens)
        # print(sorted(p.rule2name.items()))