Python Token Examples

Programming Language: Python

Namespace/Package Name: uncompyle6.scanner

Class/Type: Token

Examples at hotexamples.com: 10

Python Token - 10 examples found. These are the top rated real world Python examples of uncompyle6.scanner.Token extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Token(10)

Frequently Used Methods

Token (10)

Example #1

Show file

 def n_return_stmt(self, node):
     start = len(self.f.getvalue()) + len(self.indent)
     if self.__params['isLambda']:
         self.preorder(node[0])
         if hasattr(node[-1], 'offset'):
             self.set_pos_info(node[-1], start, len(self.f.getvalue()))
         self.prune()
     else:
         start = len(self.f.getvalue()) + len(self.indent)
         self.write(self.indent, 'return')
         if self.return_none or node != AST(
                 'return_stmt',
             [AST('ret_expr', [NONE]),
              Token('RETURN_VALUE')]):
             self.write(' ')
             self.last_finish = len(self.f.getvalue())
             self.preorder(node[0])
             if hasattr(node[-1], 'offset'):
                 self.set_pos_info(node[-1], start, len(self.f.getvalue()))
                 pass
             pass
         else:
             for n in node:
                 self.set_pos_info(n, start, len(self.f.getvalue()))
                 pass
             pass
         self.set_pos_info(node, start, len(self.f.getvalue()))
         self.print_()
         self.prune()  # stop recursing

Example #2

Show file

File: fragments.py Project: Prashant-Jonny/python-uncompyle6

 def n_yield(self, node):
     start = len(self.f.getvalue())
     self.write('yield')
     if node != AST('yield', [NONE, Token('YIELD_VALUE')]):
         self.write(' ')
         node[0].parent = node
         self.preorder(node[0])
     self.set_pos_info(node, start, len(self.f.getvalue()))
     self.prune() # stop recursing

Example #3

Show file

File: fragments.py Project: Prashant-Jonny/python-uncompyle6

    def build_ast(self, tokens, customize, isLambda=0, noneInNames=False):
        # assert type(tokens) == ListType
        # assert isinstance(tokens[0], Token)

        if isLambda:
            tokens.append(Token('LAMBDA_MARKER'))
            try:
                ast = parser.parse(self.p, tokens, customize)
            except parser.ParserError as e:
                raise ParserError(e, tokens)
            if self.showast:
                print(repr(ast))
            return ast

        # The bytecode for the end of the main routine has a
        # "return None". However you can't issue a "return" statement in
        # main. In the other build_ast routine we eliminate the
        # return statement instructions before parsing.
        # But here we want to keep these instructions at the expense of
        # a fully runnable Python program because we
        # my be queried about the role of one of those instructuions

        if len(tokens) >= 2 and not noneInNames:
            if tokens[-1].type == 'RETURN_VALUE':
                if tokens[-2].type != 'LOAD_CONST':
                    tokens.append(Token('RETURN_LAST'))
        if len(tokens) == 0:
            return

        # Build AST from disassembly.
        try:
            ast = parser.parse(self.p, tokens, customize)
        except parser.ParserError as e:
            raise ParserError(e, tokens)

        if self.showast:
            print(repr(ast))

        return ast

Example #4

Show file

    def n_return_if_stmt(self, node):

        start = len(self.f.getvalue()) + len(self.indent)
        if self.__params['isLambda']:
            node[0].parent = node
            self.preorder(node[0])
        else:
            start = len(self.f.getvalue()) + len(self.indent)
            self.write(self.indent, 'return')
            if self.return_none or node != AST(
                    'return_stmt',
                [AST('ret_expr', [NONE]),
                 Token('RETURN_END_IF')]):
                self.write(' ')
                self.preorder(node[0])
                if hasattr(node[-1], 'offset'):
                    self.set_pos_info(node[-1], start, len(self.f.getvalue()))
            self.print_()
        self.set_pos_info(node, start, len(self.f.getvalue()))
        self.prune()  # stop recursing

Example #5

Show file

    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an uncompyle6 code object, and transform them,
        returning a list of uncompyle6 'Token's.

        The transformations are made to assist the deparsing grammar.
        Specificially:
           -  various types of LOAD_CONST's are categorized in terms of what they load
           -  COME_FROM instructions are added to assist parsing control structures
           -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments

        Also, when we encounter certain tokens, we add them to a set which will cause custom
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.
        """

        show_asm = self.show_asm if not show_asm else show_asm
        # show_asm = 'after'
        if show_asm in ('both', 'before'):
            bytecode = Bytecode(co, self.opc)
            for instr in bytecode.get_instructions(co):
                print(instr._disassemble())

        # Container for tokens
        tokens = []

        customize = {}
        if self.is_pypy:
            customize['PyPy'] = 1

        self.code = array('B', co.co_code)
        self.build_lines_data(co)
        self.build_prev_op()

        bytecode = Bytecode(co, self.opc)

        # FIXME: put as its own method?
        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()
        bs = list(bytecode)
        n = len(bs)
        for i in range(n):
            inst = bs[i]

            # We need to detect the difference between
            # "raise AssertionError" and "assert"
            # If we have a JUMP_FORWARD after the
            # RAISE_VARARGS then we have a "raise" statement
            # else we have an "assert" statement.
            if inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n:
                next_inst = bs[i+1]
                if (next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                    for j in range(i+2, n):
                        raise_inst = bs[j]
                        if raise_inst.opname.startswith('RAISE_VARARGS'):
                            if j+1 >= n or bs[j+1].opname != 'JUMP_FORWARD':
                                self.load_asserts.add(next_inst.offset)
                                pass
                            break
                    pass
                pass

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets(show_asm)

        for inst in bytecode:

            argval = inst.argval
            if inst.offset in jump_targets:
                jump_idx = 0
                # We want to process COME_FROMs to the same offset to be in *descending*
                # offset order so we have the larger range or biggest instruction interval
                # last. (I think they are sorted in increasing order, but for safety
                # we sort them). That way, specific COME_FROM tags will match up
                # properly. For example, a "loop" with an "if" nested in it should have the
                # "loop" tag last so the grammar rule matches that properly.
                for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                    come_from_name = 'COME_FROM'
                    opname = self.opName(jump_offset)
                    if opname.startswith('SETUP_'):
                        come_from_type = opname[len('SETUP_'):]
                        come_from_name = 'COME_FROM_%s' % come_from_type
                        pass
                    tokens.append(Token(come_from_name,
                                        None, repr(jump_offset),
                                        offset='%s_%s' % (inst.offset, jump_idx),
                                        has_arg = True, opc=self.opc))
                    jump_idx += 1
                    pass
                pass
            elif inst.offset in self.else_start:
                end_offset = self.else_start[inst.offset]
                tokens.append(Token('ELSE',
                                    None, repr(end_offset),
                                    offset='%s' % (inst.offset),
                                    has_arg = True, opc=self.opc))

                pass

            pattr =  inst.argrepr
            opname = inst.opname
            op = inst.opcode

            if opname in ['LOAD_CONST']:
                const = inst.argval
                if iscode(const):
                    if const.co_name == '<lambda>':
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    elif const.co_name == '<listcomp>':
                        opname = 'LOAD_LISTCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
                    pass
            elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'):
                pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval)
                if name_pair_args > 0:
                    opname = '%s_N%d' % (opname, name_pair_args)
                    pass
                if annotate_args > 0:
                    opname = '%s_A_%d' % (opname, annotate_args)
                    pass
                opname = '%s_%d' % (opname, pos_args)
                pattr = ("%d positional, %d keyword pair, %d annotated" %
                             (pos_args, name_pair_args, annotate_args))
                tokens.append(
                    Token(
                        type_ = opname,
                        attr = (pos_args, name_pair_args, annotate_args),
                        pattr = pattr,
                        offset = inst.offset,
                        linestart = inst.starts_line,
                        op = op,
                        has_arg = op_has_argument(op, op3),
                        opc = self.opc
                    )
                )
                continue
            elif op in self.varargs_ops:
                pos_args = inst.argval
                if self.is_pypy and not pos_args and opname == 'BUILD_MAP':
                    opname = 'BUILD_MAP_n'
                else:
                    opname = '%s_%d' % (opname, pos_args)
            elif self.is_pypy and opname in ('CALL_METHOD', 'JUMP_IF_NOT_DEBUG'):
                # The value in the dict is in special cases in semantic actions, such
                # as CALL_FUNCTION. The value is not used in these cases, so we put
                # in arbitrary value 0.
                customize[opname] = 0
            elif opname == 'UNPACK_EX':
                # FIXME: try with scanner and parser by
                # changing inst.argval
                before_args = inst.argval & 0xFF
                after_args = (inst.argval >> 8) & 0xff
                pattr = "%d before vararg, %d after" % (before_args, after_args)
                argval = (before_args, after_args)
                opname = '%s_%d+%d' % (opname, before_args, after_args)

            elif op == self.opc.JUMP_ABSOLUTE:
                # Further classify JUMP_ABSOLUTE into backward jumps
                # which are used in loops, and "CONTINUE" jumps which
                # may appear in a "continue" statement.  The loop-type
                # and continue-type jumps will help us classify loop
                # boundaries The continue-type jumps help us get
                # "continue" statements with would otherwise be turned
                # into a "pass" statement because JUMPs are sometimes
                # ignored in rules as just boundary overhead. In
                # comprehensions we might sometimes classify JUMP_BACK
                # as CONTINUE, but that's okay since we add a grammar
                # rule for that.
                pattr = inst.argval
                target = self.get_target(inst.offset)
                if target <= inst.offset:
                    next_opname = self.opname[self.code[inst.offset+3]]
                    if (inst.offset in self.stmts and
                        next_opname not in ('END_FINALLY', 'POP_BLOCK',
                                            # Python 3.0 only uses POP_TOP
                                            'POP_TOP')
                        and inst.offset not in self.not_continue):
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        # There are other situations where we don't catch
                        # CONTINUE as well.
                        if tokens[-1].type == 'JUMP_BACK' and tokens[-1].attr <= argval:
                            # intern is used because we are changing the *previous* token
                            tokens[-1].type = intern('CONTINUE')

            elif op == self.opc.RETURN_VALUE:
                if inst.offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'
            elif inst.offset in self.load_asserts:
                opname = 'LOAD_ASSERT'

            tokens.append(
                Token(
                    type_ = opname,
                    attr = argval,
                    pattr = pattr,
                    offset = inst.offset,
                    linestart = inst.starts_line,
                    op = op,
                    has_arg = (op >= op3.HAVE_ARGUMENT),
                    opc = self.opc
                    )
                )
            pass

        if show_asm in ('both', 'after'):
            for t in tokens:
                print(t)
            print()
        return tokens, customize

Example #6

Show file

    def disassemble_generic(self, co, classname=None, code_objects={}):
        """
        Convert code object <co> into a sequence of tokens.

        The below is based on (an older version?) of Python dis.disassemble_bytes().
        """
        # Container for tokens
        tokens = []
        customize = {}
        self.code = code = array('B', co.co_code)
        codelen = len(code)
        self.build_lines_data(co)
        self.build_prev_op()
        self.code_objects = code_objects

        # self.lines contains (block,addrLastInstr)
        if classname:
            classname = '_' + classname.lstrip('_') + '__'

            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name

            free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            names = [ unmangle(name) for name in co.co_names ]
            varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
            pass

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
        # assertions

        self.load_asserts = set()
        for i in self.op_range(0, codelen):
            if self.code[i] == POP_JUMP_IF_TRUE and self.code[i+3] == LOAD_GLOBAL:
                if names[self.get_argument(i+3)] == 'AssertionError':
                    self.load_asserts.add(i+3)

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets()

        # contains (code, [addrRefToCode])
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}

        imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if self.code[last_import] == IMPORT_NAME == self.code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i

        # Initialize extended arg at 0. When extended arg op is encountered,
        # variable preserved for next cycle and added as arg for next op
        extended_arg = 0

        for offset in self.op_range(0, codelen):
            # Add jump target tokens
            if offset in jump_targets:
                jump_idx = 0
                for jump_offset in jump_targets[offset]:
                    tokens.append(Token('COME_FROM', None, repr(jump_offset),
                                        offset='%s_%s' % (offset, jump_idx)))
                    jump_idx += 1
                    pass
                pass

            op = code[offset]
            op_name = op3.opname[op]

            oparg = None; pattr = None

            if op >= op3.HAVE_ARGUMENT:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == op3.EXTENDED_ARG:
                    extended_arg = oparg * scan.L65536
                    continue
                if op in op3.hasconst:
                    const = co.co_consts[oparg]
                    if not PYTHON3 and isinstance(const, str):
                        if const in code_objects:
                            const = code_objects[const]
                    if iscode(const):
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert op_name == 'LOAD_CONST'
                            op_name = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            op_name = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            op_name = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            op_name = 'LOAD_SETCOMP'
                        elif const.co_name == '<listcomp>':
                            op_name = 'LOAD_LISTCOMP'
                        # verify() uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
                        # pattr = 'code_object @ 0x%x %s->%s' %\
                        # (id(const), const.co_filename, const.co_name)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in op3.hasname:
                    pattr = names[oparg]
                elif op in op3.hasjrel:
                    pattr = repr(offset + 3 + oparg)
                elif op in op3.hasjabs:
                    pattr = repr(oparg)
                elif op in op3.haslocal:
                    pattr = varnames[oparg]
                elif op in op3.hascompare:
                    pattr = op3.cmp_op[oparg]
                elif op in op3.hasfree:
                    pattr = free[oparg]

            if op_name in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET', 'BUILD_SLICE',
                            'UNPACK_SEQUENCE',
                            'MAKE_FUNCTION', 'CALL_FUNCTION', 'MAKE_CLOSURE',
                            'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW',
                            'CALL_FUNCTION_VAR_KW', 'RAISE_VARARGS'
                            ):
                # CALL_FUNCTION OP renaming is done as a custom rule in parse3
                if op_name not in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
                                   'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW',
                                   ):
                    op_name = '%s_%d' % (op_name, oparg)
                    if op_name != 'BUILD_SLICE':
                        customize[op_name] = oparg
            elif op_name == 'JUMP_ABSOLUTE':
                target = self.get_target(offset)
                if target < offset:
                    if (offset in self.stmts
                        and self.code[offset+3] not in (END_FINALLY, POP_BLOCK)
                        and offset not in self.not_continue):
                        op_name = 'CONTINUE'
                    else:
                        op_name = 'JUMP_BACK'

            elif op_name == 'LOAD_GLOBAL':
                if offset in self.load_asserts:
                    op_name = 'LOAD_ASSERT'
            elif op_name == 'RETURN_VALUE':
                if offset in self.return_end_ifs:
                    op_name = 'RETURN_END_IF'

            if offset in self.linestarts:
                linestart = self.linestarts[offset]
            else:
                linestart = None

            if offset not in replace:
                tokens.append(Token(op_name, oparg, pattr, offset, linestart))
            else:
                tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
            pass
        return tokens, customize

Example #7

Show file

    def disassemble_built_in(self, co, classname=None, code_objects={}):
        # Container for tokens
        tokens = []
        customize = {}
        self.code = array('B', co.co_code)
        self.build_lines_data(co)
        self.build_prev_op()

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets()
        bytecode = dis.Bytecode(co)

        # self.lines contains (block,addrLastInstr)
        if classname:
            classname = '_' + classname.lstrip('_') + '__'

            def unmangle(name):
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name

            # free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            # names = [ unmangle(name) for name in co.co_names ]
            # varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            # free = co.co_cellvars + co.co_freevars
            # names = co.co_names
            # varnames = co.co_varnames
            pass

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT' for those
        # assertions
        self.load_asserts = set()
        bs = list(bytecode)
        n = len(bs)
        for i in range(n):
            inst = bs[i]
            if inst.opname == 'POP_JUMP_IF_TRUE' and i + 1 < n:
                next_inst = bs[i + 1]
                if (next_inst.opname == 'LOAD_GLOBAL'
                        and next_inst.argval == 'AssertionError'):
                    self.load_asserts.add(next_inst.offset)

        for inst in bytecode:
            if inst.offset in jump_targets:
                jump_idx = 0
                for jump_offset in jump_targets[inst.offset]:
                    tokens.append(
                        Token('COME_FROM',
                              None,
                              repr(jump_offset),
                              offset='%s_%s' % (inst.offset, jump_idx)))
                    jump_idx += 1
                    pass
                pass

            pattr = inst.argrepr
            opname = inst.opname

            if opname in ['LOAD_CONST']:
                const = inst.argval
                if iscode(const):
                    if const.co_name == '<lambda>':
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    elif const.co_name == '<listcomp>':
                        opname = 'LOAD_LISTCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    pattr = const
                    pass
            elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET',
                            'BUILD_SLICE', 'UNPACK_SEQUENCE', 'MAKE_FUNCTION',
                            'MAKE_CLOSURE', 'DUP_TOPX', 'RAISE_VARARGS'):
                # if opname == 'BUILD_TUPLE' and \
                #     self.code[self.prev[offset]] == LOAD_CLOSURE:
                #     continue
                # else:
                #     op_name = '%s_%d' % (op_name, oparg)
                #     if opname != BUILD_SLICE:
                #         customize[op_name] = oparg
                opname = '%s_%d' % (opname, inst.argval)
                if inst.opname != 'BUILD_SLICE':
                    customize[opname] = inst.argval

            elif opname == 'JUMP_ABSOLUTE':
                pattr = inst.argval
                target = self.get_target(inst.offset)
                if target < inst.offset:
                    if (inst.offset in self.stmts
                            and self.code[inst.offset + 3]
                            not in (END_FINALLY, POP_BLOCK)
                            and offset not in self.not_continue):
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'

            elif inst.offset in self.load_asserts:
                opname = 'LOAD_ASSERT'

            tokens.append(
                Token(
                    type_=opname,
                    attr=inst.argval,
                    pattr=pattr,
                    offset=inst.offset,
                    linestart=inst.starts_line,
                ))
            pass
        return tokens, {}

Example #8

Show file

    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an uncompyle6 code object, and transform them,
        returning a list of uncompyle6 Token's.

        The transformations are made to assist the deparsing grammar.
        Specificially:
           -  various types of LOAD_CONST's are categorized in terms of what they load
           -  COME_FROM instructions are added to assist parsing control structures
           -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
           -  some EXTENDED_ARGS instructions are removed

        Also, when we encounter certain tokens, we add them to a set which will cause custom
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.
        """

        if not show_asm:
            show_asm = self.show_asm

        bytecode = self.build_instructions(co)

        # show_asm = 'both'
        if show_asm in ('both', 'before'):
            for instr in bytecode.get_instructions(co):
                print(instr.disassemble())

        # list of tokens/instructions
        tokens = []

        # "customize" is in the process of going away here
        customize = {}

        if self.is_pypy:
            customize['PyPy'] = 0

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()

        n = len(self.insts)
        for i, inst in enumerate(self.insts):

            # We need to detect the difference between:
            #   raise AssertionError
            #  and
            #   assert ...
            # If we have a JUMP_FORWARD after the
            # RAISE_VARARGS then we have a "raise" statement
            # else we have an "assert" statement.
            if self.version == 3.0:
                # There is a an implied JUMP_IF_TRUE that we are not testing for (yet?) here
                assert_can_follow = inst.opname == 'POP_TOP' and i+1 < n
            else:
                assert_can_follow = inst.opname == 'POP_JUMP_IF_TRUE' and i+1 < n
            if assert_can_follow:
                next_inst = self.insts[i+1]
                if (next_inst.opname == 'LOAD_GLOBAL' and
                    next_inst.argval == 'AssertionError'):
                    if (i + 2 < n and self.insts[i+2].opname.startswith('RAISE_VARARGS')):
                        self.load_asserts.add(next_inst.offset)
                    pass
                pass

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets(show_asm)
        # print("XXX2", jump_targets)

        last_op_was_break = False

        for i, inst in enumerate(self.insts):

            argval = inst.argval
            op     = inst.opcode

            if inst.opname == 'EXTENDED_ARG':
                # FIXME: The EXTENDED_ARG is used to signal annotation
                # parameters
                if (i+1 < n and
                    self.insts[i+1].opcode != self.opc.MAKE_FUNCTION):
                    continue

            if inst.offset in jump_targets:
                jump_idx = 0
                # We want to process COME_FROMs to the same offset to be in *descending*
                # offset order so we have the larger range or biggest instruction interval
                # last. (I think they are sorted in increasing order, but for safety
                # we sort them). That way, specific COME_FROM tags will match up
                # properly. For example, a "loop" with an "if" nested in it should have the
                # "loop" tag last so the grammar rule matches that properly.
                for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                    come_from_name = 'COME_FROM'
                    opname = self.opname_for_offset(jump_offset)
                    if opname == 'EXTENDED_ARG':
                        j = xdis.next_offset(op, self.opc, jump_offset)
                        opname = self.opname_for_offset(j)

                    if opname.startswith('SETUP_'):
                        come_from_type = opname[len('SETUP_'):]
                        come_from_name = 'COME_FROM_%s' % come_from_type
                        pass
                    elif inst.offset in self.except_targets:
                        come_from_name = 'COME_FROM_EXCEPT_CLAUSE'
                    tokens.append(Token(come_from_name,
                                        jump_offset, repr(jump_offset),
                                        offset='%s_%s' % (inst.offset, jump_idx),
                                        has_arg = True, opc=self.opc))
                    jump_idx += 1
                    pass
                pass
            elif inst.offset in self.else_start:
                end_offset = self.else_start[inst.offset]
                tokens.append(Token('ELSE',
                                    None, repr(end_offset),
                                    offset='%s' % (inst.offset),
                                    has_arg = True, opc=self.opc))

                pass

            pattr  = inst.argrepr
            opname = inst.opname

            if op in self.opc.CONST_OPS:
                const = argval
                if iscode(const):
                    if const.co_name == '<lambda>':
                        assert opname == 'LOAD_CONST'
                        opname = 'LOAD_LAMBDA'
                    elif const.co_name == '<genexpr>':
                        opname = 'LOAD_GENEXPR'
                    elif const.co_name == '<dictcomp>':
                        opname = 'LOAD_DICTCOMP'
                    elif const.co_name == '<setcomp>':
                        opname = 'LOAD_SETCOMP'
                    elif const.co_name == '<listcomp>':
                        opname = 'LOAD_LISTCOMP'
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = '<code_object ' + const.co_name + '>'
                else:
                    if isinstance(inst.arg, int) and inst.arg < len(co.co_consts):
                        argval, _ = _get_const_info(inst.arg, co.co_consts)
                    # Why don't we use _ above for "pattr" rather than "const"?
                    # This *is* a little hoaky, but we have to coordinate with
                    # other parts like n_LOAD_CONST in pysource.py for example.
                    pattr = const
                    pass
            elif opname in ('MAKE_FUNCTION', 'MAKE_CLOSURE'):
                if self.version >= 3.6:
                    # 3.6+ doesn't have MAKE_CLOSURE, so opname == 'MAKE_FUNCTION'
                    flags = argval
                    opname = 'MAKE_FUNCTION_%d' % (flags)
                    attr = []
                    for flag in self.MAKE_FUNCTION_FLAGS:
                        bit = flags & 1
                        attr.append(bit)
                        flags >>= 1
                    attr = attr[:4] # remove last value: attr[5] == False
                else:
                    pos_args, name_pair_args, annotate_args = parse_fn_counts(inst.argval)
                    pattr = ("%d positional, %d keyword pair, %d annotated" %
                                 (pos_args, name_pair_args, annotate_args))
                    if name_pair_args > 0:
                        opname = '%s_N%d' % (opname, name_pair_args)
                        pass
                    if annotate_args > 0:
                        opname = '%s_A_%d' % (opname, annotate_args)
                        pass
                    opname = '%s_%d' % (opname, pos_args)
                    attr = (pos_args, name_pair_args, annotate_args)
                tokens.append(
                    Token(
                        opname = opname,
                        attr = attr,
                        pattr = pattr,
                        offset = inst.offset,
                        linestart = inst.starts_line,
                        op = op,
                        has_arg = inst.has_arg,
                        opc = self.opc
                    )
                )
                continue
            elif op in self.varargs_ops:
                pos_args = argval
                if self.is_pypy and not pos_args and opname == 'BUILD_MAP':
                    opname = 'BUILD_MAP_n'
                else:
                    opname = '%s_%d' % (opname, pos_args)

            elif self.is_pypy and opname == 'JUMP_IF_NOT_DEBUG':
                # The value in the dict is in special cases in semantic actions, such
                # as JUMP_IF_NOT_DEBUG. The value is not used in these cases, so we put
                # in arbitrary value 0.
                customize[opname] = 0
            elif opname == 'UNPACK_EX':
                # FIXME: try with scanner and parser by
                # changing argval
                before_args = argval & 0xFF
                after_args = (argval >> 8) & 0xff
                pattr = "%d before vararg, %d after" % (before_args, after_args)
                argval = (before_args, after_args)
                opname = '%s_%d+%d' % (opname, before_args, after_args)

            elif op == self.opc.JUMP_ABSOLUTE:
                # Further classify JUMP_ABSOLUTE into backward jumps
                # which are used in loops, and "CONTINUE" jumps which
                # may appear in a "continue" statement.  The loop-type
                # and continue-type jumps will help us classify loop
                # boundaries The continue-type jumps help us get
                # "continue" statements with would otherwise be turned
                # into a "pass" statement because JUMPs are sometimes
                # ignored in rules as just boundary overhead. In
                # comprehensions we might sometimes classify JUMP_BACK
                # as CONTINUE, but that's okay since we add a grammar
                # rule for that.
                pattr = argval
                target = self.get_target(inst.offset)
                if target <= inst.offset:
                    next_opname = self.insts[i+1].opname

                    # 'Continue's include jumps to loops that are not
                    # and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
                    # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD
                    # then we'll take it as a "continue".
                    is_continue = (self.insts[self.offset2inst_index[target]]
                                  .opname == 'FOR_ITER'
                                  and self.insts[i+1].opname == 'JUMP_FORWARD')

                    if (is_continue or
                        (inst.offset in self.stmts and (inst.starts_line and
                        next_opname not in self.not_continue_follow))):
                        opname = 'CONTINUE'
                    else:
                        opname = 'JUMP_BACK'
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        # There are other situations where we don't catch
                        # CONTINUE as well.
                        if tokens[-1].kind == 'JUMP_BACK' and tokens[-1].attr <= argval:
                            if tokens[-2].kind == 'BREAK_LOOP':
                                del tokens[-1]
                            else:
                                # intern is used because we are changing the *previous* token
                                tokens[-1].kind = intern('CONTINUE')
                    if last_op_was_break and opname == 'CONTINUE':
                        last_op_was_break = False
                        continue

            # FIXME: go over for Python 3.6+. This is sometimes wrong
            elif op == self.opc.RETURN_VALUE:
                if inst.offset in self.return_end_ifs:
                    opname = 'RETURN_END_IF'

            elif inst.offset in self.load_asserts:
                opname = 'LOAD_ASSERT'

            last_op_was_break = opname == 'BREAK_LOOP'
            tokens.append(
                Token(
                    opname = opname,
                    attr = argval,
                    pattr = pattr,
                    offset = inst.offset,
                    linestart = inst.starts_line,
                    op = op,
                    has_arg = inst.has_arg,
                    opc = self.opc
                    )
                )
            pass

        if show_asm in ('both', 'after'):
            for t in tokens:
                print(t.format(line_prefix='L.'))
            print()
        return tokens, customize

Example #9

Show file

    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an uncompyle6 code object, and transform them,
        returning a list of uncompyle6 'Token's.

        The transformations are made to assist the deparsing grammar.
        Specificially:
           -  various types of LOAD_CONST's are categorized in terms of what they load
           -  COME_FROM instructions are added to assist parsing control structures
           -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments

        Also, when we encounter certain tokens, we add them to a set which will cause custom
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.
        """

        if not show_asm:
            show_asm = self.show_asm

        bytecode = self.build_instructions(co)

        # show_asm = 'after'
        if show_asm in ('both', 'before'):
            for instr in bytecode.get_instructions(co):
                print(instr.disassemble())

        # Container for tokens
        tokens = []

        customize = {}
        if self.is_pypy:
            customize['PyPy'] = 1

        codelen = len(self.code)

        free, names, varnames = self.unmangle_code_names(co, classname)
        self.names = names

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()
        for i in self.op_range(0, codelen):
            # We need to detect the difference between:
            #   raise AssertionError
            #  and
            #   assert ...
            if (self.code[i] == self.opc.JUMP_IF_TRUE and
                i + 4 < codelen and
                self.code[i+3] == self.opc.POP_TOP and
                self.code[i+4] == self.opc.LOAD_GLOBAL):
                if names[self.get_argument(i+4)] == 'AssertionError':
                    self.load_asserts.add(i+4)

        jump_targets = self.find_jump_targets(show_asm)
        # contains (code, [addrRefToCode])

        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}
        while i < codelen - 1:
            if self.lines[last_stmt].next > i:
                # Distinguish "print ..." from "print ...,"
                if self.code[last_stmt] == self.opc.PRINT_ITEM:
                    if self.code[i] == self.opc.PRINT_ITEM:
                        replace[i] = 'PRINT_ITEM_CONT'
                    elif self.code[i] == self.opc.PRINT_NEWLINE:
                        replace[i] = 'PRINT_NEWLINE_CONT'
            last_stmt = i
            i = self.next_stmt[i]

        extended_arg = 0
        for offset in self.op_range(0, codelen):
            op = self.code[offset]
            op_name = self.opname[op]
            oparg = None; pattr = None

            if offset in jump_targets:
                jump_idx = 0
                # We want to process COME_FROMs to the same offset to be in *descending*
                # offset order so we have the larger range or biggest instruction interval
                # last. (I think they are sorted in increasing order, but for safety
                # we sort them). That way, specific COME_FROM tags will match up
                # properly. For example, a "loop" with an "if" nested in it should have the
                # "loop" tag last so the grammar rule matches that properly.
                last_jump_offset = -1
                for jump_offset  in sorted(jump_targets[offset], reverse=True):
                    if jump_offset != last_jump_offset:
                        tokens.append(Token(
                            'COME_FROM', jump_offset, repr(jump_offset),
                            offset="%s_%d" % (offset, jump_idx),
                            has_arg = True))
                        jump_idx += 1
                        last_jump_offset = jump_offset
            elif offset in self.thens:
                tokens.append(Token(
                    'THEN', None, self.thens[offset],
                    offset="%s_0" % offset,
                    has_arg = True))

            has_arg = (op >= self.opc.HAVE_ARGUMENT)
            if has_arg:
                oparg = self.get_argument(offset) + extended_arg
                extended_arg = 0
                if op == self.opc.EXTENDED_ARG:
                    extended_arg = oparg * L65536
                    continue
                if op in self.opc.CONST_OPS:
                    const = co.co_consts[oparg]
                    # We can't use inspect.iscode() because we may be
                    # using a different version of Python than the
                    # one that this was byte-compiled on. So the code
                    # types may mismatch.
                    if hasattr(const, 'co_name'):
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert op_name == 'LOAD_CONST'
                            op_name = 'LOAD_LAMBDA'
                        elif const.co_name == self.genexpr_name:
                            op_name = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            op_name = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            op_name = 'LOAD_SETCOMP'
                        else:
                            op_name = "LOAD_CODE"
                        # verify uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
                        # pattr = 'code_object @ 0x%x %s->%s' % \
                        # (id(const), const.co_filename, const.co_name)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        if oparg < len(co.co_consts):
                            argval, _ = _get_const_info(oparg, co.co_consts)
                        # Why don't we use _ above for "pattr" rather than "const"?
                        # This *is* a little hoaky, but we have to coordinate with
                        # other parts like n_LOAD_CONST in pysource.py for example.
                        pattr = const
                        pass
                elif op in self.opc.NAME_OPS:
                    pattr = names[oparg]
                elif op in self.opc.JREL_OPS:
                    pattr = repr(offset + 3 + oparg)
                    if op == self.opc.JUMP_FORWARD:
                        target = self.get_target(offset)
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        if len(tokens) and tokens[-1].kind == 'JUMP_BACK':
                            tokens[-1].kind = intern('CONTINUE')

                elif op in self.opc.JABS_OPS:
                    pattr = repr(oparg)
                elif op in self.opc.LOCAL_OPS:
                    pattr = varnames[oparg]
                elif op in self.opc.COMPARE_OPS:
                    pattr = self.opc.cmp_op[oparg]
                elif op in self.opc.FREE_OPS:
                    pattr = free[oparg]
            if op in self.varargs_ops:
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if (self.version >= 2.5 and op == self.opc.BUILD_TUPLE and
                    self.code[self.prev[offset]] == self.opc.LOAD_CLOSURE):
                    continue
                else:
                    op_name = '%s_%d' % (op_name, oparg)
                    customize[op_name] = oparg
            elif self.version > 2.0 and op == self.opc.CONTINUE_LOOP:
                customize[op_name] = 0
            elif op_name in """
                 CONTINUE_LOOP EXEC_STMT LOAD_LISTCOMP LOAD_SETCOMP
                  """.split():
                customize[op_name] = 0
            elif op == self.opc.JUMP_ABSOLUTE:
                # Further classify JUMP_ABSOLUTE into backward jumps
                # which are used in loops, and "CONTINUE" jumps which
                # may appear in a "continue" statement.  The loop-type
                # and continue-type jumps will help us classify loop
                # boundaries The continue-type jumps help us get
                # "continue" statements with would otherwise be turned
                # into a "pass" statement because JUMPs are sometimes
                # ignored in rules as just boundary overhead.  In
                # comprehensions we might sometimes classify JUMP_BACK
                # as CONTINUE, but that's okay since we add a grammar
                # rule for that.
                target = self.get_target(offset)
                if target <= offset:
                    op_name = 'JUMP_BACK'
                    if (offset in self.stmts
                        and self.code[offset+3] not in (self.opc.END_FINALLY,
                                                          self.opc.POP_BLOCK)):
                        if ((offset in self.linestarts and
                            tokens[-1].kind == 'JUMP_BACK')
                            or offset not in self.not_continue):
                            op_name = 'CONTINUE'
                    else:
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        if tokens[-1].kind == 'JUMP_BACK':
                            # We need 'intern' since we have
                            # already have processed the previous
                            # token.
                            tokens[-1].kind = intern('CONTINUE')

            elif op == self.opc.LOAD_GLOBAL:
                if offset in self.load_asserts:
                    op_name = 'LOAD_ASSERT'
            elif op == self.opc.RETURN_VALUE:
                if offset in self.return_end_ifs:
                    op_name = 'RETURN_END_IF'

            linestart = self.linestarts.get(offset, None)

            if offset not in replace:
                tokens.append(Token(
                    op_name, oparg, pattr, offset, linestart, op,
                    has_arg, self.opc))
            else:
                tokens.append(Token(
                    replace[offset], oparg, pattr, offset, linestart, op,
                    has_arg, self.opc))
                pass
            pass

        if show_asm in ('both', 'after'):
            for t in tokens:
                print(t.format(line_prefix='L.'))
            print()
        return tokens, customize

Example #10

Show file

File: scanner37base.py Project: nevinhappy/python-uncompyle6

    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an uncompyle6 code object, and transform them,
        returning a list of uncompyle6 Token's.

        The transformations are made to assist the deparsing grammar.
        Specificially:
           -  various types of LOAD_CONST's are categorized in terms of what they load
           -  COME_FROM instructions are added to assist parsing control structures
           -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
           -  some EXTENDED_ARGS instructions are removed

        Also, when we encounter certain tokens, we add them to a set which will cause custom
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.
        """
        def tokens_append(j, token):
            tokens.append(token)
            self.offset2tok_index[token.offset] = j
            j += 1
            assert j == len(tokens)
            return j

        if not show_asm:
            show_asm = self.show_asm

        bytecode = self.build_instructions(co)

        # show_asm = 'both'
        if show_asm in ("both", "before"):
            for instr in bytecode.get_instructions(co):
                print(instr.disassemble())

        # "customize" is in the process of going away here
        customize = {}

        if self.is_pypy:
            customize["PyPy"] = 0

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()

        # list of tokens/instructions
        tokens = []
        self.offset2tok_index = {}

        n = len(self.insts)
        for i, inst in enumerate(self.insts):

            # We need to detect the difference between:
            #   raise AssertionError
            #  and
            #   assert ...
            # If we have a JUMP_FORWARD after the
            # RAISE_VARARGS then we have a "raise" statement
            # else we have an "assert" statement.
            assert_can_follow = inst.opname == "POP_JUMP_IF_TRUE" and i + 1 < n
            if assert_can_follow:
                next_inst = self.insts[i + 1]
                if (next_inst.opname == "LOAD_GLOBAL"
                        and next_inst.argval == "AssertionError"
                        and inst.argval):
                    raise_idx = self.offset2inst_index[self.prev_op[
                        inst.argval]]
                    raise_inst = self.insts[raise_idx]
                    if raise_inst.opname.startswith("RAISE_VARARGS"):
                        self.load_asserts.add(next_inst.offset)
                    pass
                pass

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets(show_asm)
        # print("XXX2", jump_targets)

        last_op_was_break = False

        j = 0
        for i, inst in enumerate(self.insts):

            argval = inst.argval
            op = inst.opcode

            if inst.opname == "EXTENDED_ARG":
                # FIXME: The EXTENDED_ARG is used to signal annotation
                # parameters
                if i + 1 < n and self.insts[
                        i + 1].opcode != self.opc.MAKE_FUNCTION:
                    continue

            if inst.offset in jump_targets:
                jump_idx = 0
                # We want to process COME_FROMs to the same offset to be in *descending*
                # offset order so we have the larger range or biggest instruction interval
                # last. (I think they are sorted in increasing order, but for safety
                # we sort them). That way, specific COME_FROM tags will match up
                # properly. For example, a "loop" with an "if" nested in it should have the
                # "loop" tag last so the grammar rule matches that properly.
                for jump_offset in sorted(jump_targets[inst.offset],
                                          reverse=True):
                    come_from_name = "COME_FROM"
                    opname = self.opname_for_offset(jump_offset)
                    if opname == "EXTENDED_ARG":
                        k = xdis.next_offset(op, self.opc, jump_offset)
                        opname = self.opname_for_offset(k)

                    if opname.startswith("SETUP_"):
                        come_from_type = opname[len("SETUP_"):]
                        come_from_name = "COME_FROM_%s" % come_from_type
                        pass
                    elif inst.offset in self.except_targets:
                        come_from_name = "COME_FROM_EXCEPT_CLAUSE"
                    j = tokens_append(
                        j,
                        Token(
                            come_from_name,
                            jump_offset,
                            repr(jump_offset),
                            offset="%s_%s" % (inst.offset, jump_idx),
                            has_arg=True,
                            opc=self.opc,
                        ),
                    )
                    jump_idx += 1
                    pass
                pass
            elif inst.offset in self.else_start:
                end_offset = self.else_start[inst.offset]
                j = tokens_append(
                    j,
                    Token(
                        "ELSE",
                        None,
                        repr(end_offset),
                        offset="%s" % (inst.offset),
                        has_arg=True,
                        opc=self.opc,
                    ),
                )

                pass

            pattr = inst.argrepr
            opname = inst.opname

            if op in self.opc.CONST_OPS:
                const = argval
                if iscode(const):
                    if const.co_name == "<lambda>":
                        assert opname == "LOAD_CONST"
                        opname = "LOAD_LAMBDA"
                    elif const.co_name == "<genexpr>":
                        opname = "LOAD_GENEXPR"
                    elif const.co_name == "<dictcomp>":
                        opname = "LOAD_DICTCOMP"
                    elif const.co_name == "<setcomp>":
                        opname = "LOAD_SETCOMP"
                    elif const.co_name == "<listcomp>":
                        opname = "LOAD_LISTCOMP"
                    else:
                        opname = "LOAD_CODE"
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = "<code_object " + const.co_name + ">"
                elif isinstance(const, str):
                    opname = "LOAD_STR"
                else:
                    if isinstance(inst.arg,
                                  int) and inst.arg < len(co.co_consts):
                        argval, _ = _get_const_info(inst.arg, co.co_consts)
                    # Why don't we use _ above for "pattr" rather than "const"?
                    # This *is* a little hoaky, but we have to coordinate with
                    # other parts like n_LOAD_CONST in pysource.py for example.
                    pattr = const
                    pass
            elif opname in ("MAKE_FUNCTION", "MAKE_CLOSURE"):
                flags = argval
                opname = "MAKE_FUNCTION_%d" % (flags)
                attr = []
                for flag in self.MAKE_FUNCTION_FLAGS:
                    bit = flags & 1
                    attr.append(bit)
                    flags >>= 1
                attr = attr[:4]  # remove last value: attr[5] == False
                j = tokens_append(
                    j,
                    Token(
                        opname=opname,
                        attr=attr,
                        pattr=pattr,
                        offset=inst.offset,
                        linestart=inst.starts_line,
                        op=op,
                        has_arg=inst.has_arg,
                        opc=self.opc,
                    ),
                )
                continue
            elif op in self.varargs_ops:
                pos_args = argval
                if self.is_pypy and not pos_args and opname == "BUILD_MAP":
                    opname = "BUILD_MAP_n"
                else:
                    opname = "%s_%d" % (opname, pos_args)

            elif self.is_pypy and opname == "JUMP_IF_NOT_DEBUG":
                # The value in the dict is in special cases in semantic actions, such
                # as JUMP_IF_NOT_DEBUG. The value is not used in these cases, so we put
                # in arbitrary value 0.
                customize[opname] = 0
            elif opname == "UNPACK_EX":
                # FIXME: try with scanner and parser by
                # changing argval
                before_args = argval & 0xFF
                after_args = (argval >> 8) & 0xFF
                pattr = "%d before vararg, %d after" % (before_args,
                                                        after_args)
                argval = (before_args, after_args)
                opname = "%s_%d+%d" % (opname, before_args, after_args)

            elif op == self.opc.JUMP_ABSOLUTE:
                # Further classify JUMP_ABSOLUTE into backward jumps
                # which are used in loops, and "CONTINUE" jumps which
                # may appear in a "continue" statement.  The loop-type
                # and continue-type jumps will help us classify loop
                # boundaries The continue-type jumps help us get
                # "continue" statements with would otherwise be turned
                # into a "pass" statement because JUMPs are sometimes
                # ignored in rules as just boundary overhead. In
                # comprehensions we might sometimes classify JUMP_BACK
                # as CONTINUE, but that's okay since we add a grammar
                # rule for that.
                pattr = argval
                target = self.get_target(inst.offset)
                if target <= inst.offset:
                    next_opname = self.insts[i + 1].opname

                    # 'Continue's include jumps to loops that are not
                    # and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
                    # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD
                    # then we'll take it as a "continue".
                    is_continue = (
                        self.insts[self.offset2inst_index[target]].opname
                        == "FOR_ITER"
                        and self.insts[i + 1].opname == "JUMP_FORWARD")

                    if self.version < 3.8 and (
                            is_continue or
                        (inst.offset in self.stmts and
                         (inst.starts_line
                          and next_opname not in self.not_continue_follow))):
                        opname = "CONTINUE"
                    else:
                        opname = "JUMP_BACK"
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        # There are other situations where we don't catch
                        # CONTINUE as well.
                        if tokens[-1].kind == "JUMP_BACK" and tokens[
                                -1].attr <= argval:
                            if tokens[-2].kind == "BREAK_LOOP":
                                del tokens[-1]
                            else:
                                # intern is used because we are changing the *previous* token
                                tokens[-1].kind = sys.intern("CONTINUE")
                    if last_op_was_break and opname == "CONTINUE":
                        last_op_was_break = False
                        continue

            elif inst.offset in self.load_asserts:
                opname = "LOAD_ASSERT"

            last_op_was_break = opname == "BREAK_LOOP"
            j = tokens_append(
                j,
                Token(
                    opname=opname,
                    attr=argval,
                    pattr=pattr,
                    offset=inst.offset,
                    linestart=inst.starts_line,
                    op=op,
                    has_arg=inst.has_arg,
                    opc=self.opc,
                ),
            )
            pass

        if show_asm in ("both", "after"):
            for t in tokens:
                print(t.format(line_prefix="L."))
            print()
        return tokens, customize