Example #1
	def token_iter(self, readline):
		# Tokenize eagerly with the undocumented _tokenize(readline, None)
		# (str mode, no encoding detection), buffering every token before
		# replaying the stream lazily.
		tokiter = _tokenize(readline, None)
		out = []
		for tok in tokiter:
			out.append(tok)
		for tok in out:
			yield tok
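For context, a minimal driver for an iterator like the one above, using the public tokenize.generate_tokens(), the documented equivalent of the _tokenize(readline, None) str-mode call:

import io
import tokenize

source = "x = 1 + 2\n"
# generate_tokens() takes a readline callable that returns str lines.
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    print(tok.type, tok.string)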
Example #2
def tokenize(string: typing.AnyStr):
    if isinstance(string, bytes):
        # Call the internal tokenize function to avoid sniffing the encoding:
        # if it tried to sniff the encoding of a "# encoding: import_expression"
        # file, it would call our code again, resulting in a RecursionError.
        return tokenize_._tokenize(io.BytesIO(string).readline,
                                   encoding='utf-8')
    return tokenize_.generate_tokens(io.StringIO(string).readline)
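A usage sketch for the helper above (assuming its imports, io, typing, and tokenize as tokenize_, are in scope, and a Python version such as 3.8-3.11 where the private tokenize_._tokenize still accepts an encoding argument):

for tok in tokenize('x = 1\n'):    # str path: generate_tokens()
    print(tok)
for tok in tokenize(b'x = 1\n'):   # bytes path: _tokenize(..., encoding='utf-8')
    print(tok)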
Example #3
	def token_iter(self, readline):
		# Tokenize in str mode (encoding=None), remapping operator
		# strings to grammar-specific token types; anything not in
		# the map passes through unchanged.
		tokiter = _tokenize(readline, None)
		opmap = self.grammar.OPERATOR_MAP
		for tok in tokiter:
			ttype = opmap.get(tok.string, None)
			if ttype is not None:
				yield tok._replace(type=ttype)
			else:
				yield tok
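A self-contained sketch of the same operator-remapping idea, driven by the public API; OPERATOR_MAP here is a hypothetical stand-in for self.grammar.OPERATOR_MAP:

import io
import tokenize

# Hypothetical grammar-specific token types, keyed by operator string.
OPERATOR_MAP = {'+': 1001, '-': 1002}

def remap_tokens(readline):
    for tok in tokenize.generate_tokens(readline):
        ttype = OPERATOR_MAP.get(tok.string)
        if ttype is not None:
            # TokenInfo is a namedtuple, so _replace() returns a copy
            # with the type field swapped out.
            yield tok._replace(type=ttype)
        else:
            yield tok

for tok in remap_tokens(io.StringIO('a + b\n').readline):
    print(tok.type, tok.string)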
Example #4
def tokenize(readline):
    """Tokenizer for the quasiquotes language extension.

    Parameters
    ----------
    readline : callable
        A callable that returns the next line to tokenize.

    Yields
    ------
    t : TokenInfo
        The token stream.
    """
    # force the token stream to use `utf-8` and ignore the encoding pragma.
    tok_stream = PeekableIterator(_tokenize(
        chain(iter(readline, b''), repeat(b'')).__next__,
        'utf-8',
    ))
    for t in tok_stream:
        if t == with_tok:
            try:
                sp, dol, name, col, nl, indent = tok_stream.peek(6)
            except ValueError:
                yield t
                continue

            if (sp == spaceerror_tok and
                    dol == dollar_tok and
                    col == col_tok and
                    nl == nl_tok and
                    indent.type == INDENT):
                tok_stream.consume_peeked(6)
                yield from quote_stmt_tokenizer(name, t, tok_stream)
                continue

        elif t == left_bracket_tok:
            try:
                dol, name, pipe = tok_stream.peek(3)
            except ValueError:
                yield t
                continue

            if dol == dollar_tok and pipe == pipe_tok:
                tok_stream.consume_peeked(3)
                yield from quote_expr_tokenizer(name, t, tok_stream)
                continue

        yield t
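The chain(iter(readline, b''), repeat(b'')) construction above is worth unpacking: iter(readline, b'') stops at the first empty line, and repeat(b'') then pads the stream so the tokenizer can keep calling readline past EOF without hitting StopIteration. A runnable illustration:

import io
from itertools import chain, repeat

raw = io.BytesIO(b'x = 1\n').readline
padded = chain(iter(raw, b''), repeat(b'')).__next__
print(padded())  # b'x = 1\n'
print(padded())  # b'', and b'' forever after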
Example #5
    def test__tokenize_does_not_decode_with_encoding_none(self):
        literal = '"ЉЊЈЁЂ"'
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return literal
            else:
                return b''

        # skip the end token
        tokens = list(_tokenize(readline, encoding=None))[:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "string not tokenized when encoding is None")
Example #6
    def test__tokenize_decodes_with_specified_encoding(self):
        literal = '"ЉЊЈЁЂ"'
        line = literal.encode('utf-8')
        first = False
        def readline():
            nonlocal first
            if not first:
                first = True
                return line
            else:
                return b''

        # skip the initial encoding token and the end token
        tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
        self.assertEqual(tokens, expected_tokens,
                         "bytes not decoded with encoding")
Example #7
    def format2(self, raw, out=None, scheme=''):
        """ Parse and send the colored source.

        If out and scheme are not specified, the defaults (given to the
        constructor) are used.

        out should be a file-like object. Optionally, out can be given as the
        string 'str' and the parser will automatically return the output in a
        string."""
        
        string_output = 0
        if out == 'str' or self.out == 'str' or \
           isinstance(self.out,io.TextIOBase):
            # XXX - I don't really like this state handling logic, but at this
            # point I don't want to make major changes, so adding the
            # isinstance() check is the simplest I can do to ensure correct
            # behavior.
            out_old = self.out
            self.out = io.StringIO()
            string_output = 1
        elif out is not None:
            self.out = out

        # Fast return of the unmodified input for NoColor scheme
        if scheme == 'NoColor':
            error = False
            self.out.write(raw)
            if string_output:
                return raw,error
            else:
                return None,error
        
        # local shorthands
        colors = self.color_table[scheme].colors
        self.colors = colors # put in object so __call__ sees it

        # Remove trailing whitespace and normalize tabs
        self.raw = raw.expandtabs().rstrip()
        
        # store line offsets in self.lines
        self.lines = [0, 0]
        pos = 0
        raw_find = self.raw.find
        lines_append = self.lines.append
        while 1:
            pos = raw_find('\n', pos) + 1
            if not pos: break
            lines_append(pos)
        lines_append(len(self.raw))

        # parse the source and write it
        self.pos = 0
        text = io.StringIO(self.raw)

        error = False
        try:
            # We're using _tokenize because tokenize expects bytes, and
            # attempts to find an encoding cookie, which can go wrong, e.g.
            # if the traceback line includes "encoding=encoding".
            # N.B. _tokenize is undocumented. An official API for tokenising
            # strings is proposed in Python Issue 9969.
            for atoken in tokenize._tokenize(text.readline, None):
                self(*atoken)
        except tokenize.TokenError as ex:
            msg = ex.args[0]
            line = ex.args[1][0]
            self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
                           (colors[token.ERRORTOKEN],
                            msg, self.raw[self.lines[line]:],
                            colors.normal)
                           )
            error = True
        self.out.write(colors.normal+'\n')
        if string_output:
            output = self.out.getvalue()
            self.out = out_old
            return (output, error)
        return (None, error)
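The comment in the try block notes that _tokenize is undocumented; for tokenizing a str on modern Pythons, the same behavior is available through the public tokenize.generate_tokens(), which skips encoding detection entirely. A minimal sketch of the equivalent loop:

import io
import tokenize

raw = "x = 'a'\n"
text = io.StringIO(raw)
try:
    for atoken in tokenize.generate_tokens(text.readline):
        print(atoken)
except tokenize.TokenError as ex:
    # Same failure mode the example handles, e.g. an unterminated block.
    print('tokenize error:', ex.args[0])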
Example #8
    def update_event(self, inp=-1):
        # Feed the node's two inputs (readline callable and encoding)
        # straight into the undocumented tokenize._tokenize().
        self.set_output_val(0, tokenize._tokenize(self.input(0),
                                                  self.input(1)))
Example #9
    def structured_traceback(self, etype, evalue, etb, tb_offset=None,
                             context=5):
        """Return a nice text document describing the traceback."""

        tb_offset = self.tb_offset if tb_offset is None else tb_offset

        # some locals
        try:
            etype = etype.__name__
        except AttributeError:
            pass
        Colors        = self.Colors   # just a shorthand + quicker name lookup
        ColorsNormal  = Colors.Normal  # used a lot
        col_scheme    = self.color_scheme_table.active_scheme_name
        indent        = ' '*INDENT_SIZE
        em_normal     = '%s\n%s%s' % (Colors.valEm, indent,ColorsNormal)
        undefined     = '%sundefined%s' % (Colors.em, ColorsNormal)
        exc = '%s%s%s' % (Colors.excName,etype,ColorsNormal)

        # some internal-use functions
        def text_repr(value):
            """Hopefully pretty robust repr equivalent."""
            # this is pretty horrible but should always return *something*
            try:
                return pydoc.text.repr(value)
            except KeyboardInterrupt:
                raise
            except:
                try:
                    return repr(value)
                except KeyboardInterrupt:
                    raise
                except:
                    try:
                        # all still in an except block so we catch
                        # getattr raising
                        name = getattr(value, '__name__', None)
                        if name:
                            # ick, recursion
                            return text_repr(name)
                        klass = getattr(value, '__class__', None)
                        if klass:
                            return '%s instance' % text_repr(klass)
                    except KeyboardInterrupt:
                        raise
                    except:
                        return 'UNRECOVERABLE REPR FAILURE'
        def eqrepr(value, repr=text_repr): return '=%s' % repr(value)
        def nullrepr(value, repr=text_repr): return ''

        # meat of the code begins
        try:
            etype = etype.__name__
        except AttributeError:
            pass

        if self.long_header:
            # Header with the exception type, python version, and date
            pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable
            date = time.ctime(time.time())
            
            head = '%s%s%s\n%s%s%s\n%s' % (Colors.topline, '-'*75, ColorsNormal,
                                           exc, ' '*(75-len(str(etype))-len(pyver)),
                                           pyver, date.rjust(75) )
            head += "\nA problem occured executing Python code.  Here is the sequence of function"\
                    "\ncalls leading up to the error, with the most recent (innermost) call last."
        else:
            # Simplified header
            head = '%s%s%s\n%s%s' % (Colors.topline, '-'*75, ColorsNormal,exc,
                                     'Traceback (most recent call last)'.\
                                        rjust(75 - len(str(etype)) ) )
        frames = []
        # Flush cache before calling inspect.  This helps alleviate some of the
        # problems with python 2.3's inspect.py.
        ##self.check_cache()
        # Drop topmost frames if requested
        try:
            # Try the default getinnerframes and Alex's: Alex's fixes some
            # problems, but it generates empty tracebacks for console errors
            # (5 blanks lines) where none should be returned.
            #records = inspect.getinnerframes(etb, context)[tb_offset:]
            #print 'python records:', records # dbg
            records = _fixed_getinnerframes(etb, context, tb_offset)
            #print 'alex   records:', records # dbg
        except:

            # FIXME: I've been getting many crash reports from python 2.3
            # users, traceable to inspect.py.  If I can find a small test-case
            # to reproduce this, I should either write a better workaround or
            # file a bug report against inspect (if that's the real problem).
            # So far, I haven't been able to find an isolated example to
            # reproduce the problem.
            inspect_error()
            traceback.print_exc(file=self.ostream)
            info('\nUnfortunately, your original traceback can not be constructed.\n')
            return ''

        # build some color string templates outside these nested loops
        tpl_link       = '%s%%s%s' % (Colors.filenameEm,ColorsNormal)
        tpl_call       = 'in %s%%s%s%%s%s' % (Colors.vName, Colors.valEm,
                                              ColorsNormal)
        tpl_call_fail  = 'in %s%%s%s(***failed resolving arguments***)%s' % \
                         (Colors.vName, Colors.valEm, ColorsNormal)
        tpl_local_var  = '%s%%s%s' % (Colors.vName, ColorsNormal)
        tpl_global_var = '%sglobal%s %s%%s%s' % (Colors.em, ColorsNormal,
                                                 Colors.vName, ColorsNormal)
        tpl_name_val   = '%%s %s= %%s%s' % (Colors.valEm, ColorsNormal)
        tpl_line       = '%s%%s%s %%s' % (Colors.lineno, ColorsNormal)
        tpl_line_em    = '%s%%s%s %%s%s' % (Colors.linenoEm,Colors.line,
                                            ColorsNormal)

        # now, loop over all records printing context and info
        abspath = os.path.abspath
        for frame, file, lnum, func, lines, index in records:
            #print '*** record:',file,lnum,func,lines,index  # dbg
            try:
                file = file and abspath(file) or '?'
            except OSError:
                # if file is '<console>' or something not in the filesystem,
                # the abspath call will throw an OSError.  Just ignore it and
                # keep the original file string.
                pass
            link = tpl_link % file
            try:
                args, varargs, varkw, locals = inspect.getargvalues(frame)
            except:
                # This can happen due to a bug in python2.3.  We should be
                # able to remove this try/except when 2.4 becomes a
                # requirement.  Bug details at http://python.org/sf/1005466
                inspect_error()
                traceback.print_exc(file=self.ostream)
                info("\nIPython's exception reporting continues...\n")
                
            if func == '?':
                call = ''
            else:
                # Decide whether to include variable details or not
                var_repr = self.include_vars and eqrepr or nullrepr
                try:
                    call = tpl_call % (func,inspect.formatargvalues(args,
                                                varargs, varkw,
                                                locals,formatvalue=var_repr))
                except KeyError:
                    # This happens in situations like errors inside generator
                    # expressions, where local variables are listed in the
                    # line, but can't be extracted from the frame.  I'm not
                    # 100% sure this isn't actually a bug in inspect itself,
                    # but since there's no info for us to compute with, the
                    # best we can do is report the failure and move on.  Here
                    # we must *not* call any traceback construction again,
                    # because that would mess up use of %debug later on.  So we
                    # simply report the failure and move on.  The only
                    # limitation will be that this frame won't have locals
                    # listed in the call signature.  Quite subtle problem...
                    # I can't think of a good way to validate this in a unit
                    # test, but running a script consisting of:
                    #  dict( (k,v.strip()) for (k,v) in range(10) )
                    # will illustrate the error, if this exception catch is
                    # disabled.
                    call = tpl_call_fail % func

            # Initialize a list of names on the current line, which the
            # tokenizer below will populate.
            names = []

            def tokeneater(token_type, token, start, end, line):
                """Stateful tokeneater which builds dotted names.

                The list of names it appends to (from the enclosing scope) can
                contain repeated composite names.  This is unavoidable, since
                there is no way to disambiguate partial dotted structures until
                the full list is known.  The caller is responsible for pruning
                the final list of duplicates before using it."""
                
                # build composite names
                if token == '.':
                    try:
                        names[-1] += '.'
                        # store state so the next token is added for x.y.z names
                        tokeneater.name_cont = True
                        return
                    except IndexError:
                        pass
                if token_type == tokenize.NAME and token not in keyword.kwlist:
                    if tokeneater.name_cont:
                        # Dotted names
                        names[-1] += token
                        tokeneater.name_cont = False
                    else:
                        # Regular new names.  We append everything, the caller
                        # will be responsible for pruning the list later.  It's
                        # very tricky to try to prune as we go, b/c composite
                        # names can fool us.  The pruning at the end is easy
                        # to do (or the caller can print a list with repeated
                        # names if so desired).
                        names.append(token)
                elif token_type == tokenize.NEWLINE:
                    raise IndexError
            # we need to store a bit of state in the tokenizer to build
            # dotted names
            tokeneater.name_cont = False

            def linereader(file=file, lnum=[lnum], getline=linecache.getline):
                line = getline(file, lnum[0])
                lnum[0] += 1
                return line

            # Build the list of names on this line of code where the exception
            # occurred.
            try:
                # This builds the names list in-place by capturing it from the
                # enclosing scope.
                # We're using _tokenize because tokenize expects bytes, and
                # attempts to find an encoding cookie, which can go wrong
                # e.g. if the traceback line includes "encoding=encoding".
                # N.B. _tokenize is undocumented. An official API for
                # tokenising strings is proposed in Python Issue 9969.
                for atoken in tokenize._tokenize(linereader, None):
                    tokeneater(*atoken)
            except IndexError:
                # signals exit of tokenizer
                pass
            except tokenize.TokenError as msg:
                _m = ("An unexpected error occurred while tokenizing input\n"
                      "The following traceback may be corrupted or invalid\n"
                      "The error message is: %s\n" % msg)
                error(_m)
            
            # prune names list of duplicates, but keep the right order
            unique_names = uniq_stable(names)

            # Start loop over vars
            lvals = []
            if self.include_vars:
                for name_full in unique_names:
                    name_base = name_full.split('.',1)[0]
                    if name_base in frame.f_code.co_varnames:
                        if name_base in locals:
                            try:
                                value = repr(eval(name_full,locals))
                            except:
                                value = undefined
                        else:
                            value = undefined
                        name = tpl_local_var % name_full
                    else:
                        if name_base in frame.f_globals:
                            try:
                                value = repr(eval(name_full,frame.f_globals))
                            except:
                                value = undefined
                        else:
                            value = undefined
                        name = tpl_global_var % name_full
                    lvals.append(tpl_name_val % (name,value))
            if lvals:
                lvals = '%s%s' % (indent,em_normal.join(lvals))
            else:
                lvals = ''

            level = '%s %s\n' % (link,call)

            if index is None:
                frames.append(level)
            else:
                frames.append('%s%s' % (level,''.join(
                    _format_traceback_lines(lnum,index,lines,Colors,lvals,
                                            col_scheme))))

        # Get (safely) a string form of the exception info
        try:
            etype_str,evalue_str = list(map(str,(etype,evalue)))
        except:
            # User exception is improperly defined.
            etype,evalue = str,sys.exc_info()[:2]
            etype_str,evalue_str = list(map(str,(etype,evalue)))
        # ... and format it
        exception = ['%s%s%s: %s' % (Colors.excName, etype_str,
                                     ColorsNormal, evalue_str)]

        # vds: >>
        if records:
            filepath, lnum = records[-1][1:3]
            #print "file:", str(file), "linenb", str(lnum) # dbg
            filepath = os.path.abspath(filepath)
            ipinst = ipapi.get()
            if ipinst is not None:
                ipinst.hooks.synchronize_with_editor(filepath, lnum, 0)
        # vds: <<
                
        # return all our info assembled as a single string
        # return '%s\n\n%s\n%s' % (head,'\n'.join(frames),''.join(exception[0]) )
        return [head] + frames + [''.join(exception[0])]
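For reference, the dotted-name collection that tokeneater implements can be expressed as a self-contained function over the public generate_tokens(); collect_names below is a hypothetical helper written for illustration, not part of IPython:

import io
import keyword
import tokenize

def collect_names(line_source):
    """Collect plain and dotted names from one logical line of source."""
    names = []
    name_cont = False
    try:
        for tok in tokenize.generate_tokens(io.StringIO(line_source).readline):
            if tok.string == '.' and names:
                # Extend the previous name so x.y.z stays one entry.
                names[-1] += '.'
                name_cont = True
            elif tok.type == tokenize.NAME and tok.string not in keyword.kwlist:
                if name_cont:
                    names[-1] += tok.string
                    name_cont = False
                else:
                    names.append(tok.string)
            elif tok.type == tokenize.NEWLINE:
                # End of the logical line; stop, like the IndexError exit above.
                break
    except tokenize.TokenError:
        pass
    return names

print(collect_names("value = obj.attr.method(x)\n"))
# ['value', 'obj.attr.method', 'x']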