def token_iter(self, readline):
    tokiter = _tokenize(readline, None)
    # Buffer the entire token stream before yielding anything, so a
    # tokenize error is raised before any token is emitted.
    out = []
    for tok in tokiter:
        out.append(tok)
    for tok in out:
        yield tok
def tokenize(string: typing.AnyStr):
    if isinstance(string, bytes):
        # Call the internal tokenize function to avoid sniffing the encoding.
        # If it tried to sniff the encoding of a "# encoding: import_expression"
        # file, it would call our code again, resulting in a RecursionError.
        return tokenize_._tokenize(io.BytesIO(string).readline, encoding='utf-8')
    return tokenize_.generate_tokens(io.StringIO(string).readline)
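# Usage sketch for the helper above. Assumptions (not in the original): the
# stdlib module is imported as `import tokenize as tokenize_`, `io` and
# `typing` are in scope, and the interpreter still ships the pure-Python
# tokenize._tokenize with an `encoding` parameter. Note that the bytes branch
# yields an initial ENCODING token (the tests further down skip it), while
# the str branch does not.
for tok in tokenize('x = 1\n'):    # str path: generate_tokens
    print(tok)
for tok in tokenize(b'x = 1\n'):   # bytes path: _tokenize with utf-8 forced
    print(tok)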
def token_iter(self, readline):
    tokiter = _tokenize(readline, None)
    opmap = self.grammar.OPERATOR_MAP
    for tok in tokiter:
        ttype = opmap.get(tok.string, None)
        if ttype:
            yield tok._replace(type=ttype)
        else:
            yield tok
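# Self-contained sketch of the operator-remapping idea above, using only the
# documented API. Assumptions (not from the original): OPERATOR_MAP here is
# an illustrative table, and tokenize.generate_tokens stands in for the
# internal _tokenize.
import io
import tokenize

OPERATOR_MAP = {'?': tokenize.OP}  # hypothetical custom operator table

def remap_tokens(source):
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        ttype = OPERATOR_MAP.get(tok.string)
        # TokenInfo is a namedtuple, so _replace swaps the type field only
        yield tok._replace(type=ttype) if ttype else tok

# Tokens whose string is not in the table pass through unchanged:
# list(remap_tokens('x = 1\n'))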
def tokenize(readline): """Tokenizer for the quasiquotes language extension. Parameters ---------- readline : callable A callable that returns the next line to tokenize. Yields ------ t : TokenInfo The token stream. """ # force the token stream to use `utf-8` and ignore the encoding pragma. tok_stream = PeekableIterator(_tokenize( chain(iter(readline, b''), repeat(b'')).__next__, 'utf-8', )) for t in tok_stream: if t == with_tok: try: sp, dol, name, col, nl, indent = tok_stream.peek(6) except ValueError: yield t continue if (sp == spaceerror_tok and dol == dollar_tok and col == col_tok and nl == nl_tok and indent.type == INDENT): tok_stream.consume_peeked(6) yield from quote_stmt_tokenizer(name, t, tok_stream) continue elif t == left_bracket_tok: try: dol, name, pipe = tok_stream.peek(3) except ValueError: yield t continue if dol == dollar_tok and pipe == pipe_tok: tok_stream.consume_peeked(3) yield from quote_expr_tokenizer(name, t, tok_stream) continue yield t
def test__tokenize_does_not_decode_with_encoding_none(self):
    literal = '"ЉЊЈЁЂ"'
    first = False

    def readline():
        nonlocal first
        if not first:
            first = True
            return literal
        else:
            return b''

    # skip the end token
    tokens = list(_tokenize(readline, encoding=None))[:-1]
    expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
    self.assertEqual(tokens, expected_tokens,
                     "string not tokenized when encoding is None")
def test__tokenize_decodes_with_specified_encoding(self):
    literal = '"ЉЊЈЁЂ"'
    line = literal.encode('utf-8')
    first = False

    def readline():
        nonlocal first
        if not first:
            first = True
            return line
        else:
            return b''

    # skip the initial encoding token and the end token
    tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
    expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
    self.assertEqual(tokens, expected_tokens,
                     "bytes not decoded with encoding")
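# Why the bare tuples in these tests compare equal to the tokenizer's output:
# TokenInfo is a namedtuple, so it inherits element-wise tuple equality, and
# the STRING token type has the numeric value 3. A small demonstration:
import token
import tokenize

info = tokenize.TokenInfo(token.STRING, '"x"', (1, 0), (1, 3), '"x"')
assert info == (3, '"x"', (1, 0), (1, 3), '"x"')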
def tokenize(readline): """Tokenizer for the quasiquotes language extension. Parameters ---------- readline : callable A callable that returns the next line to tokenize. Yields ------ t : TokenInfo The token stream. """ # force the token stream to use `utf-8` and ignore the encoding pragma. tok_stream = PeekableIterator( _tokenize( chain(iter(readline, b''), repeat(b'')).__next__, 'utf-8', )) for t in tok_stream: if t == with_tok: try: sp, dol, name, col, nl, indent = tok_stream.peek(6) except ValueError: yield t continue if (sp == spaceerror_tok and dol == dollar_tok and col == col_tok and nl == nl_tok and indent.type == INDENT): tok_stream.consume_peeked(6) yield from quote_stmt_tokenizer(name, t, tok_stream) continue elif t == left_bracket_tok: try: dol, name, pipe = tok_stream.peek(3) except ValueError: yield t continue if dol == dollar_tok and pipe == pipe_tok: tok_stream.consume_peeked(3) yield from quote_expr_tokenizer(name, t, tok_stream) continue yield t
def format2(self, raw, out=None, scheme=''):
    """ Parse and send the colored source.

    If out and scheme are not specified, the defaults (given to
    constructor) are used.

    out should be a file-type object. Optionally, out can be given as the
    string 'str' and the parser will automatically return the output in a
    string."""

    string_output = 0
    if out == 'str' or self.out == 'str' or \
       isinstance(self.out, io.TextIOBase):
        # XXX - I don't really like this state handling logic, but at this
        # point I don't want to make major changes, so adding the
        # isinstance() check is the simplest I can do to ensure correct
        # behavior.
        out_old = self.out
        self.out = io.StringIO()
        string_output = 1
    elif out is not None:
        self.out = out

    # Fast return of the unmodified input for NoColor scheme
    if scheme == 'NoColor':
        error = False
        self.out.write(raw)
        if string_output:
            return raw, error
        else:
            return None, error

    # local shorthands
    colors = self.color_table[scheme].colors
    self.colors = colors  # put in object so __call__ sees it

    # Remove trailing whitespace and normalize tabs
    self.raw = raw.expandtabs().rstrip()

    # store line offsets in self.lines
    self.lines = [0, 0]
    pos = 0
    raw_find = self.raw.find
    lines_append = self.lines.append
    while 1:
        pos = raw_find('\n', pos) + 1
        if not pos:
            break
        lines_append(pos)
    lines_append(len(self.raw))

    # parse the source and write it
    self.pos = 0
    text = io.StringIO(self.raw)

    error = False
    try:
        # We're using _tokenize because tokenize expects bytes, and
        # attempts to find an encoding cookie, which can go wrong, e.g.
        # if the traceback line includes "encoding=encoding".
        # N.B. _tokenize is undocumented.  An official API for tokenising
        # strings is proposed in Python Issue 9969.
        for atoken in tokenize._tokenize(text.readline, None):
            self(*atoken)
    except tokenize.TokenError as ex:
        msg = ex.args[0]
        line = ex.args[1][0]
        self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
                       (colors[token.ERRORTOKEN],
                        msg, self.raw[self.lines[line]:],
                        colors.normal))
        error = True
    self.out.write(colors.normal + '\n')
    if string_output:
        output = self.out.getvalue()
        self.out = out_old
        return (output, error)
    return (None, error)
def update_event(self, inp=-1):
    self.set_output_val(0, tokenize._tokenize(self.input(0), self.input(1)))
def structured_traceback(self, etype, evalue, etb, tb_offset=None, context=5):
    """Return a nice text document describing the traceback."""

    tb_offset = self.tb_offset if tb_offset is None else tb_offset

    # some locals
    try:
        etype = etype.__name__
    except AttributeError:
        pass
    Colors = self.Colors  # just a shorthand + quicker name lookup
    ColorsNormal = Colors.Normal  # used a lot
    col_scheme = self.color_scheme_table.active_scheme_name
    indent = ' ' * INDENT_SIZE
    em_normal = '%s\n%s%s' % (Colors.valEm, indent, ColorsNormal)
    undefined = '%sundefined%s' % (Colors.em, ColorsNormal)
    exc = '%s%s%s' % (Colors.excName, etype, ColorsNormal)

    # some internal-use functions
    def text_repr(value):
        """Hopefully pretty robust repr equivalent."""
        # this is pretty horrible but should always return *something*
        try:
            return pydoc.text.repr(value)
        except KeyboardInterrupt:
            raise
        except:
            try:
                return repr(value)
            except KeyboardInterrupt:
                raise
            except:
                try:
                    # all still in an except block so we catch
                    # getattr raising
                    name = getattr(value, '__name__', None)
                    if name:
                        # ick, recursion
                        return text_repr(name)
                    klass = getattr(value, '__class__', None)
                    if klass:
                        return '%s instance' % text_repr(klass)
                except KeyboardInterrupt:
                    raise
                except:
                    return 'UNRECOVERABLE REPR FAILURE'

    def eqrepr(value, repr=text_repr):
        return '=%s' % repr(value)

    def nullrepr(value, repr=text_repr):
        return ''

    # meat of the code begins
    try:
        etype = etype.__name__
    except AttributeError:
        pass

    if self.long_header:
        # Header with the exception type, python version, and date
        pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable
        date = time.ctime(time.time())
        head = '%s%s%s\n%s%s%s\n%s' % (
            Colors.topline, '-' * 75, ColorsNormal, exc,
            ' ' * (75 - len(str(etype)) - len(pyver)), pyver, date.rjust(75))
        head += "\nA problem occurred executing Python code. Here is the sequence of function" \
                "\ncalls leading up to the error, with the most recent (innermost) call last."
    else:
        # Simplified header
        head = '%s%s%s\n%s%s' % (
            Colors.topline, '-' * 75, ColorsNormal, exc,
            'Traceback (most recent call last)'.rjust(75 - len(str(etype))))
    frames = []
    # Flush cache before calling inspect.  This helps alleviate some of the
    # problems with python 2.3's inspect.py.
    ##self.check_cache()
    # Drop topmost frames if requested
    try:
        # Try the default getinnerframes and Alex's: Alex's fixes some
        # problems, but it generates empty tracebacks for console errors
        # (5 blanks lines) where none should be returned.
        #records = inspect.getinnerframes(etb, context)[tb_offset:]
        #print 'python records:', records # dbg
        records = _fixed_getinnerframes(etb, context, tb_offset)
        #print 'alex records:', records # dbg
    except:
        # FIXME: I've been getting many crash reports from python 2.3
        # users, traceable to inspect.py.  If I can find a small test-case
        # to reproduce this, I should either write a better workaround or
        # file a bug report against inspect (if that's the real problem).
        # So far, I haven't been able to find an isolated example to
        # reproduce the problem.
        inspect_error()
        traceback.print_exc(file=self.ostream)
        info('\nUnfortunately, your original traceback can not be constructed.\n')
        return ''

    # build some color string templates outside these nested loops
    tpl_link = '%s%%s%s' % (Colors.filenameEm, ColorsNormal)
    tpl_call = 'in %s%%s%s%%s%s' % (Colors.vName, Colors.valEm, ColorsNormal)
    tpl_call_fail = 'in %s%%s%s(***failed resolving arguments***)%s' % \
                    (Colors.vName, Colors.valEm, ColorsNormal)
    tpl_local_var = '%s%%s%s' % (Colors.vName, ColorsNormal)
    tpl_global_var = '%sglobal%s %s%%s%s' % (Colors.em, ColorsNormal,
                                             Colors.vName, ColorsNormal)
    tpl_name_val = '%%s %s= %%s%s' % (Colors.valEm, ColorsNormal)
    tpl_line = '%s%%s%s %%s' % (Colors.lineno, ColorsNormal)
    tpl_line_em = '%s%%s%s %%s%s' % (Colors.linenoEm, Colors.line, ColorsNormal)

    # now, loop over all records printing context and info
    abspath = os.path.abspath
    for frame, file, lnum, func, lines, index in records:
        #print '*** record:', file, lnum, func, lines, index  # dbg
        try:
            file = file and abspath(file) or '?'
        except OSError:
            # if file is '<console>' or something not in the filesystem,
            # the abspath call will throw an OSError.  Just ignore it and
            # keep the original file string.
            pass
        link = tpl_link % file
        try:
            args, varargs, varkw, locals = inspect.getargvalues(frame)
        except:
            # This can happen due to a bug in python2.3.  We should be
            # able to remove this try/except when 2.4 becomes a
            # requirement.  Bug details at http://python.org/sf/1005466
            inspect_error()
            traceback.print_exc(file=self.ostream)
            info("\nIPython's exception reporting continues...\n")

        if func == '?':
            call = ''
        else:
            # Decide whether to include variable details or not
            var_repr = self.include_vars and eqrepr or nullrepr
            try:
                call = tpl_call % (func, inspect.formatargvalues(
                    args, varargs, varkw, locals, formatvalue=var_repr))
            except KeyError:
                # This happens in situations like errors inside generator
                # expressions, where local variables are listed in the
                # line, but can't be extracted from the frame.  I'm not
                # 100% sure this isn't actually a bug in inspect itself,
                # but since there's no info for us to compute with, the
                # best we can do is report the failure and move on.  Here
                # we must *not* call any traceback construction again,
                # because that would mess up use of %debug later on.  So we
                # simply report the failure and move on.  The only
                # limitation will be that this frame won't have locals
                # listed in the call signature.  Quite subtle problem...
                # I can't think of a good way to validate this in a unit
                # test, but running a script consisting of:
                #  dict( (k,v.strip()) for (k,v) in range(10) )
                # will illustrate the error, if this exception catch is
                # disabled.
                call = tpl_call_fail % func

        # Initialize a list of names on the current line, which the
        # tokenizer below will populate.
        names = []

        def tokeneater(token_type, token, start, end, line):
            """Stateful tokeneater which builds dotted names.

            The list of names it appends to (from the enclosing scope) can
            contain repeated composite names.  This is unavoidable, since
            there is no way to disambiguate partial dotted structures until
            the full list is known.  The caller is responsible for pruning
            the final list of duplicates before using it."""

            # build composite names
            if token == '.':
                try:
                    names[-1] += '.'
                    # store state so the next token is added for x.y.z names
                    tokeneater.name_cont = True
                    return
                except IndexError:
                    pass
            if token_type == tokenize.NAME and token not in keyword.kwlist:
                if tokeneater.name_cont:
                    # Dotted names
                    names[-1] += token
                    tokeneater.name_cont = False
                else:
                    # Regular new names.  We append everything, the caller
                    # will be responsible for pruning the list later.  It's
                    # very tricky to try to prune as we go, b/c composite
                    # names can fool us.  The pruning at the end is easy to
                    # do (or the caller can print a list with repeated names
                    # if so desired).
                    names.append(token)
            elif token_type == tokenize.NEWLINE:
                raise IndexError
        # we need to store a bit of state in the tokenizer to build
        # dotted names
        tokeneater.name_cont = False

        def linereader(file=file, lnum=[lnum], getline=linecache.getline):
            line = getline(file, lnum[0])
            lnum[0] += 1
            return line

        # Build the list of names on this line of code where the exception
        # occurred.
        try:
            # This builds the names list in-place by capturing it from the
            # enclosing scope.
            # We're using _tokenize because tokenize expects bytes, and
            # attempts to find an encoding cookie, which can go wrong,
            # e.g. if the traceback line includes "encoding=encoding".
            # N.B. _tokenize is undocumented.  An official API for
            # tokenising strings is proposed in Python Issue 9969.
            for atoken in tokenize._tokenize(linereader, None):
                tokeneater(*atoken)
        except IndexError:
            # signals exit of tokenizer
            pass
        except tokenize.TokenError as msg:
            _m = ("An unexpected error occurred while tokenizing input\n"
                  "The following traceback may be corrupted or invalid\n"
                  "The error message is: %s\n" % msg)
            error(_m)

        # prune names list of duplicates, but keep the right order
        unique_names = uniq_stable(names)

        # Start loop over vars
        lvals = []
        if self.include_vars:
            for name_full in unique_names:
                name_base = name_full.split('.', 1)[0]
                if name_base in frame.f_code.co_varnames:
                    if name_base in locals:
                        try:
                            value = repr(eval(name_full, locals))
                        except:
                            value = undefined
                    else:
                        value = undefined
                    name = tpl_local_var % name_full
                else:
                    if name_base in frame.f_globals:
                        try:
                            value = repr(eval(name_full, frame.f_globals))
                        except:
                            value = undefined
                    else:
                        value = undefined
                    name = tpl_global_var % name_full
                lvals.append(tpl_name_val % (name, value))
        if lvals:
            lvals = '%s%s' % (indent, em_normal.join(lvals))
        else:
            lvals = ''

        level = '%s %s\n' % (link, call)
        if index is None:
            frames.append(level)
        else:
            frames.append('%s%s' % (level, ''.join(
                _format_traceback_lines(lnum, index, lines, Colors, lvals,
                                        col_scheme))))

    # Get (safely) a string form of the exception info
    try:
        etype_str, evalue_str = list(map(str, (etype, evalue)))
    except:
        # User exception is improperly defined.
        etype, evalue = str, sys.exc_info()[:2]
        etype_str, evalue_str = list(map(str, (etype, evalue)))
    # ... and format it
    exception = ['%s%s%s: %s' % (Colors.excName, etype_str,
                                 ColorsNormal, evalue_str)]

    # vds: >>
    if records:
        filepath, lnum = records[-1][1:3]
        #print "file:", str(file), "linenb", str(lnum)  # dbg
        filepath = os.path.abspath(filepath)
        ipinst = ipapi.get()
        if ipinst is not None:
            ipinst.hooks.synchronize_with_editor(filepath, lnum, 0)
    # vds: <<

    # return all our info assembled as a single string
    # return '%s\n\n%s\n%s' % (head, '\n'.join(frames), ''.join(exception[0]))
    return [head] + frames + [''.join(exception[0])]
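# The name-collection loop above leans on the undocumented tokenize._tokenize
# to tokenize a string. A simplified sketch of the same idea on top of the
# documented tokenize.generate_tokens (an illustration, not the original
# code; it collects plain names and skips the dotted-name stitching):
import io
import keyword
import tokenize

def names_on_line(source):
    names = []
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        if tok.type == tokenize.NAME and tok.string not in keyword.kwlist:
            names.append(tok.string)
        elif tok.type == tokenize.NEWLINE:
            # stop at the end of the first logical line, like the
            # IndexError trick in the tokeneater above
            break
    return names

# names_on_line('foo.bar(baz)\n') -> ['foo', 'bar', 'baz']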