def tokeneater(self, toktype, toktext, srowcol, erowcol, line):
    """
    A callback function used by C{tokenize.tokenize} to handle
    each token in the module.  C{tokeneater} collects tokens into
    the C{self.cur_line} list until a complete logical line has
    been formed; and then calls L{handle_line} to process that line.
    """
    (srow, scol) = srowcol
    (erow, ecol) = erowcol
    if toktype == token.ERRORTOKEN:
        raise tokenize.TokenError(toktype)

    # Did we skip any whitespace?  If so, add a pseudotoken
    # for it, with toktype=None.  (Note -- this skipped string
    # might also contain continuation slashes; but I won't bother
    # to colorize them.)
    startpos = self.line_offsets[srow] + scol
    if startpos > self.pos:
        skipped = self.text[self.pos:startpos]
        self.cur_line.append((None, skipped))

    # Update our position.
    self.pos = startpos + len(toktext)

    # Update our current line.
    self.cur_line.append((toktype, toktext))

    # When we reach the end of a line, process it.
    if toktype == token.NEWLINE or toktype == token.ENDMARKER:
        self.handle_line(self.cur_line)
        self.cur_line = []
def minify_iteration(input_code):
    # setup vars
    output_code = ""
    last_token = None
    last_erow = -1
    last_ecol = 0

    # setup generators
    linegen = io.StringIO(input_code).readline
    tokgen = tokenize.generate_tokens(linegen)

    # read source code in tokens
    for token, text, (srow, scol), (erow, ecol), line in tokgen:
        # check for parsing errors
        if token == tokenize.ERRORTOKEN:
            raise tokenize.TokenError("Failed to parse python code")

        # choose when to keep a token
        keep_token = (token != tokenize.COMMENT) and (token != tokenize.NL) \
            and not ((last_token == tokenize.NEWLINE)
                     and (token == tokenize.STRING))

        # keep token if flag is set
        if keep_token:
            # set indent flag
            set_indents = (token != tokenize.NEWLINE) \
                and (token != tokenize.STRING) \
                and (last_token != tokenize.STRING) \
                and (token != tokenize.OP) and (last_token != tokenize.OP)

            # restore indentation
            if srow > last_erow:
                last_ecol = 0
            if (scol > last_ecol) and set_indents:
                indents = scol - last_ecol
                output_code += " " * indents

            # convert tabs to spaces
            if token == tokenize.INDENT:
                text = text.replace("\t", " ")

            # convert Windows line breaks to proper line breaks
            if token == tokenize.NEWLINE:
                text = "\n"

            # write code to buffer
            output_code += text

        # update vars
        last_erow = erow
        last_ecol = ecol
        if (token != tokenize.DEDENT) and (token != tokenize.INDENT):
            last_token = token

    # return the minified source code
    return output_code
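# Minimal usage sketch (not from the original project): minify_iteration above
# is driven by tokenize.generate_tokens over a str readline.  The standalone
# helper iter_tokens below is hypothetical and only illustrates the same
# COMMENT/NL filtering plus the TokenError handling a caller would need.
import io
import tokenize

def iter_tokens(source):
    tokgen = tokenize.generate_tokens(io.StringIO(source).readline)
    for tok in tokgen:
        # Skip comments and blank-line NL tokens, as minify_iteration does.
        if tok.type in (tokenize.COMMENT, tokenize.NL):
            continue
        yield tok.type, tok.string

try:
    for toktype, text in iter_tokens("x = 1  # comment\nprint(x)\n"):
        print(tokenize.tok_name[toktype], repr(text))
except tokenize.TokenError:
    # Raised lazily while iterating, e.g. when the source ends inside an
    # unclosed bracket or multi-line string.
    print("could not tokenize input")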
def tokeneater(self, toktype, toktext, srowcol, erowcol, line):
    """
    A callback function used by C{tokenize.tokenize} to handle
    each token in the module.  C{tokeneater} collects tokens into
    the C{self.cur_line} list until a complete logical line has
    been formed; and then calls L{handle_line} to process that line.
    """
    srow, scol = srowcol
    erow, ecol = erowcol

    # If we encounter any errors, then just give up.
    if toktype == token.ERRORTOKEN:
        raise tokenize.TokenError(toktype)

    if hasattr(tokenize, 'ENCODING') and toktype == tokenize.ENCODING:
        if self.coding is None:
            self.coding = toktext
        return

    token_startpos = self.token_line_offsets[srow] + scol
    if six.binary_type is str:
        input_startpos = token_startpos
        input_toktext = toktext
    else:
        input_scol = len(line[:scol].encode(self.coding))
        input_startpos = self.input_line_offsets[srow] + input_scol
        input_toktext = toktext.encode(self.coding)

    # Did we skip anything whitespace?  If so, add a pseudotoken
    # for it, with toktype=None.  (Note -- this skipped string
    # might also contain continuation slashes; but I won't bother
    # to colorize them.)
    if input_startpos > self.input_pos:
        skipped = self.text[self.input_pos:input_startpos]
        if six.binary_type is not str:
            skipped = skipped.decode(self.coding)
        self.cur_line.append((None, skipped))

    # Update our position.
    self.token_pos = token_startpos + len(toktext)
    self.input_pos = input_startpos + len(input_toktext)

    # Update our current line.
    self.cur_line.append((toktype, toktext))

    # When we reach the end of a line, process it.
    if toktype == token.NEWLINE or toktype == token.ENDMARKER:
        self.handle_line(self.cur_line)
        self.cur_line = []
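# The hasattr(tokenize, 'ENCODING') guard above exists because the ENCODING
# token type is only emitted by the Python 3 bytes-based API.  A minimal
# sketch of where that token (stored in self.coding above) comes from:
import io
import tokenize

source = b"x = 1\n"
tokens = list(tokenize.tokenize(io.BytesIO(source).readline))
assert tokens[0].type == tokenize.ENCODING
print(tokens[0].string)   # 'utf-8' -- the detected source encoding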
def parse_docstring(docstring, keywords):
    """Parse a docstring, looking for design-by-contract expressions.

    Returns a list of tuples: the list is the same length as keywords,
    and matches each keyword.  The tuple is (keyword, [decls], [exprs]),
    namely the keyword, a list of string declarations, and a list of
    tuples (string, lineno).

    Examples::
        >>> from pprint import pprint
        >>> pprint( parse_docstring(parse_docstring.__doc__, ['post', 'pre']) )
        [('post', [], [('[ x [ 0 ] for x in __return__ ] == keywords', 22)]),
         ('pre',
          [],
          [('docstring is None or isstring ( docstring )', 18),
           ('forall ( keywords , isstring )', 19)])]

    pre::
        docstring is None or isstring(docstring)
        forall(keywords, isstring)

    post[]::
        [x[0] for x in __return__] == keywords
    """
    result = [(x, [], []) for x in keywords]

    if docstring is None:
        return result

    # step 1: scan through docstring looking for keyword
    input = StringIO(docstring)
    offset = 0

    assert input.tell() == 0

    line = input.readline()
    lineno = 0  # zero-origined because tokenizer keeps 1-origined

    while line != '':
        a = _re_start.split(line)

        if len(a) > _RE_KEYWORD and a[_RE_KEYWORD] in keywords:
            # step 2: found a keyword, now rewind and scan looking
            # for either an inline expression or a series of sub-
            # indented expressions
            input.seek(offset)

            # ttw005... get lineno info and add to exception's lineno
            # if a TokenError occurs...
            try:
                l = _read_block(input, lineno)
                lineno = l[3]  # returns (keyword, decls, exprs, lineno)
            except tokenize.TokenError, ex:
                # reformat to include new line info
                raise tokenize.TokenError(ex[0],
                                          (lineno + ex[1][0],) + ex[1][1:])
            # ...ttw005

            # Find the right result index based on keyword
            r = result[keywords.index(l[0])]
            r[1].extend(l[1])
            r[2].extend(l[2])
        else:
            lineno += 1

        if offset == input.tell():
            break

        offset = input.tell()
        line = input.readline()

    return result
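# The except clause above unpacks the exception as (message, (row, col)),
# which is the shape of tokenize.TokenError.args for errors raised by the
# tokenize module itself.  A small sketch (exact positions may differ
# slightly between Python versions):
import io
import tokenize

bad = 'x = """unterminated\n'
try:
    list(tokenize.generate_tokens(io.StringIO(bad).readline))
except tokenize.TokenError as err:
    message, (row, col) = err.args
    print(message, row, col)   # e.g. EOF in multi-line string 1 4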
def generate_tokens(self):
    """Creates a generator object that yields Python/Mython tokens.

    Based on (largely retyped and slightly modified from) the
    tokenize module by Ka Ping Yee and others, licensed under the
    PSFL (see .../basil/thirdparty/PSF_LICENSE).
    """
    namechars = string.ascii_letters + '_'
    numchars = '0123456789'
    while 1:
        line = self.readline()
        pos, max_pos = 0, len(line)
        if self.parenlev == 0 and self.in_mysuite:
            # This should only be reached if no non-whitespace
            # characters were found following the trailing colon
            # of a mysuite statement, and we've already recognized
            # the end of line token for the multi-line quotation.
            # This must go here so the readliner is properly
            # hijacked.
            mysuite_lnum = self.lnum
            mysuite_lns = []
            while ((line != '') and
                   (self.empty_line_pattern.match(line) != None)):
                mysuite_lns.append(line)
                line = self.readline()
                pos, max_pos = 0, len(line)
            indent_ws = ''
            indent_lnum = -1
            if line != '':
                indent_lnum = self.lnum
                match_obj = self.ws_pattern.match(line)
                indent_ws = match_obj.groups(1)[0]
                while line.startswith(indent_ws):
                    mysuite_lns.append(line)
                    line = self.readline()
                    pos, max_pos = 0, len(line)
                    while ((line != '') and
                           (self.empty_line_pattern.match(line) != None)):
                        mysuite_lns.append(line)
                        line = self.readline()
                        pos, max_pos = 0, len(line)
            else:
                # TODO: Shouldn't this be MyFrontIndentError?
                raise SyntaxError("Empty mysuite block, starting on "
                                  "line %d, runs to end of file." %
                                  mysuite_lnum)
            indent_ws_len = len(indent_ws)
            if indent_ws_len <= self.indents[-1]:
                # TODO: Shouldn't this be MyFrontIndentError?
                raise SyntaxError("Improper indentation level at "
                                  "line %d; expected %d, got %d." %
                                  (indent_lnum, self.indents[-1],
                                   indent_ws_len))
            # Strip the block's leading indentation, keeping only the
            # line terminators for lines shorter than the indent.
            normalized_lns = [mysuite_ln[indent_ws_len:]
                              if len(mysuite_ln) > indent_ws_len
                              else (mysuite_ln[-2:]
                                    if mysuite_ln.endswith('\r\n')
                                    else mysuite_ln[-1:])
                              for mysuite_ln in mysuite_lns]
            token = "".join(normalized_lns)
            lines = "".join(mysuite_lns)
            spos = (mysuite_lnum, indent_ws_len)
            epos = (spos[0] + len(mysuite_lns), 0)
            yield self.make_token(MYSUITE, token, spos, epos, lines)
            self.in_mysuite = False
        if self.contstr:                            # continued string
            if not line:
                raise tokenize.TokenError("EOF in multi-line string",
                                          self.strstart)
            endmatch = self.endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield self.make_token(
                    tokenize.STRING, self.contstr + line[:end],
                    self.strstart, (self.lnum, end),
                    self.contline + line)
                self.contstr = ''
                self.needcont = 0
                self.contline = None
            elif (self.needcont and (line[-2:] != '\\\n') and
                  (line[-3:] != '\\\r\n')):
                yield self.make_token(
                    tokenize.ERRORTOKEN, self.contstr + line,
                    self.strstart, (self.lnum, len(line)),
                    self.contline)
                self.contstr = ''
                self.contline = None
                continue
            else:
                self.contstr += line
                self.contline += line
                continue
        elif self.parenlev == 0 and not self.continued:  # new statement
            if not line:
                break
            column = 0
            while pos < max_pos:                # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    column = (column // self.tabsize + 1) * self.tabsize
                elif line[pos] == '\f':
                    column = 0
                else:
                    break
                pos += 1
            if pos == max_pos:
                break
            if line[pos] in '#\r\n':            # skip comments or blank lines
                if line[pos] == "#":
                    if sys.version_info < (2, 6):
                        yield self.make_token(
                            tokenize.COMMENT, line[pos:],
                            (self.lnum, pos), (self.lnum, len(line)),
                            line)
                    else:
                        comment_token = line[pos:].rstrip('\r\n')
                        nl_pos = pos + len(comment_token)
                        yield self.make_token(
                            tokenize.COMMENT, comment_token,
                            (self.lnum, pos),
                            (self.lnum, pos + len(comment_token)),
                            line)
                        yield self.make_token(
                            tokenize.NL, line[nl_pos:],
                            (self.lnum, nl_pos),
                            (self.lnum, len(line)), line)
                else:
                    yield self.make_token(
                        tokenize.NL, line[pos:],
                        (self.lnum, pos), (self.lnum, len(line)), line)
                continue
            if column > self.indents[-1]:       # count indents or dedents
                self.indents.append(column)
                yield self.make_token(
                    tokenize.INDENT, line[:pos], (self.lnum, 0),
                    (self.lnum, pos), line)
            while column < self.indents[-1]:
                if column not in self.indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation "
                        "level", ("<tokenize>", self.lnum, pos, line))
                self.indents.pop()
                yield self.make_token(
                    tokenize.DEDENT, '', (self.lnum, pos),
                    (self.lnum, pos), line)
        else:                                   # continued statement
            if not line:
                if __debug__:
                    pprint.pprint(self.__dict__)
                raise tokenize.TokenError("EOF in multi-line statement",
                                          (self.lnum, 0))
            self.continued = 0
        while pos < max_pos:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                     # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (self.lnum, start), (self.lnum, end), end
                token, initial = line[start:end], line[start]
                if ((initial in numchars) or
                    (initial == '.' and token != '.')):
                    yield self.make_token(tokenize.NUMBER, token, spos,
                                          epos, line)
                elif initial in '\r\n':
                    yield self.make_token(
                        tokenize.NL if self.parenlev > 0
                        else tokenize.NEWLINE,
                        token, spos, epos, line)
                elif initial == '#':
                    yield self.make_token(tokenize.COMMENT, token, spos,
                                          epos, line)
                elif token in tokenize.triple_quoted:
                    self.endprog = tokenize.endprogs[token]
                    endmatch = self.endprog.match(line, pos)
                    if endmatch:                # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield self.make_token(tokenize.STRING, token,
                                              spos, (self.lnum, pos),
                                              line)
                    else:                       # multiple lines
                        self.strstart = (self.lnum, start)
                        self.contstr = line[start:]
                        self.contline = line
                        break
                elif ((initial in tokenize.single_quoted) or
                      (token[:2] in tokenize.single_quoted) or
                      (token[:3] in tokenize.single_quoted)):
                    if token[-1] == '\n':       # continued string
                        self.strstart = (self.lnum, start)
                        self.endprog = (tokenize.endprogs[initial] or
                                        tokenize.endprogs[token[1]] or
                                        tokenize.endprogs[token[2]])
                        self.contstr = line[start:]
                        self.needcont = 1
                        self.contline = line
                        break
                    else:                       # ordinary string
                        yield self.make_token(tokenize.STRING, token,
                                              spos, epos, line)
                elif ((hasattr(initial, 'isidentifier') and
                       initial.isidentifier()) or
                      (initial in namechars)):  # ordinary name
                    yield self.make_token(tokenize.NAME, token, spos,
                                          epos, line)
                elif initial == '\\':           # continued statement
                    self.continued = 1
                elif token == '!':
                    yield self.make_token(BANG, token, spos, epos, line)
                else:
                    if initial in '([{':
                        self.parenlev += 1
                    elif initial in '}])':
                        self.parenlev -= 1
                    yield self.make_token(tokenize.OP, token, spos, epos,
                                          line)
                    if (token == ':' and self.in_mysuite and
                            self.parenlev == 0):
                        # Capture an inline mysuite quotation that
                        # follows the colon on the same line.
                        cand_token = line[pos:].strip()
                        if cand_token:
                            token_with_ws_len = len(
                                line[pos:].rstrip('\r\n'))
                            while line[pos].isspace():
                                pos += 1
                            yield self.make_token(
                                MYSUITE, cand_token,
                                (self.lnum, pos),
                                (self.lnum, epos[1] + token_with_ws_len),
                                line)
                            pos += len(cand_token)
                            self.in_mysuite = False
                    elif self.in_myexpr:
                        # Capture a delimited myexpr quotation, which
                        # may span multiple physical lines.
                        open_delim = self.open_delim
                        close_delim = CLOSERS[open_delim]
                        myexpr_depth = 1
                        spos = epos
                        myexpr_lns = [line]
                        while 1:
                            while pos < max_pos:
                                if line[pos] == open_delim:
                                    myexpr_depth += 1
                                elif line[pos] == close_delim:
                                    myexpr_depth -= 1
                                pos += 1
                                if myexpr_depth <= 0:
                                    if close_delim in '}])':
                                        self.parenlev -= 1
                                    break
                            if myexpr_depth > 0:
                                line = self.readline()
                                myexpr_lns.append(line)
                                pos, max_pos = 0, len(line)
                            else:
                                assert myexpr_depth == 0
                                break
                        epos = self.lnum, pos
                        if len(myexpr_lns) == 1:
                            token = line[spos[1]:pos]
                        else:
                            token = "".join((myexpr_lns[0][spos[1]:],
                                             "".join(myexpr_lns[1:-2]),
                                             myexpr_lns[-1][:pos]))
                        lines = "".join(myexpr_lns)
                        yield self.make_token(MYEXPR, token, spos, epos,
                                              lines)
                        self.in_myexpr = False
                        del self.open_delim
            else:
                yield self.make_token(
                    tokenize.ERRORTOKEN,
                    line[pos] if pos < max_pos else '',
                    (self.lnum, pos), (self.lnum, pos + 1), line)
                pos += 1
    # Pop any open indentation levels and finish with an ENDMARKER.
    for indent in self.indents[1:]:
        yield self.make_token(tokenize.DEDENT, '', (self.lnum, 0),
                              (self.lnum, 0), '')
    yield self.make_token(tokenize.ENDMARKER, '', (self.lnum, 0),
                          (self.lnum, 0), '')
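# The indents stack handling above mirrors the stdlib tokenizer: one INDENT
# token when a block opens, matching DEDENTs when it closes or at EOF, and a
# final ENDMARKER.  A quick illustration with the standard module:
import io
import tokenize

src = "def f():\n    if True:\n        return 1\n"
for tok in tokenize.generate_tokens(io.StringIO(src).readline):
    if tok.type in (tokenize.INDENT, tokenize.DEDENT, tokenize.ENDMARKER):
        print(tokenize.tok_name[tok.type], tok.start)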
def __init__(self, message, location, start):
    tokenize.TokenError()
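# As written, the __init__ above builds a tokenize.TokenError and discards
# it.  A minimal sketch of the usual pattern for such a wrapper exception;
# the class name, fields, and tokenize_or_wrap helper are hypothetical, not
# from the original source:
import io
import tokenize

class WrappedTokenError(Exception):
    def __init__(self, message, location, start):
        super().__init__(message)      # actually chain into the base class
        self.message = message
        self.location = location       # e.g. the name of the file being read
        self.start = start             # (row, col), as carried by TokenError

def tokenize_or_wrap(source, location):
    try:
        return list(tokenize.generate_tokens(io.StringIO(source).readline))
    except tokenize.TokenError as err:
        message, start = err.args
        raise WrappedTokenError(message, location, start) from err

# tokenize_or_wrap('x = (1,\n', '<input>') raises WrappedTokenError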