def _process_leading_whitespace(self, token: TokenInfo):
    if not self.scopes:
        if token.is_WS_NL or token.type == tk.INDENT:
            return token, REPLACE
        else:
            return TokenInfo(type=tk.WHITESPACE, string=""), INSERT
    if token.is_WS:
        orig_lws = token.string
        action = REPLACE
    elif not token.is_NL:
        orig_lws = ""
        action = INSERT
    else:
        return token, REPLACE
    indentation = (self.spaced_indent_str * self.scopes[-1].indent_level
                   + self.scopes[-1].leading_whitespace)
    orig_lws = self._to_spaced(orig_lws)
    if token.leading_whitespace is not None:
        orig_parent_lws = self._to_spaced(token.leading_whitespace)
        if orig_lws.startswith(orig_parent_lws):
            # Keep whatever extra indentation the token had beyond its parent.
            indentation += orig_lws[len(orig_parent_lws):]
        else:
            # The token was dedented relative to its parent; trim accordingly.
            indentation = indentation[:len(orig_lws) - len(orig_parent_lws)]
    indentation = self._restore_tabbed(indentation)
    return TokenInfo(type=tk.WHITESPACE, string=indentation), action

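# A minimal standalone sketch (not part of the module; all names below are
# hypothetical) of the relative re-indentation rule above, using plain
# strings in place of token.leading_whitespace and the token's own string.
def _relative_indent_sketch(indentation, parent_lws, child_lws):
    # If the child is indented at least as far as its parent, the surplus is
    # appended to the scope's computed indentation; otherwise the computed
    # indentation is trimmed by the (negative) length difference.
    if child_lws.startswith(parent_lws):
        return indentation + child_lws[len(parent_lws):]
    return indentation[:len(child_lws) - len(parent_lws)]

# E.g. _relative_indent_sketch("    ", "  ", "      ") == "        "
#      _relative_indent_sketch("    ", "  ", "") == "  "
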
def r(ctx: Context, token: TokenInfo):
    if ctx.pop_op()[0].annotation == A.CLS_BODY_LSQB:
        ctx.pop_state()
        ctx.push_state(State.EXPECT_SUBCLS_DOT)
    sentinel = TokenInfo.new_sentinel_before(token, A.STMT_END)
    ctx.push_ret(sentinel)
    token.annotation = A.CLS_BODY_RSQB
    ctx.push_ret(token)
    return actions.Default(dont_store=True)

def r(ctx: Context, token: TokenInfo):
    sentinel = TokenInfo.new_sentinel_before(token, A.STMT_END)
    ctx.push_ret(sentinel)
    token.annotation = A.STMT_COMMA
    ctx.push_ret(token)
    sentinel = TokenInfo.new_sentinel_after(token, A.STMT_START)
    ctx.push_ret(sentinel)
    return actions.Default(dont_store=True)

def r(ctx: Context, token: TokenInfo):
    ctx.pop_state()
    ctx.push_state(State.IN_LBDX_BODY_LIST)
    token.annotation = A.BODY_LSQB
    ctx.push_op(token)
    ctx.push_ret(token)
    sentinel = TokenInfo.new_sentinel_after(token, A.STMT_START)
    ctx.push_ret(sentinel)
    return actions.Default(dont_store=True)

def r(ctx: Context, token: TokenInfo):
    if ctx.last_op[0].annotation not in (A.BODY_LSQB, A.CLS_BODY_LSQB):
        return
    sentinel = TokenInfo.new_sentinel_before(token, A.STMT_END)
    ctx.push_ret(sentinel)
    token.annotation = A.STMT_COMMA
    ctx.push_ret(token)
    sentinel = TokenInfo.new_sentinel_after(token, A.STMT_START)
    ctx.push_ret(sentinel)
    return actions.Default(dont_store=True)

def _handle_token(self, token: TokenInfo):
    if token.annotation == A.DECL_LPAR:
        self.scope_stack.append(token)
    elif token.annotation == A.DECL_RPAR:
        self.scope_stack.pop()
    elif not self.scope_stack:
        return
    if token.is_NL:
        self.newlined = True
        return
    if token.is_WS:
        if self.newlined:
            self.leading = True
            if not self.buffering:
                self.action = actions.StartBuffer()
        self.newlined = False
        return
    if not token.is_CMT:
        if self.buffering:
            self.action = actions.StopBuffer()
        self.leading = False
        self.newlined = False
        return
    # From here on the token is a comment.
    if not self.buffering and not self.newlined:
        yield TokenInfo(tk.WHITESPACE, " ")
        yield token
        self.action = actions.Default(dont_store=True)
        return
    if self.buffering:
        if any("\\" in x.string for x in self.buffer) or self.leading:
            self.action = actions.StopBuffer()
        else:
            yield TokenInfo(tk.WHITESPACE, " ")
            yield token
            self.action = actions.StopBuffer(dont_store=True,
                                             dont_yield_buffer=True)
        self.leading = False
        self.newlined = False
        return
    self.newlined = False

def r(ctx: Context, token: TokenInfo):
    ctx.pop_state()
    sentinel = TokenInfo.new_sentinel_before(token, A.STMT_END)
    ctx.push_ret(sentinel)
    token.annotation = A.STMT_COMMA
    ctx.push_ret(token)
    sentinel = TokenInfo.new_sentinel_after(token, A.STMT_START)
    ctx.push_ret(sentinel)
    if ctx.is_buffering():
        return actions.StopBuffer(dont_store=True)
    return actions.Default(dont_store=True)

def _handle_token(self, token: TokenInfo):
    ws_start = ws_end = None
    last_token = self.last_token
    if last_token is TokenInfo.fake or last_token.type == tk.ENCODING:
        pass
    elif last_token.is_WS_NL:  # NEWLINE or CONTINUE
        assert token.start[0] == last_token.end[0] + 1
        if token.start[1] != 0:
            ws_start = (token.start[0], 0)
            ws_end = token.start
    elif last_token.end != token.start:
        assert last_token.end[0] == token.start[0]
        ws_start = last_token.end
        ws_end = token.start
    if ws_start is not None:
        whitespace_token = TokenInfo(
            tk.WHITESPACE,
            token.line[ws_start[1]:ws_end[1]],
            ws_start,
            ws_end,
            token.line,
        )
        yield whitespace_token
    self.last_token = token
    yield token
    self.action = actions.Default(dont_store=True)

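# A minimal standalone sketch (assumed names, not part of the module) of the
# gap-filling rule above: any column gap between the previous token's end and
# the current token's start on the same row becomes an explicit whitespace
# span sliced out of the physical line.
def _gap_between_sketch(prev_end, cur_start, line):
    # prev_end and cur_start are (row, col) pairs on the same row.
    assert prev_end[0] == cur_start[0]
    return line[prev_end[1]:cur_start[1]]

# E.g. with line = "x  = 1", the token "x" ends at col 1 and "=" starts at
# col 3, so the synthesized WHITESPACE token carries "  " (two spaces).
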
def r(ctx: Context, token: TokenInfo):
    directive = token.lxfmt_directive()
    if directive == 'off' and ctx.last_state != State.DISABLED:
        ctx.push_state(State.DISABLED)
    elif directive == 'on' and ctx.last_state == State.DISABLED:
        ctx.pop_state()

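# Hedged usage sketch for the directive handler above. The exact comment
# spelling that lxfmt_directive() accepts is an assumption here; the intended
# effect is along these lines (note the guards prevent double-push/double-pop
# when the same directive is repeated):
#
#     a = [1,  2]   # reformatted normally
#     # lxfmt: off
#     b = [1,  2]   # formatting suspended (State.DISABLED pushed)
#     # lxfmt: on
#     c = [1,  2]   # State.DISABLED popped; formatting resumes
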
def _handle_token(self, token: TokenInfo):
    if token.annotation == A.STMT_END and not self.buffering:
        # Start collecting the tokens that follow the end of a statement.
        self.action = actions.StartBuffer()
    elif self.buffering and token.annotation in (A.BODY_RSQB, A.CLS_BODY_RSQB):
        # Closing bracket: mark the last buffered statement with a sentinel
        # recording whether a trailing comma was seen.
        pos = self.insert_last_stmt_at
        if pos is None:
            annotation = A.LAST_STMT_WITHOUT_COMMA
        else:
            annotation = A.LAST_STMT_WITH_COMMA
        pos = pos or 1
        yield from self.buffer[:pos]
        yield TokenInfo.new_sentinel_after(self.buffer[pos - 1], annotation)
        yield from self.buffer[pos:]
        self.insert_last_stmt_at = None
        self.action = actions.StopBuffer(dont_yield_buffer=True)
    elif self.buffering and token.annotation == A.STMT_START:
        # Another statement follows; flush the buffer unchanged.
        self.insert_last_stmt_at = None
        self.action = actions.StopBuffer()

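# A standalone sketch (hypothetical names) of the buffer splice above: the
# sentinel lands after buffer index pos - 1, where `pos or 1` means "right
# after the first buffered token when no trailing-comma position was
# recorded".
def _splice_sentinel_sketch(buffer, pos, sentinel):
    pos = pos or 1
    return buffer[:pos] + [sentinel] + buffer[pos:]

# E.g. _splice_sentinel_sketch(["<end>", ",", "\n"], 2, "<last>")
#      == ["<end>", ",", "<last>", "\n"]
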
def _handle_token(self, token: TokenInfo):
    if token.annotation == A.DECL_LPAR:
        self.scope_stack.append(token)
    elif token.annotation == A.DECL_RPAR:
        self.scope_stack.pop()
    elif not self.scope_stack:
        return
    if token.is_WS_NL:
        if not self.buffering:
            self.action = actions.StartBuffer()
        return
    if token.annotation in NORMALIZE_WHITESPACE_BEFORE:
        replacement = NORMALIZE_WHITESPACE_BEFORE[token.annotation]
        if (self.prev_non_ws_token.annotation != A.CLS_HEAD_RSQB
                and token.annotation == A.CLS_BODY_LSQB):
            # No space before a class-body bracket unless a head bracket
            # immediately precedes it.
            replacement = ""
        whitespace = TokenInfo(
            type=tk.WHITESPACE,
            string=replacement,
        )
        yield whitespace
        if self.buffering:
            self.action = actions.StopBuffer(dont_yield_buffer=True)
    elif self.buffering:
        self.action = actions.StopBuffer()
    self.prev_non_ws_token = token

def r(ctx: Context, token: TokenInfo):
    if ctx.is_buffering():
        return actions.StopBuffer(dont_consume=True)
    ctx.pop_state()
    ctx.push_state(State.IN_LBDX_CLS_BODY)
    token.annotation = A.CLS_BODY_LSQB
    ctx.push_op(token)
    _annotate_clause_declarer(ctx)
    ctx.cache = None
    ctx.push_ret(token)
    sentinel = TokenInfo.new_sentinel_after(token, A.STMT_START)
    ctx.push_ret(sentinel)
    return actions.Default(dont_store=True)

def r(ctx: Context, token: TokenInfo):
    ctx.pop_state()
    ctx.cache.annotation = A.DECL
    ctx.push_state(State.IN_LBDX_CALL)
    token.annotation = A.DECL_LPAR
    ctx.push_op(token)
    return actions.StopBuffer()

def r(ctx: Context, token: TokenInfo):
    t, state = ctx.pop_op()
    if (state not in (State.EXPECT_LBDX_LSQB, State.IN_LBDX_CALL)
            or not tk.ISMATCHED(t, token)):
        ctx.error()
    if t.annotation == A.DECL_LPAR:
        ctx.pop_state()
        ctx.pop_state()
        token.annotation = A.DECL_RPAR

def r(ctx: Context, token: TokenInfo):
    token.annotation = A.CLS_HEAD_LSQB
    ctx.push_op(token)
    ctx.pop_state()
    ctx.push_state(State.IN_LBDX_CLS_HEAD)
    _annotate_clause_declarer(ctx)
    ctx.cache = None
    if ctx.is_buffering():
        return actions.StopBuffer()

def _handle_token(self, token: TokenInfo):
    if self.newlined:
        if token.is_WS and self.last_token.annotation in (
            A.STMT_START,
            A.CLS_HEAD_LSQB,
        ):
            token.leading_whitespace = self.last_leading_whitespace
        elif token.is_WS:
            self.last_leading_whitespace = token.string
        else:
            self.last_leading_whitespace = ''
        self.newlined = False
    if token.is_NL:
        self.newlined = True
    return ()

def _handle_token(self, token: TokenInfo):
    if token.annotation == A.DECL_LPAR:
        self.scope_stack.append(token)
    elif token.annotation == A.DECL_RPAR:
        self.scope_stack.pop()
    elif not self.scope_stack:
        return
    if token.is_WS_NL:
        if not self.buffering:
            self.action = actions.StartBuffer()
        return
    if token.annotation in NORMALIZE_WHITESPACE_BEFORE:
        whitespace = TokenInfo(
            type=tk.WHITESPACE,
            string=NORMALIZE_WHITESPACE_BEFORE[token.annotation],
        )
        yield whitespace
        if self.buffering:
            self.action = actions.StopBuffer(dont_yield_buffer=True)
    elif self.buffering:
        self.action = actions.StopBuffer()

def _tokenize(readline, encoding):
    lnum = parenlev = continued = 0
    numchars = '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    if encoding is not None:
        if encoding == "utf-8-sig":
            # BOM will already have been stripped.
            encoding = "utf-8"
        yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
    last_line = b''
    line = b''
    while True:                                # loop over lines in stream
        try:
            # We capture the value of the line variable here because
            # readline uses the empty string '' to signal end of input,
            # hence `line` itself will always be overwritten at the end
            # of this loop.
            last_line = line
            line = readline()
        except StopIteration:
            line = b''

        if encoding is not None:
            line = line.decode(encoding)
        lnum += 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield TokenInfo(STRING, contstr + line[:end],
                                strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield TokenInfo(ERRORTOKEN, contstr + line,
                                strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line:
                break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ':
                    column += 1
                elif line[pos] == '\t':
                    column = (column // tabsize + 1) * tabsize
                elif line[pos] == '\f':
                    column = 0
                else:
                    break
                pos += 1
            if pos == max:
                break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    yield TokenInfo(COMMENT, comment_token, (lnum, pos),
                                    (lnum, pos + len(comment_token)), line)
                    pos += len(comment_token)
                yield TokenInfo(NL, line[pos:],
                                (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = _compile(PseudoToken).match(line, pos)
            if pseudomatch:                    # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                if start == end:
                    continue
                token, initial = line[start:end], line[start]

                if (initial in numchars or     # ordinary number
                        (initial == '.' and token != '.' and token != '...')):
                    yield TokenInfo(NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    if parenlev > 0:
                        yield TokenInfo(NL, token, spos, epos, line)
                    else:
                        yield TokenInfo(NEWLINE, token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield TokenInfo(COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = _compile(endpats[token])
                    endmatch = endprog.match(line, pos)
                    if endmatch:               # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield TokenInfo(STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)  # multiple lines
                        contstr = line[start:]
                        contline = line
                        break

                # Check up to the first 3 chars of the token to see if
                # they're in the single_quoted set. If so, they start
                # a string.
                # We're using the first 3, because we're looking for
                # "rb'" (for example) at the start of the token. If
                # we switch to longer prefixes, this needs to be
                # adjusted.
                # Note that initial == token[:1].
                # Also note that single quote checking must come after
                # triple quote checking (above).
                elif (initial in single_quoted or
                        token[:2] in single_quoted or
                        token[:3] in single_quoted):
                    if token[-1] == '\n':      # continued string
                        strstart = (lnum, start)
                        # Again, using the first 3 chars of the
                        # token. This is looking for the matching end
                        # regex for the correct type of quote
                        # character. So it's really looking for
                        # endpats["'"] or endpats['"'], by trying to
                        # skip string prefix characters, if any.
                        endprog = _compile(
                            endpats.get(initial) or
                            endpats.get(token[1]) or
                            endpats.get(token[2]))
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                      # ordinary string
                        yield TokenInfo(STRING, token, spos, epos, line)
                elif initial.isidentifier():   # ordinary name
                    yield TokenInfo(NAME, token, spos, epos, line)
                elif initial == '\\':          # continued stmt
                    continued = 1
                    yield TokenInfo(WHITESPACE, token, spos, epos, line)
                else:
                    if initial in '([{':
                        parenlev += 1
                    elif initial in ')]}':
                        parenlev -= 1
                    yield TokenInfo(OP, token, spos, epos, line)
            else:
                yield TokenInfo(ERRORTOKEN, line[pos],
                                (lnum, pos), (lnum, pos + 1), line)
                pos += 1

    # Add an implicit NEWLINE if the input doesn't end in one
    if last_line and last_line[-1] not in '\r\n':
        yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)),
                        (lnum - 1, len(last_line) + 1), '')
    for indent in indents[1:]:                 # pop remaining indent levels
        yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')

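# A minimal usage sketch of _tokenize (assuming the surrounding module's
# names are importable): it consumes a bytes readline callable plus an
# encoding and yields TokenInfo tuples. Unlike the stock CPython tokenizer
# it is adapted from, it also emits a WHITESPACE token for a backslash line
# continuation.
import io

source = b"x = 1\n"
for tok in _tokenize(io.BytesIO(source).readline, "utf-8"):
    print(tok)
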
def r(ctx: Context, token: TokenInfo):
    ctx.push_state(State.IN_LBDX_LAMBDA)
    token.annotation = A.DECL_LAMBDA

def r(ctx: Context, token: TokenInfo):
    if ctx.last_op[0].annotation != A.DECL_LPAR:
        ctx.error()
    token.annotation = A.DECL_ARG_COMMA

def r(ctx: Context, token: TokenInfo):
    ctx.pop_state()
    ctx.cache[0].annotation = A.AUGASSIGN_START
    token.annotation = A.AUGASSIGN_END
    return actions.StopBuffer()

def r(ctx: Context, token: TokenInfo):
    if ctx.pop_op()[1] == State.EXPECT_CLS_HEAD_LSQB:
        ctx.pop_state()
        ctx.push_state(State.EXPECT_CLS_BODY_LSQB)
    token.annotation = A.CLS_HEAD_RSQB

def _ensure_NL_exists_in_buffer(self):
    if not self.last_has_newline:
        self.last_NL = TokenInfo(tk.NL, "\n")
        self.buffer.append(self.last_NL)