def LookAhead(self, lex_mode):
    # type: (lex_mode_t) -> Id_t
    """Peek at the next token that is not Id.WS_Space, in the given lexer mode.

    Only a local cursor moves; self.line_pos is left untouched.

    Known callers use at least these modes:
      lex_mode_e.Arith -- to tell ${a[@]} apart from ${a[1+2]}
      lex_mode_e.VSub_1
      lex_mode_e.ShCommand

    Returns:
      The Id of the first non-space token found at or after self.line_pos,
      or Id.Unknown_Tok if the end of the line is reached first.
    """
    line = self.line
    cursor = self.line_pos
    line_len = len(line)

    # Lookahead stops at the end of the line: going further would mean
    # interacting with the line reader, and that is never needed.  The OUTER
    # mode has an explicit newline token, but ARITH does not, so hitting the
    # end is reported as Id.Unknown_Tok (the fall-through return below).
    while cursor != line_len:
        tok_type, next_cursor = match.OneToken(lex_mode, line, cursor)

        # NOTE: The skipped token is hard-coded, and it only appears in the
        # OUTER state.  It could be passed in instead:
        # LookAhead(lex_mode, past_token_type)
        if tok_type != Id.WS_Space:
            return tok_type
        cursor = next_cursor

    return Id.Unknown_Tok
def Read(self, lex_mode):
    # type: (lex_mode_t) -> token
    """Lex one token at self.line_pos in the given mode and advance past it.

    Returns:
      A token carrying the matched Id, the matched text, and a span ID.
      When the match is Id.Eol_Tok, a token with an empty value and
      runtime.NO_SPID is returned instead, and self.line_pos is not moved.
    """
    # Inner loop optimization: read the attributes into locals once.
    src = self.line
    start = self.line_pos

    tok_type, end_pos = match.OneToken(lex_mode, src, start)
    if tok_type == Id.Eol_Tok:
        # Sentinel: do NOT add a span for it.
        return token(tok_type, '', runtime.NO_SPID)

    tok_val = src[start:end_pos]

    # NOTE: The arena hook lives in LineLexer rather than Lexer because it
    # should be "low level".  The only thing fabricated here is a newline
    # added at the last line, so we don't end with \0.
    if not self.arena_skip:
        span_id = self.arena.AddLineSpan(self.line_id, start, len(tok_val))
        self.last_span_id = span_id
    else:
        # Make another token from the last span, then clear the one-shot flag.
        assert self.last_span_id != runtime.NO_SPID
        span_id = self.last_span_id
        self.arena_skip = False

    tok = token(tok_type, tok_val, span_id)

    # Advance only after the span and token were produced successfully.
    self.line_pos = end_pos
    return tok
def Read(self, lex_mode):
    # type: (lex_mode_t) -> Token
    """Lex one token at self.line_pos in the given mode and advance past it.

    Returns:
      A Token built from the matched Id, a span ID, and the token's value.
      The value is None for the kinds listed below, otherwise the matched
      text.  When the match is Id.Eol_Tok, the shared _EOL_TOK is returned
      and self.line_pos is not moved.
    """
    # Inner loop optimization: read the attributes into locals once.
    src = self.line
    start = self.line_pos

    tok_type, end_pos = match.OneToken(lex_mode, src, start)
    if tok_type == Id.Eol_Tok:
        # Sentinel: do NOT add a span for it.
        return _EOL_TOK

    # Save on allocations!  We often don't look at the token value.
    # TODO: can inline this function with formula on 16-bit Id.
    kind = consts.GetKind(tok_type)

    # The value is skipped via a blacklist of kinds for now; a whitelist
    # (Lit, VSub, Redir, Char, Backtick, KW, Right) didn't work well:
    # - Kind.KW is sometimes a literal in a word
    # - Kind.Right is for " in here docs.  Lexer isn't involved.
    # - There was also a not-yet-understood error with Kind.Left
    tok_val = None  # type: Optional[str]
    if kind not in (Kind.Arith, Kind.Op, Kind.WS, Kind.Ignored, Kind.Eof):
        tok_val = src[start:end_pos]

    # NOTE: The arena hook lives in LineLexer rather than Lexer because it
    # should be "low level".  The only thing fabricated here is a newline
    # added at the last line, so we don't end with \0.
    if not self.arena_skip:
        span_len = end_pos - start
        span_id = self.arena.AddLineSpan(self.line_id, start, span_len)
        self.last_span_id = span_id
    else:
        # Make another token from the last span, then clear the one-shot flag.
        assert self.last_span_id != runtime.NO_SPID
        span_id = self.last_span_id
        self.arena_skip = False

    tok = Token(tok_type, span_id, tok_val)

    # Advance only after the span and token were produced successfully.
    self.line_pos = end_pos
    return tok