Esempio n. 1
0
    def LookAhead(self, lex_mode):
        # type: (lex_mode_t) -> Id_t
        """Peek at the next token that is not Id.WS_Space, without consuming it.

        Scanning starts at self.line_pos and uses a local cursor only, so
        self.line_pos is NOT advanced.

        Called with at least the following modes:
          lex_mode_e.Arith -- for ${a[@]} vs ${a[1+2]}
          lex_mode_e.VSub_1
          lex_mode_e.ShCommand

        Args:
          lex_mode: lexer mode handed to match.OneToken() for every token.

        Returns:
          The Id of the first token that isn't Id.WS_Space, or Id.Unknown_Tok
          when the end of the line is reached first.
        """
        cursor = self.line_pos
        line_len = len(self.line)

        while cursor != line_len:
            tok_id, next_pos = match.OneToken(lex_mode, self.line, cursor)

            # NOTE: Instead of hard-coding this token, we could pass it in.
            # This one only appears in OUTER state!
            # LookAhead(lex_mode, past_token_type)
            if tok_id != Id.WS_Space:
                return tok_id

            cursor = next_pos  # skip the whitespace token and keep looking

        # We don't allow lookahead while already at end of line, because it
        # would involve interacting with the line reader, and we never need
        # it.  In the OUTER mode, there is an explicit newline token, but
        # ARITH doesn't have it.
        return Id.Unknown_Tok
Esempio n. 2
0
  def Read(self, lex_mode):
    # type: (lex_mode_t) -> token
    """Lex one token at self.line_pos and advance past it.

    Args:
      lex_mode: lexer mode handed to match.OneToken().

    Returns:
      A token holding the token type, the matched text, and a span ID.  For
      Id.Eol_Tok the token has an empty value and runtime.NO_SPID; in that
      case no span is added and self.line_pos is left unchanged.
    """
    # Inner loop optimization: read the attributes into locals once.
    src = self.line
    start = self.line_pos

    tok_id, stop = match.OneToken(lex_mode, src, start)
    if tok_id == Id.Eol_Tok:
      # Do NOT add a span for this sentinel!
      return token(tok_id, '', runtime.NO_SPID)

    text = src[start:stop]

    # NOTE: We're putting the arena hook in LineLexer and not Lexer because we
    # want it to be "low level".  The only thing fabricated here is a newline
    # added at the last line, so we don't end with \0.
    if not self.arena_skip:
      span_id = self.arena.AddLineSpan(self.line_id, start, len(text))
      self.last_span_id = span_id
    else:
      # Make another token from the last span, then clear the one-shot flag.
      assert self.last_span_id != runtime.NO_SPID
      span_id = self.last_span_id
      self.arena_skip = False
    #log('LineLexer.Read() span ID %d for %s', span_id, tok_id)

    result = token(tok_id, text, span_id)
    self.line_pos = stop
    return result
Esempio n. 3
0
    def Read(self, lex_mode):
        # type: (lex_mode_t) -> Token
        """Lex one token at self.line_pos and advance past it.

        Args:
          lex_mode: lexer mode handed to match.OneToken().

        Returns:
          _EOL_TOK when the lexer reports Id.Eol_Tok (no span is added and
          self.line_pos is left unchanged); otherwise a new Token carrying
          the token type, a span ID, and the token text.  The text is None
          for the kinds Arith, Op, WS, Ignored and Eof.
        """
        # Inner loop optimization: read the attributes into locals once.
        src = self.line
        start = self.line_pos

        tok_id, stop = match.OneToken(lex_mode, src, start)
        if tok_id == Id.Eol_Tok:
            # Do NOT add a span for this sentinel!
            return _EOL_TOK

        # Save on allocations!  We often don't look at the token value, so
        # the substring is only sliced out for kinds not on the skip list.
        # TODO: can inline this function with formula on 16-bit Id.
        #
        # A whitelist of kinds didn't work well, so a blacklist is used:
        # - Kind.KW is sometimes a literal in a word
        # - Kind.Right is for " in here docs.  Lexer isn't involved.
        # - Kind.Left also produced an error that isn't understood yet
        tok_val = None  # type: Optional[str]
        if consts.GetKind(tok_id) not in (
                Kind.Arith, Kind.Op, Kind.WS, Kind.Ignored, Kind.Eof):
            tok_val = src[start:stop]

        # NOTE: We're putting the arena hook in LineLexer and not Lexer
        # because we want it to be "low level".  The only thing fabricated
        # here is a newline added at the last line, so we don't end with \0.
        if not self.arena_skip:
            span_id = self.arena.AddLineSpan(self.line_id, start, stop - start)
            self.last_span_id = span_id
        else:
            # Make another token from the last span, then clear the flag.
            assert self.last_span_id != runtime.NO_SPID
            span_id = self.last_span_id
            self.arena_skip = False
        #log('LineLexer.Read() span ID %d for %s', span_id, tok_id)

        result = Token(tok_id, span_id, tok_val)
        self.line_pos = stop
        return result