def _ReadLikeDQ(self, left_dq_token, out_parts):
    # type: (Optional[token], List[word_part_t]) -> None
    """Read the interior of a double-quoted string, or of a here doc body.

    Consumes tokens in DQ lexer mode and appends one word_part per token
    (or per left-bracketed construct) until the terminator is reached.

    Args:
      left_dq_token: A token if we are reading a double quoted part, or None
        if we're reading a here doc.
      out_parts: list of word_part to append to
    """
    done = False
    while not done:
        self._Next(lex_mode_e.DQ)
        self._Peek()

        if self.token_kind == Kind.Lit:
            if self.token_type == Id.Lit_EscapedChar:
                part = word_part.EscapedLiteralPart(
                    self.cur_token)  # type: word_part_t
            else:
                part = word_part.LiteralPart(self.cur_token)
            out_parts.append(part)

        elif self.token_kind == Kind.Left:
            # e.g. $(, ${, $(( -- delegate to the left-parts reader.
            part = self._ReadDoubleQuotedLeftParts()
            out_parts.append(part)

        elif self.token_kind == Kind.VSub:
            # e.g. $foo, $?, $1
            part = word_part.SimpleVarSub(self.cur_token)
            out_parts.append(part)

        elif self.token_kind == Kind.Right:
            # In DQ mode the only Right token is the closing double quote.
            assert self.token_type == Id.Right_DoubleQuote, self.token_type
            if left_dq_token:
                done = True
            else:
                # In a here doc, the right quote is literal!
                out_parts.append(word_part.LiteralPart(self.cur_token))

        elif self.token_kind == Kind.Eof:
            if left_dq_token:
                # An opened double quote was never closed.
                p_die(
                    'Unexpected EOF reading double-quoted string that began here',
                    token=left_dq_token)
            else:  # here docs will have an EOF in their token stream
                done = True

        else:
            # Any other kind is a lexer/parser invariant violation.
            raise AssertionError(self.cur_token)
def _ReadArithWord(self):
    # type: () -> Tuple[word_t, bool]
    """Helper function for ReadArithWord.

    Reads one word in arithmetic lexer mode.

    Returns:
      A (word, done) pair.  (None, True) means the token was ignorable
      whitespace and the wrapper should call again.

    Raises:
      AssertionError: on a token kind that should be impossible in
        arithmetic mode (indicates a lexer/parser bug).
    """
    self._Peek()

    if self.token_kind == Kind.Unknown:
        p_die('Unexpected token in arithmetic context', token=self.cur_token)

    elif self.token_kind == Kind.Eof:
        # Just return EOF token
        w = osh_word.TokenWord(self.cur_token)  # type: word_t
        return w, False

    elif self.token_kind == Kind.Ignored:
        # Space should be ignored.  TODO: change this to SPACE_SPACE and
        # SPACE_NEWLINE?  or SPACE_TOK.
        self._Next(lex_mode_e.Arith)
        return None, True  # Tell wrapper to try again

    elif self.token_kind in (Kind.Arith, Kind.Right):
        # Id.Right_ArithSub IS just a normal token, handled by ArithParser
        self._Next(lex_mode_e.Arith)
        w = osh_word.TokenWord(self.cur_token)
        return w, False

    elif self.token_kind in (Kind.Lit, Kind.Left):
        # A compound word, e.g. a bare identifier or ${...} / $(...)
        w = self._ReadCompoundWord(lex_mode=lex_mode_e.Arith)
        return w, False

    elif self.token_kind == Kind.VSub:
        # e.g. $foo inside $(( ))
        part = word_part.SimpleVarSub(self.cur_token)
        self._Next(lex_mode_e.Arith)
        w = osh_word.CompoundWord([part])
        return w, False

    else:
        # Was 'assert False, ...' followed by an unreachable raise.  A bare
        # assert is stripped under 'python -O', which would let this branch
        # fall through silently; raise explicitly instead.
        raise AssertionError(
            'Unexpected token parsing arith sub: %s' % self.cur_token)
def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok,
                      lex_mode=lex_mode_e.Outer, empty_ok=True):
    # type: (Id_t, lex_mode_t, bool) -> word__CompoundWord
    """Read a compound word: a sequence of adjacent word parts.

    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but it
    could be an operator delimiting a compound word.  Can we change lexer modes
    and remove this special case?

    Args:
      eof_type: token Id that terminates the word (Id.Undefined_Tok = none).
      lex_mode: lexer mode to advance in between parts.
      empty_ok: if False, eof_type may not terminate a zero-part word.
    """
    word = osh_word.CompoundWord()

    num_parts = 0
    done = False
    while not done:
        self._Peek()

        # Only allow the eof_type delimiter to end the word once we have at
        # least one part (unless an empty word is acceptable).
        allow_done = empty_ok or num_parts != 0
        if allow_done and self.token_type == eof_type:
            done = True  # e.g. for ${foo//pat/replace}

        # Keywords like "for" are treated like literals
        elif self.token_kind in (Kind.Lit, Kind.History, Kind.KW,
                                 Kind.Assign, Kind.ControlFlow,
                                 Kind.BoolUnary, Kind.BoolBinary):
            if self.token_type == Id.Lit_EscapedChar:
                part = word_part.EscapedLiteralPart(
                    self.cur_token)  # type: word_part_t
            else:
                part = word_part.LiteralPart(self.cur_token)
            word.parts.append(part)

            if self.token_type == Id.Lit_VarLike:  # foo=
                # Look ahead for '(' to detect an array literal: foo=( ... )
                t = self.lexer.LookAhead(lex_mode_e.Outer)
                if t.id == Id.Op_LParen:
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_ArrayLiteral)
                    part2 = self._ReadArrayLiteralPart()
                    word.parts.append(part2)

        elif self.token_kind == Kind.VSub:
            # e.g. $foo, $?, $1
            part = word_part.SimpleVarSub(self.cur_token)
            word.parts.append(part)

        elif self.token_kind == Kind.ExtGlob:
            # e.g. @(foo|bar)
            part = self._ReadExtGlobPart()
            word.parts.append(part)

        elif self.token_kind == Kind.Left:
            # e.g. ${, $(, $((, ", '
            part = self._ReadLeftParts()
            word.parts.append(part)

        # NOT done yet, will advance below
        elif self.token_kind == Kind.Right:
            # Still part of the word; will be done on the next iter.
            if self.token_type == Id.Right_DoubleQuote:
                pass
            elif self.token_type == Id.Right_CommandSub:
                pass
            elif self.token_type == Id.Right_Subshell:
                # LEXER HACK for (case x in x) ;; esac )
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
                    self._Next(lex_mode)
                done = True
            else:
                done = True

        elif self.token_kind == Kind.Ignored:
            done = True

        else:
            # LEXER HACK for unbalanced case clause. 'case foo in esac' is valid,
            # so to test for ESAC, we can read ) before getting a chance to
            # PushHint(Id.Op_RParen, Id.Right_CasePat). So here we unread one
            # token and do it again.

            # We get Id.Op_RParen at top level:      case x in x) ;; esac
            # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
            if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    if self.token_type == Id.Eof_RParen:
                        # Redo translation
                        self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
                    self._Next(lex_mode)

            done = True  # anything we don't recognize means we're done

        if not done:
            self._Next(lex_mode)
            num_parts += 1
    return word