def _ReadLikeDQ(self, left_dq_token, out_parts):
    # type: (Optional[token], List[word_part_t]) -> None
    """Read word parts in the DQ lexer mode, appending each to out_parts.

    Serves both double-quoted strings and here doc bodies.  The two cases
    differ only in how a closing double quote and EOF are handled.

    Args:
      left_dq_token: The opening quote token when reading a double quoted
        part, or None when reading a here doc.
      out_parts: list of word_part to append to
    """
    while True:
        self._Next(lex_mode_e.DQ)
        self._Peek()
        kind = self.token_kind

        if kind == Kind.Lit:
            if self.token_type == Id.Lit_EscapedChar:
                out_parts.append(word_part.EscapedLiteralPart(self.cur_token))
            else:
                out_parts.append(word_part.LiteralPart(self.cur_token))
            continue

        if kind == Kind.Left:
            out_parts.append(self._ReadDoubleQuotedLeftParts())
            continue

        if kind == Kind.VSub:
            out_parts.append(word_part.SimpleVarSub(self.cur_token))
            continue

        if kind == Kind.Right:
            assert self.token_type == Id.Right_DoubleQuote, self.token_type
            if left_dq_token:
                # Closing quote of a real double-quoted string: finished.
                return
            # In a here doc, the right quote is literal!
            out_parts.append(word_part.LiteralPart(self.cur_token))
            continue

        if kind == Kind.Eof:
            if not left_dq_token:
                # here docs will have an EOF in their token stream
                return
            # p_die presumably raises (it is defined elsewhere); if it ever
            # returned, we would simply keep reading.
            p_die(
                'Unexpected EOF reading double-quoted string that began here',
                token=left_dq_token)
            continue

        # No other token kind is expected in this mode.
        raise AssertionError(self.cur_token)
def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok,
                      lex_mode=lex_mode_e.Outer, empty_ok=True):
    # type: (Id_t, lex_mode_t, bool) -> word__CompoundWord
    """Read consecutive word parts into a single CompoundWord.

    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but it
    could be an operator delimiting a compound word.  Can we change lexer modes
    and remove this special case?

    Args:
      eof_type: Token type that ends the word when it is seen (subject to
        empty_ok).
      lex_mode: Lexer mode passed to self._Next() when advancing to the next
        token.
      empty_ok: If False, eof_type is ignored on the first loop iteration and
        that token is classified by the other branches instead.

    Returns:
      The CompoundWord whose .parts list was filled in by this loop.
    """
    word = osh_word.CompoundWord()
    # Counts loop iterations rather than appended parts; it is only consulted
    # for the empty_ok check below.
    num_parts = 0
    done = False
    while not done:
        self._Peek()
        allow_done = empty_ok or num_parts != 0
        if allow_done and self.token_type == eof_type:
            done = True  # e.g. for ${foo//pat/replace}

        # Keywords like "for" are treated like literals
        elif self.token_kind in (
                Kind.Lit, Kind.History, Kind.KW, Kind.Assign, Kind.ControlFlow,
                Kind.BoolUnary, Kind.BoolBinary):
            if self.token_type == Id.Lit_EscapedChar:
                part = word_part.EscapedLiteralPart(
                    self.cur_token)  # type: word_part_t
            else:
                part = word_part.LiteralPart(self.cur_token)
            word.parts.append(part)

            if self.token_type == Id.Lit_VarLike:  # foo=
                # A '(' right after foo= starts an array literal, which is
                # read as an extra part of this same word.
                t = self.lexer.LookAhead(lex_mode_e.Outer)
                if t.id == Id.Op_LParen:
                    # NOTE(review): presumably makes the lexer report the
                    # closing ')' as Right_ArrayLiteral -- confirm against
                    # lexer.PushHint.
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_ArrayLiteral)
                    part2 = self._ReadArrayLiteralPart()
                    word.parts.append(part2)

        elif self.token_kind == Kind.VSub:
            part = word_part.SimpleVarSub(self.cur_token)
            word.parts.append(part)

        elif self.token_kind == Kind.ExtGlob:
            part = self._ReadExtGlobPart()
            word.parts.append(part)

        elif self.token_kind == Kind.Left:
            part = self._ReadLeftParts()
            word.parts.append(part)

        # NOT done yet, will advance below
        elif self.token_kind == Kind.Right:
            # Still part of the word; will be done on the next iter.
            if self.token_type == Id.Right_DoubleQuote:
                pass
            elif self.token_type == Id.Right_CommandSub:
                pass
            elif self.token_type == Id.Right_Subshell:
                # LEXER HACK for (case x in x) ;; esac )
                assert self.next_lex_mode is None  # Rewind before it's used
                # Only when the unread succeeds do we register the hint and
                # schedule a re-read; the word ends in either case.
                if self.lexer.MaybeUnreadOne():
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
                    self._Next(lex_mode)
                done = True
            else:
                done = True

        elif self.token_kind == Kind.Ignored:
            done = True

        else:
            # LEXER HACK for unbalanced case clause.  'case foo in esac' is
            # valid, so to test for ESAC, we can read ) before getting a chance
            # to PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread
            # one token and do it again.

            # We get Id.Op_RParen at top level:      case x in x) ;; esac
            # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
            if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    if self.token_type == Id.Eof_RParen:
                        # Redo translation
                        self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
                    self._Next(lex_mode)

            done = True  # anything we don't recognize means we're done

        # Advance only while the word continues; the branches that end the
        # word have already called _Next themselves where a re-read is needed.
        if not done:
            self._Next(lex_mode)
        num_parts += 1
    return word