def _ThreeArgs(w_parser):
  # type: (_StringWordEmitter) -> bool_expr_t
  """Build a bool_expr_t from exactly three words of a test expression."""
  w0 = w_parser.Read()
  w1 = w_parser.Read()
  w2 = w_parser.Read()

  # Binary operators are checked first; the order of these tests matters.
  binary_id = match.BracketBinary(w1.s)
  if binary_id != Id.Undefined_Tok:
    return bool_expr.Binary(binary_id, w0, w2)

  middle = w1.s
  if middle == '-a' or middle == '-o':
    # Both operands are implicit word tests, e.g. [ x -a y ]
    lhs = bool_expr.WordTest(w0)
    rhs = bool_expr.WordTest(w2)
    if middle == '-a':
      return bool_expr.LogicalAnd(lhs, rhs)
    return bool_expr.LogicalOr(lhs, rhs)

  if w0.s == '!':
    # Negation of a two-word expression: re-parse the rest.
    w_parser.Rewind(2)
    return bool_expr.LogicalNot(_TwoArgs(w_parser))

  if w0.s == '(' and w2.s == ')':
    return bool_expr.WordTest(w1)

  p_die('Expected binary operator, got %r (3 args)', w1.s, word=w1)
def _ParseStep(self):
  # type: () -> int
  """Consume '..' and the following integer step; zero is rejected."""
  self._Next()  # past Dots

  step_str = self._Eat(Id.Range_Int)
  step = int(step_str)
  if step == 0:
    p_die("Step can't be 0", span_id=self.span_id)
  return step
def LeftIndex(p, w, left, unused_bp):
  # type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
  """Led handler for '[': array indexing, in both LValue and RValue context.

  LValue: f[0] = 1   f[x+1] = 2
  RValue: a = f[0]   b = f[x+1]

  On the RHS you could imagine f(x, y)[0] or f[0][0], but only a plain
  indexable base is accepted.  On the LHS, only a[i] = ... is valid:
  function calls return copies (they need a name, at least in osh), and
  strings don't have mutable characters.
  """
  # Reject non-indexable bases, e.g. f(x)[0].
  if not tdop.IsIndexable(left, p.parse_opts.parse_dynamic_arith()):
    p_die("The [ operator doesn't apply to this expression", word=w)

  subscript = p.ParseUntil(0)  # ] has bp = -1
  p.Eat(Id.Arith_RBracket)

  return arith_expr.Binary(word_.ArithId(w), left, subscript)
def _NameInRegex(self, negated_tok, tok):
  # type: (Token, Token) -> re_t
  """Resolve a bare name appearing in an Eggex regex into a re_t node."""
  if negated_tok:  # kept only for error locations
    negated_speck = speck(negated_tok.id, negated_tok.span_id)
  else:
    negated_speck = None

  name = tok.val
  if name == 'dot':
    if negated_tok:
      p_die("Can't negate this symbol", token=tok)
    return tok

  if name in POSIX_CLASSES:
    return posix_class(negated_speck, name)

  translated = PERL_CLASSES.get(name)
  if translated is not None:
    return perl_class(negated_speck, translated)

  # Capitalized names, e.g. HexDigit, splice in a user-defined pattern.
  if name[0].isupper():
    return re.Splice(tok)

  p_die("%r isn't a character class", name, token=tok)
def _PlaceList(self, p_node):
  # type: (PNode) -> List[place_expr_t]
  """
  place_list: expr (',' expr)*

  Converts each parsed expression into a place (assignment target),
  dying on anything that isn't assignable.
  """
  assert p_node.typ == grammar_nt.place_list
  places = []  # type: List[place_expr_t]
  n = len(p_node.children)
  for i in xrange(0, n, 2):  # was children[::2]  (skip ',' separators)
    p = p_node.children[i]
    e = self.Expr(p)
    UP_e = e
    tag = e.tag_()
    if tag == expr_e.Var:  # COMPATIBILITY hack
      # A bare variable parses as expr.Var; convert it to place_expr.Var.
      e = cast(expr__Var, UP_e)
      places.append(place_expr.Var(e.name))
    elif tag in (place_expr_e.Var, place_expr_e.Subscript,
                 place_expr_e.Attribute):
      # Already a place; narrow the static type.
      places.append(cast(place_expr_t, UP_e))
    else:
      # This blame mechanism seems to work.  Otherwise we don't have a method
      # to blame an arbitrary expr_t.
      p_die("Can't assign to this expression",
            token=p.tok if p.tok else None)
  return places
def _NameInClass(self, negated_tok, tok):
  # type: (Token, Token) -> class_literal_term_t
  """Resolve a bare name inside a character class literal.

  Unlike _NameInRegex, 'dot' has no special meaning here, and a single
  letter like `d` is the literal character 'd', NOT `digit`.
  """
  if negated_tok:  # kept only for error locations
    negated_speck = speck(negated_tok.id, negated_tok.span_id)
  else:
    negated_speck = None

  name = tok.val

  # A bare, unquoted character literal.  In the grammar, this is expressed
  # as range_char without an ending.
  if len(name) == 1:
    # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
    assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)
    if negated_tok:  # [~d] is not allowed, only [~digit]
      p_die("Can't negate this symbol", token=tok)
    return class_literal_term.CharLiteral(tok)

  # Multi-letter names: digit, word, etc.
  if name in POSIX_CLASSES:
    return posix_class(negated_speck, name)

  translated = PERL_CLASSES.get(name)
  if translated is not None:
    return perl_class(negated_speck, translated)

  p_die("%r isn't a character class", name, token=tok)
def ParseUntil(self, rbp):
  # type: (int) -> arith_expr_t
  """
  Parse to the right, eating tokens until we encounter a token with binding
  power LESS THAN OR EQUAL TO rbp.

  Classic top-down operator-precedence (Pratt) loop: parse a prefix with the
  token's nud handler, then fold in infix operators via led handlers while
  their left binding power exceeds rbp.
  """
  # TODO: use Kind.Eof
  if self.op_id in (Id.Eof_Real, Id.Eof_RParen, Id.Eof_Backtick):
    p_die('Unexpected end of input', word=self.cur_word)

  t = self.cur_word
  null_info = self.spec.LookupNud(self.op_id)

  self.Next()  # skip over the token, e.g. ! ~ + -
  node = null_info.nud(self, t, null_info.bp)

  while True:
    t = self.cur_word
    left_info = self.spec.LookupLed(self.op_id)

    # Examples:
    # If we see 1*2+  , rbp = 27 and lbp = 25, so stop.
    # If we see 1+2+  , rbp = 25 and lbp = 25, so stop.
    # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
    if rbp >= left_info.lbp:
      break
    self.Next()  # skip over the token, e.g. / *

    node = left_info.led(self, t, node, left_info.rbp)

  return node
def _TwoArgs(w_parser):
  # type: (_StringWordEmitter) -> bool_expr_t
  """Build a bool_expr_t from exactly two words: '! W' or 'UNARY_OP W'."""
  w0 = w_parser.Read()
  w1 = w_parser.Read()

  s0 = w0.s
  if s0 == '!':
    return bool_expr.LogicalNot(bool_expr.WordTest(w1))

  unary_id = Id.Undefined_Tok

  # Oil's preferred long flags
  if s0.startswith('--'):
    if s0 == '--dir':
      unary_id = Id.BoolUnary_d
    elif s0 == '--exists':
      unary_id = Id.BoolUnary_e
    elif s0 == '--file':
      unary_id = Id.BoolUnary_f
    elif s0 == '--symlink':
      unary_id = Id.BoolUnary_L

  # Fall back to the short operators, e.g. -d -e -f -L
  if unary_id == Id.Undefined_Tok:
    unary_id = match.BracketUnary(s0)

  if unary_id == Id.Undefined_Tok:
    p_die('Expected unary operator, got %r (2 args)', s0, word=w0)

  return bool_expr.Unary(unary_id, w1)
def Eat(self, token_type):
  # type: (Id_t) -> None
  """Assert that we're at the expected token and advance past it.

  Dies (via p_die, which raises) on a mismatch, so Next() only runs on
  the success path.
  """
  if self.AtToken(token_type):
    self.Next()
    return
  p_die('Parser expected %s, got %s',
        ui.PrettyId(token_type), ui.PrettyId(self.op_id),
        word=self.cur_word)
def ParseForBuiltin(self):
  # type: () -> bool_expr_t
  """Entry point for the 'test' builtin: parse one expression to EOF."""
  self._Next()

  expr_node = self.ParseExpr()

  # The whole argv must be consumed; anything left over is an error.
  if self.bool_id != Id.Eof_Real:
    p_die('Unexpected trailing word %s', word_.Pretty(self.cur_word),
          word=self.cur_word)

  return expr_node
def Parse(self):
  # type: () -> bool_expr_t
  """Entry point for [[ ]]: parse one expression, then require ]]."""
  self._Next()

  expr_node = self.ParseExpr()
  if self.op_id != Id.Lit_DRightBracket:
    # NOTE: This might be better as unexpected token, since ]] doesn't
    # always make sense.
    p_die('Expected ]]', word=self.cur_word)

  return expr_node
def _ParseFormatStr(self):
  # type: () -> printf_part_t
  """Parse one printf % directive (flags, width, precision, type) and
  validate osh's restrictions on it.
  """
  self._Next(lex_mode_e.PrintfPercent)  # move past %

  part = printf_part.Percent()
  while self.token_type in (Id.Format_Flag, Id.Format_Zero):
    # space and + could be implemented
    flag = self.cur_token.val
    if flag in '# +':
      p_die("osh printf doesn't support the %r flag", flag,
            token=self.cur_token)

    part.flags.append(self.cur_token)
    self._Next(lex_mode_e.PrintfPercent)

  if self.token_type in (Id.Format_Num, Id.Format_Star):
    part.width = self.cur_token
    self._Next(lex_mode_e.PrintfPercent)

  if self.token_type == Id.Format_Dot:
    # precision first holds the '.' token; overwritten below if a number
    # or '*' follows, so bare '%.f' still records a precision for blame.
    part.precision = self.cur_token
    self._Next(lex_mode_e.PrintfPercent)  # past dot
    if self.token_type in (Id.Format_Num, Id.Format_Star, Id.Format_Zero):
      part.precision = self.cur_token
      self._Next(lex_mode_e.PrintfPercent)

  if self.token_type in (Id.Format_Type, Id.Format_Time):
    part.type = self.cur_token

    # ADDITIONAL VALIDATION outside the "grammar".
    if part.type.val in 'eEfFgG':
      p_die("osh printf doesn't support floating point", token=part.type)
    # These two could be implemented.  %c needs utf-8 decoding.
    if part.type.val == 'c':
      p_die("osh printf doesn't support single characters (bytes)",
            token=part.type)
  else:
    if self.cur_token.val:
      msg = 'Invalid printf format character'
    else:  # for printf '%'
      msg = 'Expected a printf format character'
    p_die(msg, token=self.cur_token)

  # Do this check AFTER the floating point checks
  # (the else branch above dies, so part.type is always set here)
  if part.precision and part.type.val[-1] not in 'fsT':
    p_die("printf precision can't be specified with type %r" % part.type.val,
          token=part.precision)

  return part
def _TwoArgs(w_parser):
  # type: (_StringWordEmitter) -> bool_expr_t
  """Returns an expression tree to be evaluated.

  Reads exactly two words: either '! WORD' or 'UNARY_OP WORD'.
  """
  w0 = w_parser.Read()
  w1 = w_parser.Read()
  if w0.s == '!':
    return bool_expr.LogicalNot(bool_expr.WordTest(w1))
  unary_id = match.BracketUnary(w0.s)
  # BUG FIX: match.BracketUnary() signals failure with Id.Undefined_Tok
  # (an Id, not an int), matching match.BracketBinary() and the other
  # _TwoArgs variants in this file.  Comparing against -1 meant an invalid
  # unary operator was never diagnosed here.
  if unary_id == Id.Undefined_Tok:
    # TODO:
    # - separate lookup by unary
    p_die('Expected unary operator, got %r (2 args)', w0.s, word=w0)
  return bool_expr.Unary(unary_id, w1)
def _ParseFormatStr(self):
  # type: () -> printf_part_t
  """
  fmt production

  Parses one printf % directive (flags, width, precision, type) and
  validates osh's restrictions on it.
  """
  self._Next(lex_mode_e.PrintfPercent)  # move past %

  part = printf_part.Percent()
  while self.token_type in (Id.Format_Flag, Id.Format_Zero):
    # space and + could be implemented
    flag = self.cur_token.val
    if flag in '# +':
      p_die("osh printf doesn't support the %r flag", flag,
            token=self.cur_token)

    part.flags.append(self.cur_token)
    self._Next(lex_mode_e.PrintfPercent)

  if self.token_type in (Id.Format_Num, Id.Format_Star):
    part.width = self.cur_token
    self._Next(lex_mode_e.PrintfPercent)

  if self.token_type == Id.Format_Dot:
    # precision first holds the '.' token; overwritten below if a number
    # or '*' follows.
    part.precision = self.cur_token
    self._Next(lex_mode_e.PrintfPercent)  # past dot
    if self.token_type in (Id.Format_Num, Id.Format_Star, Id.Format_Zero):
      part.precision = self.cur_token
      self._Next(lex_mode_e.PrintfPercent)

  if self.token_type in (Id.Format_Type, Id.Format_Time):
    part.type = self.cur_token

    # ADDITIONAL VALIDATION outside the "grammar".
    if part.type.val in 'eEfFgG':
      p_die("osh printf doesn't support floating point", token=part.type)
    # These two could be implemented.  %c needs utf-8 decoding.
    if part.type.val == 'c':
      p_die("osh printf doesn't support single characters (bytes)",
            token=part.type)
  elif self.token_type == Id.Unknown_Tok:
    p_die('Invalid printf format character', token=self.cur_token)
  else:
    p_die('Expected a printf format character', token=self.cur_token)

  return part
def _TwoArgs(w_parser):
  # type: (_StringWordEmitter) -> bool_expr_t
  """Build a bool_expr_t from exactly two words: '! W' or 'UNARY_OP W'."""
  first = w_parser.Read()
  # TODO: Implement --dir, --file, --exists here
  # --symlink, maybe --executable
  second = w_parser.Read()

  if first.s == '!':
    return bool_expr.LogicalNot(bool_expr.WordTest(second))

  op_id = match.BracketUnary(first.s)
  if op_id == Id.Undefined_Tok:
    # TODO:
    # - separate lookup by unary
    p_die('Expected unary operator, got %r (2 args)', first.s, word=first)

  return bool_expr.Unary(op_id, second)
def _Tuple(self, children):
  # type: (List[PNode]) -> expr_t
  """Build a tuple expression, rejecting one-element comma tuples."""
  n = len(children)

  # (x) -- just a parenthesized expression, not a tuple
  if n == 1:
    return self.Expr(children[0])

  # x, and (x,) aren't allowed
  if n == 2:
    p_die('Write singleton tuples with tup(), not a trailing comma',
          token=children[1].tok)

  elts = []  # type: List[expr_t]
  for i in xrange(0, n, 2):  # skip commas
    elts.append(self.Expr(children[i]))

  return expr.Tuple(elts, expr_context_e.Store)  # unused expr_context_e
def _Classify(gr, tok):
  # type: (Grammar, Token) -> int
  """Map a token to the ilabel integer that the pgen2 parser expects.

  Must mirror what ParserGenerator.make_grammar() did when calling
  make_label() and make_first().  See classify() in opy/pgen2/driver.py.
  """
  # 'x' and 'for' are both tokenized as Expr_Name; keywords are looked up
  # first so 'for' doesn't classify as a plain name.
  if tok.id == Id.Expr_Name and tok.val in gr.keywords:
    return gr.keywords[tok.val]

  # Ordinary tokens, handling the 'x' case.
  typ = tok.id
  if typ in gr.tokens:
    return gr.tokens[typ]

  if tok.id == Id.Unknown_Tok:
    type_str = ''
  else:
    type_str = ' (%s)' % ui.PrettyId(tok.id)
  p_die('Unexpected token in expression mode%s', type_str, token=tok)
def CheckLhsExpr(node, dynamic_arith, blame_word):
  # type: (arith_expr_t, bool, word_t) -> None
  """Determine if a node is a valid L-value by whitelisting tags.

  Valid:
    x = y
    a[1] = y
  Invalid:
    a[0][0] = y

  Dies (via p_die) when the node isn't assignable; returns None otherwise.
  """
  UP_node = node
  if node.tag_() == arith_expr_e.Binary:
    node = cast(arith_expr__Binary, UP_node)
    # a[i] is OK only when the base is a plain var ref / word.
    if node.op_id == Id.Arith_LBracket and _VarRefOrWord(node.left, dynamic_arith):
      return
    # But a[0][0] = 1 is NOT valid.

  if _VarRefOrWord(node, dynamic_arith):
    return

  p_die("Left-hand side of this assignment is invalid", word=blame_word)
def ParseVarDecl(self, kw_token, lexer):
  # type: (Token, Lexer) -> Tuple[command__VarDecl, Token]
  """
  var mylist = [1, 2, 3]

  Parses the expression after the 'var' keyword and returns the VarDecl
  node plus the last token consumed (so the caller can resume lexing).
  """
  # TODO: We do need re-entrancy for var x = @[ (1+2) ] and such
  if self.parsing_expr:
    p_die("ShAssignment expression can't be nested like this", token=kw_token)

  # try/finally guarantees the re-entrancy guard is cleared even when the
  # expression parser dies with a parse error.
  self.parsing_expr = True
  try:
    pnode, last_token = self.e_parser.Parse(lexer, grammar_nt.oil_var_decl)
  finally:
    self.parsing_expr = False

  if 0:  # debugging aid, normally disabled
    self.p_printer.Print(pnode)

  ast_node = self.tr.MakeVarDecl(pnode)
  ast_node.keyword = kw_token  # VarDecl didn't fill this in

  return ast_node, last_token
def Parse(self, lexer, start_symbol):
  # type: (Lexer, int) -> Tuple[PNode, Token]
  """Parse an Oil expression with the pgen2 push parser.

  Returns the parse tree root and the last token consumed.
  """
  # Reuse the parser
  self.push_parser.setup(start_symbol)
  try:
    last_token = _PushOilTokens(self.parse_ctx, self.gr, self.push_parser,
                                lexer)
  except parse.ParseError as e:
    #log('ERROR %s', e)
    # TODO:
    # - Describe what lexer mode we're in (Invalid syntax in regex)
    # - Maybe say where the mode started
    # - Id.Unknown_Tok could say "This character is invalid"

    # ParseError has a "too much input" case but I haven't been able to
    # tickle it.  Maybe it's because of the Eof tokens?
    p_die('Syntax error in expression (near %s)', ui.PrettyId(e.tok.id),
          token=e.tok)
    #raise error.Parse('Syntax error in expression', token=e.tok)

  return self.push_parser.rootnode, last_token
def Parse(lexer):
  # type: (Lexer) -> List[Token]
  """Given a QSN literal in a string, return the corresponding byte string.

  Grammar:

      qsn = SingleQuote Kind.Char* SingleQuote Whitespace? Eof_Real

  Returns the list of Kind.Char tokens between the quotes; dies on any
  malformed input.
  """
  tok = lexer.Read(lex_mode_e.QSN)
  # Caller ensures this.  It's really a left single quote.
  assert tok.id == Id.Right_SingleQuote

  result = []  # type: List[Token]
  while True:
    tok = lexer.Read(lex_mode_e.QSN)
    #log('tok = %s', tok)
    if tok.id == Id.Unknown_Tok:  # extra error
      p_die('Unexpected token in QSN string', token=tok)
    kind = consts.GetKind(tok.id)
    if kind != Kind.Char:
      break  # non-Char token should be the closing quote; checked below
    result.append(tok)

  if tok.id != Id.Right_SingleQuote:
    p_die('Expected closing single quote in QSN string', token=tok)

  # HACK: read in shell's SQ_C mode to get whitespace, which is disallowed
  # INSIDE QSN.  This gets Eof_Real too.
  tok = lexer.Read(lex_mode_e.SQ_C)

  # Doesn't work because we want to allow literal newlines / tabs
  if tok.id == Id.Char_Literals:
    if not _IsWhitespace(tok.val):
      p_die("Unexpected data after closing quote", token=tok)
    tok = lexer.Read(lex_mode_e.QSN)

  if tok.id != Id.Eof_Real:
    p_die('Unexpected token after QSN string', token=tok)

  return result
def _RangeChar(self, p_node):
  # type: (PNode) -> str
  """Evaluate a range endpoint to a single character.

  - the 'a' in 'a'-'z'
  - the \x00 in \x00-\x01 etc.

  TODO: This function doesn't respect the LST invariant.
  """
  assert p_node.typ == grammar_nt.range_char, p_node
  children = p_node.children
  typ = children[0].typ
  if ISNONTERMINAL(typ):
    # 'a' in 'a'-'b'
    if typ == grammar_nt.sq_string:
      sq_part = cast(single_quoted, children[0].children[1].tok)
      tokens = sq_part.tokens
      # Multiple tokens can happen with multiline single-quoted strings;
      # a range endpoint must be exactly one character.
      if len(tokens) > 1:
        p_die(RANGE_POINT_TOO_LONG, part=sq_part)
      if len(tokens[0].val) > 1:
        p_die(RANGE_POINT_TOO_LONG, part=sq_part)
      s = tokens[0].val[0]
      return s

    if typ == grammar_nt.char_literal:
      # TODO: This brings in a lot of dependencies, and this type checking
      # errors.  We want to respect the LST invariant anyway.
      raise AssertionError('TODO')
      #from osh import word_compile
      #tok = children[0].children[0].tok
      #s = word_compile.EvalCStringToken(tok.id, tok.val)
      #return s

    raise NotImplementedError()
  else:
    # Expr_Name or Expr_DecInt
    tok = p_node.tok
    if tok.id in (Id.Expr_Name, Id.Expr_DecInt):
      # For the a in a-z, 0 in 0-9
      if len(tok.val) != 1:
        p_die(RANGE_POINT_TOO_LONG, token=tok)
      return tok.val[0]

    raise NotImplementedError()
def _GetLine(self):
  # type: () -> Optional[str]
  # This reader deliberately refuses to produce lines: constructs that need
  # more input (here docs) are not allowed inside expressions.
  p_die("Here docs aren't allowed in expressions", token=self.blame_token)
def _ReAtom(self, p_atom):
  # type: (PNode) -> re_t
  """
  re_atom: ( char_literal | ... )

  Translate one atom of the Eggex regex grammar into a re_t node,
  dispatching first on nonterminal type, then on token id.
  """
  assert p_atom.typ == grammar_nt.re_atom, p_atom.typ

  children = p_atom.children
  typ = children[0].typ

  if ISNONTERMINAL(typ):
    p_child = p_atom.children[0]
    if typ == grammar_nt.class_literal:
      return re.ClassLiteral(False, self._ClassLiteral(p_child))

    # The following four are parsed by mutually-recursive word/expr parsers
    # and were smuggled through pgen2 as "casted dummy" tokens.
    if typ == grammar_nt.braced_var_sub:
      return cast(braced_var_sub, p_child.children[1].tok)

    if typ == grammar_nt.dq_string:
      return cast(double_quoted, p_child.children[1].tok)

    if typ == grammar_nt.sq_string:
      return cast(single_quoted, p_child.children[1].tok)

    if typ == grammar_nt.simple_var_sub:
      return simple_var_sub(children[0].tok)

    if typ == grammar_nt.char_literal:
      return children[0].tok

    raise NotImplementedError(typ)

  else:
    tok = children[0].tok

    # Special punctuation
    if tok.id in (Id.Expr_Dot, Id.Arith_Caret, Id.Expr_Dollar):
      return speck(tok.id, tok.span_id)

    # TODO: d digit can turn into PosixClass and PerlClass right here!
    # It's parsing.
    if tok.id == Id.Expr_Name:
      return self._NameInRegex(None, tok)

    if tok.id == Id.Expr_Symbol:
      # Validate symbols here, like we validate PerlClass, etc.
      if tok.val in ('%start', '%end', 'dot'):
        return tok
      p_die("Unexpected token %r in regex", tok.val, token=tok)

    if tok.id == Id.Expr_At:
      # | '@' Expr_Name
      return re.Splice(children[1].tok)

    if tok.id == Id.Arith_Tilde:
      # | '~' [Expr_Name | class_literal]
      typ = children[1].typ
      if ISNONTERMINAL(typ):
        return re.ClassLiteral(True, self._ClassLiteral(children[1]))
      else:
        return self._NameInRegex(tok, children[1].tok)

    if tok.id == Id.Op_LParen:
      # | '(' regex ')'
      # Note: in ERE (d+) is the same as <d+>.  That is, Group becomes
      # Capture.
      return re.Group(self._Regex(children[1]))

    if tok.id == Id.Arith_Less:
      # | '<' regex [':' name_type] '>'
      regex = self._Regex(children[1])
      n = len(children)
      if n == 5:
        # TODO: Add type expression
        # YES
        # < d+ '.' d+ : ratio Float >
        # < d+ : month Int >
        # INVALID
        # < d+ : month List[int] >
        name_tok = children[3].children[0].tok
      else:
        name_tok = None
      return re.Capture(regex, name_tok)

    if tok.id == Id.Arith_Colon:
      # | ':' '(' regex ')'
      raise NotImplementedError(Id_str(tok.id))

    raise NotImplementedError(Id_str(tok.id))
def ParseFactor(self):
  # type: () -> bool_expr_t
  """
  Factor  : WORD
          | UNARY_OP WORD
          | WORD BINARY_OP WORD
          | '(' Expr ')'

  Lexer-mode switching here is order-sensitive: the RHS of =~ must be read
  in BashRegex mode.
  """
  if self.b_kind == Kind.BoolUnary:
    # Just save the type and not the token itself?
    op = self.op_id
    self._Next()
    w = self.cur_word
    # e.g. [[ -f < ]].  But [[ -f '<' ]] is OK

    tag = w.tag_()
    if tag != word_e.Compound and tag != word_e.String:
      p_die('Invalid argument to unary operator', word=w)
    self._Next()

    node = bool_expr.Unary(op, w)  # type: bool_expr_t
    return node

  if self.b_kind == Kind.Word:
    # Peek ahead another token.
    t2 = self._LookAhead()
    t2_op_id = word_.BoolId(t2)
    t2_b_kind = consts.GetKind(t2_op_id)

    #log('t2 %s / t2_op_id %s / t2_b_kind %s', t2, t2_op_id, t2_b_kind)
    # Op for < and >, -a and -o pun
    if t2_b_kind == Kind.BoolBinary or t2_op_id in (Id.Op_Less, Id.Op_Great):
      left = self.cur_word

      self._Next()
      op = self.op_id

      # TODO: Need to change to lex_mode_e.BashRegex.
      # _Next(lex_mode) then?
      is_regex = t2_op_id == Id.BoolBinary_EqualTilde
      if is_regex:
        self._Next(lex_mode=lex_mode_e.BashRegex)
      else:
        self._Next()

      right = self.cur_word
      if is_regex:
        # NOTE: StaticEval for checking regex syntax isn't enough.  We could
        # need to pass do_ere so that the quoted parts get escaped.
        #ok, s, unused_quoted = word_.StaticEval(right)
        pass

      self._Next()
      return bool_expr.Binary(op, left, right)
    else:
      # [[ foo ]] -- a bare word is a non-empty test
      w = self.cur_word
      self._Next()
      return bool_expr.WordTest(w)

  if self.op_id == Id.Op_LParen:
    self._Next()
    node = self.ParseExpr()
    if self.op_id != Id.Op_RParen:
      p_die('Expected ), got %s', word_.Pretty(self.cur_word),
            word=self.cur_word)
    self._Next()
    return node

  # It's not WORD, UNARY_OP, or '('
  p_die('Unexpected token in boolean expression', word=self.cur_word)
def Parse(self):
  # type: () -> word_part__BracedRange
  """Parse a brace range like {1..9..2} or {a..z}, validating the step
  direction and filling in a default step of +/-1.
  """
  self._Next()
  if self.token_type == Id.Range_Int:
    part = self._ParseRange(self.token_type)

    # Check step validity and fill in a default
    start = int(part.start)
    end = int(part.end)
    if start < end:
      if part.step == NO_STEP:
        part.step = 1
      if part.step <= 0:  # 0 step is not allowed
        p_die('Invalid step %d for ascending integer range', part.step,
              span_id=self.span_id)
    elif start > end:
      if part.step == NO_STEP:
        part.step = -1
      if part.step >= 0:  # 0 step is not allowed
        p_die('Invalid step %d for descending integer range', part.step,
              span_id=self.span_id)
    else:
      # {1..1} singleton range is dumb but I suppose consistent
      part.step = 1

  elif self.token_type == Id.Range_Char:
    part = self._ParseRange(self.token_type)

    # Compare integers because mycpp doesn't support < on strings!
    start_num = ord(part.start[0])
    end_num = ord(part.end[0])

    # Check step validity and fill in a default
    if start_num < end_num:
      if part.step == NO_STEP:
        part.step = 1
      if part.step <= 0:  # 0 step is not allowed
        p_die('Invalid step %d for ascending character range', part.step,
              span_id=self.span_id)
    elif start_num > end_num:
      if part.step == NO_STEP:
        part.step = -1
      if part.step >= 0:  # 0 step is not allowed
        p_die('Invalid step %d for descending character range', part.step,
              span_id=self.span_id)
    else:
      # {a..a} singleton range is dumb but I suppose consistent
      part.step = 1

    # Check matching cases: {a..Z} is rejected.
    upper1 = part.start.isupper()
    upper2 = part.end.isupper()
    if upper1 != upper2:
      p_die('Mismatched cases in character range', span_id=self.span_id)

  else:
    raise _NotARange('')

  # prevent unexpected trailing tokens
  self._Eat(Id.Eol_Tok)
  return part
def _PushOilTokens(parse_ctx, gr, p, lex):
  # type: (ParseContext, Grammar, parse.Parser, Lexer) -> Token
  """Push tokens onto pgen2's parser.

  Returns the last token so it can be reused/seen by the CommandParser.

  Sub-languages (array literals, command subs, quoted strings, var subs)
  are handled by recursing into the word/command parsers and pushing the
  resulting AST node through pgen2 as a single "casted dummy" token.
  """
  #log('keywords = %s', gr.keywords)
  #log('tokens = %s', gr.tokens)

  last_token = None  # type: Optional[Token]
  prev_was_newline = False

  balance = 0  # to ignore newlines inside brackets/parens

  while True:
    if last_token:  # e.g. left over from WordParser
      tok = last_token
      #log('last_token = %s', last_token)
      last_token = None
    else:
      tok = lex.Read(lex_mode_e.Expr)
      #log('tok = %s', tok)

    # Comments and whitespace.  Newlines aren't ignored.
    if consts.GetKind(tok.id) == Kind.Ignored:
      continue

    # For multiline lists, maps, etc.
    if tok.id == Id.Op_Newline:
      if balance > 0:
        #log('*** SKIPPING NEWLINE')
        continue
      # Eliminate duplicate newline tokens.  It makes the grammar simpler,
      # and it's consistent with CPython's lexer and our own WordParser.
      if prev_was_newline:
        continue
      prev_was_newline = True
    else:
      prev_was_newline = False

    balance += _OTHER_BALANCE.get(tok.id, 0)
    #log('BALANCE after seeing %s = %d', tok.id, balance)

    #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
    #  tok.id = KEYWORDS[tok.val]
    #  log('Replaced with %s', tok.id)

    assert tok.id < 256, Id_str(tok.id)

    ilabel = _Classify(gr, tok)
    #log('tok = %s, ilabel = %d', tok, ilabel)

    # addtoken() returns True when the start symbol is complete.
    if p.addtoken(tok.id, tok, ilabel):
      return tok

    #
    # Mutually recursive calls into the command/word parsers.
    #

    if mylib.PYTHON:
      if tok.id == Id.Left_PercentParen:  # %(
        left_tok = tok
        lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

        # Blame the opening token
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)
        words = []
        close_tok = None  # type: Optional[Token]
        while True:
          w = w_parser.ReadWord(lex_mode_e.ShCommand)
          if 0:
            log('w = %s', w)

          if w.tag_() == word_e.Token:
            tok = cast(Token, w)
            if tok.id == Id.Right_ShArrayLiteral:
              close_tok = tok
              break
            elif tok.id == Id.Op_Newline:  # internal newlines allowed
              continue
            else:
              # Token
              p_die('Unexpected token in array literal: %r', tok.val, word=w)

          assert isinstance(w, compound_word)  # for MyPy
          words.append(w)

        words2 = braces.BraceDetectAll(words)
        words3 = word_.TildeDetectAll(words2)

        typ = Id.Expr_CastedDummy

        lit_part = sh_array_literal(left_tok, words3)
        opaque = cast(Token, lit_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        # Now push the closing )
        ilabel = _Classify(gr, close_tok)
        done = p.addtoken(tok.id, close_tok, ilabel)
        assert not done  # can't end the expression

        continue

      # $(  @(  &(
      if tok.id in (Id.Left_DollarParen, Id.Left_AtParen, Id.Left_AmpParen):
        left_token = tok

        lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        c_parser = parse_ctx.MakeParserForCommandSub(line_reader, lex,
                                                     Id.Eof_RParen)
        node = c_parser.ParseCommandSub()
        # A little gross: Copied from osh/word_parse.py
        right_token = c_parser.w_parser.cur_token

        cs_part = command_sub(left_token, node)
        cs_part.spids.append(left_token.span_id)
        cs_part.spids.append(right_token.span_id)

        typ = Id.Expr_CastedDummy
        opaque = cast(Token, cs_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        # Now push the closing )
        ilabel = _Classify(gr, right_token)
        done = p.addtoken(right_token.id, right_token, ilabel)
        assert not done  # can't end the expression

        continue

      if tok.id == Id.Left_DoubleQuote:
        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        parts = []  # type: List[word_part_t]
        last_token = w_parser.ReadDoubleQuoted(left_token, parts)
        expr_dq_part = double_quoted(left_token, parts)

        typ = Id.Expr_CastedDummy
        opaque = cast(Token, expr_dq_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        continue

      if tok.id == Id.Left_DollarBrace:
        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        part, last_token = w_parser.ReadBracedVarSub(left_token)

        # It's casted word_part__BracedVarSub -> dummy -> expr__BracedVarSub!
        typ = Id.Expr_CastedDummy
        opaque = cast(Token, part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression

        continue

      # '' and r'' and c''
      if tok.id in (Id.Left_SingleQuote, Id.Left_RSingleQuote,
                    Id.Left_CSingleQuote):
        if tok.id == Id.Left_CSingleQuote:
          sq_mode = lex_mode_e.SQ_C
        else:
          sq_mode = lex_mode_e.SQ_Raw

        left_token = tok
        line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
        w_parser = parse_ctx.MakeWordParser(lex, line_reader)

        tokens = []  # type: List[Token]
        last_token = w_parser.ReadSingleQuoted(sq_mode, left_token, tokens,
                                               True)

        sq_part = single_quoted(left_token, tokens)

        typ = Id.Expr_CastedDummy
        opaque = cast(Token, sq_part)  # HACK for expr_to_ast
        done = p.addtoken(typ, opaque, gr.tokens[typ])
        assert not done  # can't end the expression
        continue

  else:
    # NOTE(review): while/else on 'while True' is unreachable; the function
    # returns from inside the loop.  Kept as-is from upstream.
    # We never broke out -- EOF is too soon (how can this happen???)
    raise parse.ParseError("incomplete input", tok.id, tok)
def LeftError(p, t, left, rbp):
  # type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
  """Led handler for tokens that are never valid in infix position."""
  # Hm is this not called because of binding power?
  p_die("Token can't be used in infix position", word=t)
  return None  # never reached
def DoRedirect(self, node, local_symbols):
  """Translate a redirect node for the osh-to-oil conversion.

  NOTE(review): both translation branches below are disabled with
  'if False' / 'elif False', so this method currently always falls into
  the else branch and raises AssertionError -- it appears deliberately
  turned off pending a redesign (see the sketches in the trailing
  comments).  Confirm before re-enabling.
  """
  #print(node, file=sys.stderr)
  op_spid = node.op.span_id
  op_id = node.op.id
  self.cursor.PrintUntil(op_spid)

  # TODO:
  # - Do < and <& the same way.
  # - How to handle here docs and here strings?
  # - >> becomes >+ or >-, or maybe >>>

  #if node.tag == redir_e.Redir:
  if False:
    if node.fd == runtime.NO_SPID:
      if op_id == Id.Redir_Great:
        self.f.write('>')  # Allow us to replace the operator
        self.cursor.SkipUntil(op_spid + 1)
      elif op_id == Id.Redir_GreatAnd:
        self.f.write('> !')  # Replace >& 2 with > !2
        spid = word_.LeftMostSpanForWord(node.arg_word)
        self.cursor.SkipUntil(spid)
        #self.DoWordInCommand(node.arg_word)

    else:
      # NOTE: Spacing like !2>err.txt vs !2 > err.txt can be done in the
      # formatter.
      self.f.write('!%d ' % node.fd)
      if op_id == Id.Redir_Great:
        self.f.write('>')
        self.cursor.SkipUntil(op_spid + 1)
      elif op_id == Id.Redir_GreatAnd:
        self.f.write('> !')  # Replace 1>& 2 with !1 > !2
        spid = word_.LeftMostSpanForWord(node.arg_word)
        self.cursor.SkipUntil(spid)

    self.DoWordInCommand(node.arg_word, local_symbols)

  #elif node.tag == redir_e.HereDoc:
  elif False:
    ok, delimiter, delim_quoted = word_.StaticEval(node.here_begin)
    if not ok:
      p_die('Invalid here doc delimiter', word=node.here_begin)

    # Turn everything into <<.  We just change the quotes
    self.f.write('<<')

    #here_begin_spid2 = word_.RightMostSpanForWord(node.here_begin)
    if delim_quoted:
      self.f.write(" '''")
    else:
      self.f.write(' """')

    delim_end_spid = word_.RightMostSpanForWord(node.here_begin)
    self.cursor.SkipUntil(delim_end_spid + 1)

    #self.cursor.SkipUntil(here_begin_spid + 1)

    # Now print the lines.  TODO: Have a flag to indent these to the level
    # of the owning command, e.g.
    #   cat <<EOF
    # EOF
    # Or since most here docs are the top level, you could just have a hack
    # for a fixed indent?  TODO: Look at real use cases.
    for part in node.stdin_parts:
      self.DoWordPart(part, local_symbols)

    self.cursor.SkipUntil(node.here_end_span_id + 1)
    if delim_quoted:
      self.f.write("'''\n")
    else:
      self.f.write('"""\n')

    # Need
    #self.cursor.SkipUntil(here_end_spid2)

  else:
    raise AssertionError(node.__class__.__name__)

  # <<< 'here word'
  # << 'here word'
  #
  # 2> out.txt
  # !2 > out.txt
  #
  # cat 1<< EOF
  # hello $name
  # EOF
  # cat !1 << """
  # hello $name
  # """
  #
  # cat << 'EOF'
  # no expansion
  # EOF
  # cat <<- 'EOF'
  # no expansion and indented
  #
  # cat << '''
  # no expansion
  # '''
  # cat << '''
  # no expansion and indented
  # '''

  # Warn about multiple here docs on a line.
  # As an obscure feature, allow
  # cat << \'ONE' << \"TWO"
  # 123
  # ONE
  # 234
  # TWO
  # The _ is an indicator that it's not a string to be piped in.
  pass
def NullError(p, t, bp):
  # type: (TdopParser, word_t, int) -> arith_expr_t
  """Nud handler for tokens that are never valid in prefix position."""
  # TODO: I need position information
  p_die("Token can't be used in prefix position", word=t)
  return None  # never reached