def _ReadExtGlobPart(self):
    # type: () -> word_part__ExtGlobPart
    """Parse an extended glob like @(foo|bar) into an ExtGlobPart.

    Grammar:
      Item         = CompoundWord | EPSILON  # important: @(foo|) is allowed
      LEFT         = '@(' | '*(' | '+(' | '?(' | '!('
      RIGHT        = ')'
      ExtGlob      = LEFT (Item '|')* Item RIGHT  # ITEM may be empty
      CompoundWord includes ExtGlobPart

    Precondition: cur_token is the LEFT token (e.g. '@(').
    """
    left_token = self.cur_token
    arms = []  # type: List[word_t]
    # Span ids recorded: the LEFT token's, then (at the end) the RIGHT
    # token's, so the whole @(...) extent is covered.
    spids = []
    spids.append(left_token.span_id)

    # Translate the closing ')' so the lexer emits Right_ExtGlob for it.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)
    self._Next(lex_mode_e.ExtGlob)  # advance past LEFT

    read_word = False  # did we just read a word?  To handle @(||).

    while True:
      self._Peek()
      if self.token_type == Id.Right_ExtGlob:
        if not read_word:
          # A trailing '|' (or empty glob) means an empty final arm.
          arms.append(osh_word.CompoundWord())
        spids.append(self.cur_token.span_id)
        break
      elif self.token_type == Id.Op_Pipe:
        if not read_word:
          # Two '|' in a row, or a leading '|': empty arm.
          arms.append(osh_word.CompoundWord())
        read_word = False
        self._Next(lex_mode_e.ExtGlob)
      # lex mode EXTGLOB should only produce these 4 kinds of tokens
      elif self.token_kind in (Kind.Lit, Kind.Left, Kind.VSub, Kind.ExtGlob):
        w = self._ReadCompoundWord(lex_mode=lex_mode_e.ExtGlob)
        arms.append(w)
        read_word = True
      elif self.token_kind == Kind.Eof:
        p_die('Unexpected EOF reading extended glob that began here',
              token=left_token)
      else:
        raise AssertionError('Unexpected token %r' % self.cur_token)

    part = word_part.ExtGlobPart(left_token, arms)
    part.spids.extend(spids)
    return part
def EvalPrompt(self, val):
    """Perform the two evaluations that bash does.  Used by $PS1 and ${x@P}.

    First pass: expand backslash escapes (e.g. \\u, \\h).  Second pass: parse
    and evaluate the result like a double-quoted word.  Both passes cache
    their intermediate results keyed on the input string.
    """
    if val.tag != value_e.Str:
      return self.default_prompt  # no evaluation necessary

    # Pass 1: tokenize backslash escapes (cached per prompt string).
    tokens = self.tokens_cache.get(val.s)
    if tokens is None:
      tokens = list(match.PS1_LEXER.Tokens(val.s))
      self.tokens_cache[val.s] = tokens

    # Replace the escape codes with their values.
    ps1_str = self._ReplaceBackslashCodes(tokens)

    # Pass 2: parse like a double-quoted word (cached per expanded string).
    # NOTE: This is copied from the PS4 logic in Tracer.
    ps1_word = self.parse_cache.get(ps1_str)
    if ps1_word is None:
      w_parser = self.parse_ctx.MakeWordParserForPlugin(ps1_str, self.arena)
      try:
        ps1_word = w_parser.ReadForPlugin()
      except Exception:
        # On any parse failure, substitute a literal error marker.
        t = token(Id.Lit_Chars, '<ERROR: cannot parse PS1>', const.NO_INTEGER)
        ps1_word = word.CompoundWord([word_part.LiteralPart(t)])
      self.parse_cache[ps1_str] = ps1_word

    # Evaluate, e.g. "${debian_chroot}\u" -> '\u'
    # TODO: Handle runtime errors like unset variables, etc.
    return self.ex.word_ev.EvalWordToString(ps1_word).s
def testVarOps(self):
    """Exercise _EvalWordPart on set/unset vars, with and without suffix ops."""
    ev = InitEvaluator()  # initializes x=xxx and y=yyy

    def _EvalAndPrint(sub):
      # Evaluate one word part and dump the resulting part values.
      part_vals = []
      ev._EvalWordPart(sub, part_vals)
      print(part_vals)

    unset_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'unset'))
    set_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'x'))

    # Plain ${unset} and ${x}.
    _EvalAndPrint(unset_sub)
    _EvalAndPrint(set_sub)

    # Now add a default-value op, i.e. ${...:-default}.
    default_part = word_part.LiteralPart(token(Id.Lit_Chars, 'default'))
    arg_word = osh_word.CompoundWord([default_part])
    test_op = suffix_op.StringUnary(Id.VTest_ColonHyphen, arg_word)
    unset_sub.suffix_op = test_op
    set_sub.suffix_op = test_op

    _EvalAndPrint(unset_sub)
    _EvalAndPrint(set_sub)
def BraceExpandWords(words):
  # type: (List[word__CompoundWord]) -> List[word__CompoundWord]
  """Expand a list of words, replacing each BracedWordTree with its expansions.

  Plain CompoundWords pass through unchanged; a BracedWordTree contributes one
  CompoundWord per alternative produced by _BraceExpand.
  """
  expanded = []  # type: List[word__CompoundWord]
  for w in words:
    if not isinstance(w, word__BracedWordTree):
      expanded.append(w)  # ordinary word: pass through
      continue
    for parts in _BraceExpand(w.parts):
      expanded.append(word.CompoundWord(parts))
  return expanded
def ReadForPlugin(self):
    # type: () -> word__CompoundWord
    """Parse a plugin string such as $PS1 or $PS4.

    Works just like reading a here-doc line: "\n" is allowed, as are the
    typical substitutions ${x}, $(echo hi), and $((1 + 2)).
    """
    result = osh_word.CompoundWord()
    # No end delimiter; _ReadLikeDQ appends parts until the input runs out.
    self._ReadLikeDQ(None, result.parts)
    return result
def _EvalRedirect(self, n):
    """Evaluate one redirect AST node into a runtime redirect value.

    Args:
      n: a redir_e.Redir or redir_e.HereDoc node.

    Returns:
      redirect.PathRedirect, redirect.DescRedirect, or redirect.HereRedirect;
      None after a non-fatal evaluation error (reported via util.error).
    """
    # Use the operator's default fd unless one was written explicitly,
    # e.g. the 2 in 2>&1.
    fd = REDIR_DEFAULT_FD[n.op.id] if n.fd == const.NO_INTEGER else n.fd
    if n.tag == redir_e.Redir:
      redir_type = REDIR_ARG_TYPES[n.op.id]  # could be static in the LST?

      if redir_type == redir_arg_type_e.Path:
        # The argument is a filename, e.g. > out.txt
        # NOTE: no globbing.  You can write to a file called '*.py'.
        val = self.word_ev.EvalWordToString(n.arg_word)
        if val.tag != value_e.Str:  # TODO: This error never fires
          util.error("Redirect filename must be a string, got %s", val)
          return None
        filename = val.s
        if not filename:
          # Whether this is fatal depends on errexit.
          util.error("Redirect filename can't be empty")
          return None
        return redirect.PathRedirect(n.op.id, fd, filename)

      elif redir_type == redir_arg_type_e.Desc:  # e.g. 1>&2
        # The argument is a file descriptor number.
        val = self.word_ev.EvalWordToString(n.arg_word)
        if val.tag != value_e.Str:  # TODO: This error never fires
          util.error("Redirect descriptor should be a string, got %s", val)
          return None
        t = val.s
        if not t:
          util.error("Redirect descriptor can't be empty")
          return None
        try:
          target_fd = int(t)
        except ValueError:
          util.error(
              "Redirect descriptor should look like an integer, got %s", val)
          return None
        return redirect.DescRedirect(n.op.id, fd, target_fd)

      elif redir_type == redir_arg_type_e.Here:  # here word, e.g. <<< foo
        val = self.word_ev.EvalWordToString(n.arg_word)
        assert val.tag == value_e.Str, val
        # NOTE: bash and mksh both add \n
        return redirect.HereRedirect(fd, val.s + '\n')
      else:
        raise AssertionError('Unknown redirect op')

    elif n.tag == redir_e.HereDoc:
      # HACK: Wrap the here-doc body parts in a word to evaluate them.
      w = osh_word.CompoundWord(n.stdin_parts)
      val = self.word_ev.EvalWordToString(w)
      assert val.tag == value_e.Str, val
      return redirect.HereRedirect(fd, val.s)

    else:
      raise AssertionError('Unknown redirect type')
def TildeDetect(w):
  # type: (word_t) -> Optional[word_t]
  """Detect tilde expansion in a word.

  A leading LiteralPart with id Lit_TildeLike may need to become a
  TildeSubPart, depending on whether the following token begins with a
  slash.  If so, return a NEW word; otherwise return None.

  NOTE:
  - The regex for Lit_TildeLike could be expanded.  Right now it's
    conservative, like Lit_Chars without the /.
  - This could be written in a mutating style, since only the first part
    changes.  But we CANNOT know this during lexing.
  """
  # NOTE: BracedWordTree, EmptyWord, etc. can't be tilde expanded.
  if not isinstance(w, word__CompoundWord):
    return None

  assert w.parts, w
  first = w.parts[0]
  if _LiteralPartId(first) != Id.Lit_TildeLike:
    return None
  assert isinstance(first, word_part__LiteralPart)  # for MyPy

  if len(w.parts) == 1:  # can't be zero
    # Bare ~ or ~user: the whole word becomes a tilde sub.
    return word.CompoundWord([word_part.TildeSubPart(first.token)])

  second = w.parts[1]
  # NOTE: We could inspect the raw tokens.
  if _LiteralPartId(second) != Id.Lit_Chars:
    return None
  assert isinstance(second, word_part__LiteralPart)  # for MyPy

  if not second.token.val.startswith('/'):
    # Something like '~foo:bar', which doesn't have a slash.
    return None

  new_first = word_part.TildeSubPart(first.token)  # type: word_part_t
  return word.CompoundWord([new_first] + w.parts[1:])
def testBraceExpand(self):
    """Table-driven check of _BraceDetect/_BraceExpand on sample words."""

    def _ExpandAndPrint(parts, expected_count):
      # Expand, check the alternative count, and pretty-print each result.
      results = braces._BraceExpand(parts)
      self.assertEqual(expected_count, len(results))
      for alt_parts in results:
        _PrettyPrint(osh_word.CompoundWord(alt_parts))
      print('')

    # A word with no braces expands to itself.
    w = _assertReadWord(self, 'hi')
    _ExpandAndPrint(w.parts, 1)

    # (source, detected tree part count, expansion count)
    cases = [
        ('B-{a,b}-E', 3, 2),
        ('B-{a,={b,c,d}=,e}-E', 3, 5),
        ('B-{a,b}-{c,d}-E', 5, 4),
    ]
    for source, num_tree_parts, num_results in cases:
      w = _assertReadWord(self, source)
      tree = braces._BraceDetect(w)
      self.assertEqual(num_tree_parts, len(tree.parts))
      _PrettyPrint(tree)
      _ExpandAndPrint(tree.parts, num_results)
def DetectAssocPair(w):
  # type: (word__CompoundWord) -> Optional[Tuple[word__CompoundWord, word__CompoundWord]]
  """Like DetectAssignment, but for A=(['k']=v ['k2']=v)

  The key and the value are both strings.  So we just pick out word_part.
  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k] syntax is only
  used for associative array literals, as opposed to indexed array literals.

  Returns (key_word, value_word), or None if this isn't a [k]=v pair.
  """
  parts = w.parts
  # Must start with the '[' of [k]=v.
  if _LiteralPartId(parts[0]) != Id.Lit_LBracket:
    return None

  for i, part in enumerate(parts):
    if _LiteralPartId(part) == Id.Lit_ArrayLhsClose:  # ]=
      # e.g. for [$x$y]=$a$b, the key is $x$y and the value is $a$b
      key = word.CompoundWord(parts[1:i])
      value = word.CompoundWord(parts[i + 1:])
      return key, value

  return None  # no ]= found
def testMultiLine(self):
    """Newlines and comments between commands should be handled by ReadWord."""
    w_parser = _InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
    print('--MULTI')

    def _Lit(s):
      # A one-part literal CompoundWord for the string s.
      return osh_word.CompoundWord(
          [word_part.LiteralPart(token(Id.Lit_Chars, s))])

    expected_words = [
        _Lit('ls'),
        _Lit('foo'),
        osh_word.TokenWord(token(Id.Op_Newline, '\n')),
        _Lit('ls'),
        _Lit('bar'),
        osh_word.TokenWord(token(Id.Op_Newline, '\n')),
        osh_word.TokenWord(token(Id.Eof_Real, '')),
    ]
    for expected in expected_words:
      w = w_parser.ReadWord(lex_mode_e.Outer)
      test_lib.AssertAsdlEqual(self, expected, w)
def _EvalPS4(self):
    """For set -x.

    Returns (first_char, prefix): the repeat character (e.g. '+') and the
    evaluated remainder of $PS4.
    """
    val = self.mem.GetVar('PS4')
    assert val.tag == value_e.Str

    s = val.s
    if s:
      # The first character is the repeat char; the rest is evaluated.
      first_char, ps4 = s[0], s[1:]
    else:
      first_char, ps4 = '+', ' '  # default

    # NOTE: This cache is slightly broken because aliases are mutable!  I
    # think that is more or less harmless though.
    try:
      ps4_word = self.parse_cache[ps4]
    except KeyError:
      # We have to parse this at runtime.  PS4 should usually remain constant.
      w_parser = self.parse_ctx.MakeWordParserForPlugin(ps4, self.arena)
      try:
        ps4_word = w_parser.ReadForPlugin()
      except util.ParseError as e:
        # Substitute a literal error marker rather than failing the trace.
        error_str = '<ERROR: cannot parse PS4>'
        t = token(Id.Lit_Chars, error_str, const.NO_INTEGER)
        ps4_word = osh_word.CompoundWord([word_part.LiteralPart(t)])
      self.parse_cache[ps4] = ps4_word

    #print(ps4_word)

    # TODO: Repeat first character according process stack depth.  Where is
    # that stored?  In the executor itself?  It should be stored along with
    # the PID.  Need some kind of ShellProcessState or something.
    #
    # We should come up with a better mechanism.  Something like $PROC_INDENT
    # and $OIL_XTRACE_PREFIX.

    # TODO: Handle runtime errors!  For example, you could PS4='$(( 1 / 0 ))'
    # <ERROR: cannot evaluate PS4>
    prefix = self.word_ev.EvalWordToString(ps4_word)

    return first_char, prefix.s
def _ReadArithWord(self):
    # type: () -> Tuple[word_t, bool]
    """Helper function for ReadArithWord.

    Returns:
      (word, need_more): the parsed word (None when the token was ignorable),
      and whether the wrapper should call this function again.
    """
    self._Peek()

    if self.token_kind == Kind.Unknown:
      p_die('Unexpected token in arithmetic context', token=self.cur_token)

    elif self.token_kind == Kind.Eof:
      # Just return EOF token
      w = osh_word.TokenWord(self.cur_token)  # type: word_t
      return w, False

    elif self.token_kind == Kind.Ignored:
      # Space should be ignored.  TODO: change this to SPACE_SPACE and
      # SPACE_NEWLINE?  or SPACE_TOK.
      self._Next(lex_mode_e.Arith)
      return None, True  # Tell wrapper to try again

    elif self.token_kind in (Kind.Arith, Kind.Right):
      # Id.Right_ArithSub IS just a normal token, handled by ArithParser
      self._Next(lex_mode_e.Arith)
      w = osh_word.TokenWord(self.cur_token)
      return w, False

    elif self.token_kind in (Kind.Lit, Kind.Left):
      w = self._ReadCompoundWord(lex_mode=lex_mode_e.Arith)
      return w, False

    elif self.token_kind == Kind.VSub:
      # e.g. $name or $1 inside $(( ))
      part = word_part.SimpleVarSub(self.cur_token)
      self._Next(lex_mode_e.Arith)
      w = osh_word.CompoundWord([part])
      return w, False

    else:
      # Unlike 'assert False', this raise survives 'python -O', and the
      # message includes the offending token.  (The old code had an
      # unreachable second raise after the assert.)
      raise AssertionError(
          'Unexpected token parsing arith sub: %s' % self.cur_token)
def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok,
                      lex_mode=lex_mode_e.Outer, empty_ok=True):
    # type: (Id_t, lex_mode_t, bool) -> word__CompoundWord
    """Read a sequence of word parts into one CompoundWord.

    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but
    it could be an operator delimiting a compound word.  Can we change lexer
    modes and remove this special case?
    """
    word = osh_word.CompoundWord()

    num_parts = 0
    done = False
    while not done:
      self._Peek()

      # empty_ok=False means at least one part must be read before eof_type
      # can end the word.
      allow_done = empty_ok or num_parts != 0
      if allow_done and self.token_type == eof_type:
        done = True  # e.g. for ${foo//pat/replace}

      # Keywords like "for" are treated like literals
      elif self.token_kind in (
          Kind.Lit, Kind.History, Kind.KW, Kind.Assign, Kind.ControlFlow,
          Kind.BoolUnary, Kind.BoolBinary):
        if self.token_type == Id.Lit_EscapedChar:
          part = word_part.EscapedLiteralPart(
              self.cur_token)  # type: word_part_t
        else:
          part = word_part.LiteralPart(self.cur_token)
        word.parts.append(part)

        if self.token_type == Id.Lit_VarLike:  # foo=
          # Look ahead for '(' to detect an array literal foo=( ... ).
          t = self.lexer.LookAhead(lex_mode_e.Outer)
          if t.id == Id.Op_LParen:
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ArrayLiteral)
            part2 = self._ReadArrayLiteralPart()
            word.parts.append(part2)

      elif self.token_kind == Kind.VSub:
        part = word_part.SimpleVarSub(self.cur_token)
        word.parts.append(part)

      elif self.token_kind == Kind.ExtGlob:
        part = self._ReadExtGlobPart()
        word.parts.append(part)

      elif self.token_kind == Kind.Left:
        part = self._ReadLeftParts()
        word.parts.append(part)

      # NOT done yet, will advance below
      elif self.token_kind == Kind.Right:
        # Still part of the word; will be done on the next iter.
        if self.token_type == Id.Right_DoubleQuote:
          pass
        elif self.token_type == Id.Right_CommandSub:
          pass
        elif self.token_type == Id.Right_Subshell:
          # LEXER HACK for (case x in x) ;; esac )
          assert self.next_lex_mode is None  # Rewind before it's used
          if self.lexer.MaybeUnreadOne():
            self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
            self._Next(lex_mode)
          done = True
        else:
          done = True

      elif self.token_kind == Kind.Ignored:
        done = True

      else:
        # LEXER HACK for unbalanced case clause.  'case foo in esac' is
        # valid, so to test for ESAC, we can read ) before getting a chance
        # to PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread one
        # token and do it again.

        # We get Id.Op_RParen at top level:      case x in x) ;; esac
        # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
        if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
          assert self.next_lex_mode is None  # Rewind before it's used
          if self.lexer.MaybeUnreadOne():
            if self.token_type == Id.Eof_RParen:
              # Redo translation
              self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
            self._Next(lex_mode)

        done = True  # anything we don't recognize means we're done

      if not done:
        self._Next(lex_mode)
        num_parts += 1
    return word
def _BraceDetect(w):
  # type: (word__CompoundWord) -> Optional[word__BracedWordTree]
  """Return a new word if the input word looks like a brace expansion.

  e.g. {a,b} or {1..10..2} (TODO)
  Do we want to accept {01..02} ?  zsh does make some attempt to do this too.

  NOTE: This is an iterative algorithm that uses a stack.  The grammar-based
  approach didn't seem natural.

  It's not LL(1) because of 'part*'.  And not LL(k) even?  Maybe it be
  handled with an LR parser?  In any case the imperative algorithm with
  'early return' for a couple cases is fairly simple.

  Grammar:
    # an alternative is a literal, possibly empty, or another brace_expr

    part = <any part except LiteralPart>
    alt = part* | brace_expr

    # a brace_expr is group of at least 2 braced and comma-separated
    # alternatives, with optional prefix and suffix.
    brace_expr = part* '{' alt ',' alt (',' alt)* '}' part*
  """
  # Errors:
  # }a{    - stack depth dips below 0
  # {a,b}{ - Stack depth doesn't end at 0
  # {a}    - no comma, and also not an numeric range

  # Pending parts for the current alternative (or the prefix, at depth 0).
  cur_parts = []  # type: List[word_part_t]
  stack = []  # type: List[_StackFrame]

  found = False

  for i, part in enumerate(w.parts):
    append = True
    if isinstance(part, word_part__LiteralPart):
      id_ = part.token.id
      if id_ == Id.Lit_LBrace:
        # Save prefix parts.  Start new parts list.
        new_frame = _StackFrame(cur_parts)
        stack.append(new_frame)
        cur_parts = []
        append = False
        found = True  # assume found, but can early exit with None later

      elif id_ == Id.Lit_Comma:
        # Append a new alternative.
        # NOTE: Should we allow this:
        # ,{a,b}
        # or force this:
        # \,{a,b}
        # ?  We're forcing braces right now but not commas.
        if stack:
          stack[-1].saw_comma = True
          stack[-1].alt_part.words.append(word.CompoundWord(cur_parts))
          cur_parts = []  # clear
          append = False

      elif id_ == Id.Lit_RBrace:
        if not stack:  # e.g. echo {a,b}{  -- unbalanced {
          return None  # do not expand ANYTHING because of invalid syntax

        # Detect {1..10} and {1..10..2}
        #log('stack[-1]: %s', stack[-1])
        #log('cur_parts: %s', cur_parts)

        range_part = None
        # only allow {1..3}, not {a,1..3}
        if not stack[-1].saw_comma and len(cur_parts) == 1:
          # It must be ONE part.  For example, -1..-100..-2 is initially
          # lexed as a single Lit_Chars token.
          part = cur_parts[0]
          if (isinstance(part, word_part__LiteralPart) and
              part.token.id == Id.Lit_Chars):
            range_part = _RangePartDetect(part.token)
            if range_part:
              frame = stack.pop()
              cur_parts = frame.cur_parts
              cur_parts.append(range_part)
              append = False

        # It doesn't look like a range -- process it as the last element in
        # {a,b,c}
        if not range_part:
          if not stack[-1].saw_comma:  # {foo} is not a real alternative
            return None  # early return

          stack[-1].alt_part.words.append(word.CompoundWord(cur_parts))

          frame = stack.pop()
          cur_parts = frame.cur_parts
          cur_parts.append(frame.alt_part)
          append = False

    if append:
      cur_parts.append(part)

  if len(stack) != 0:
    # e.g. {a,b}{ -- unbalanced, never closed
    return None

  if found:
    return word.BracedWordTree(cur_parts)
  else:
    return None
def _BraceDetect(w):
  # type: (word__CompoundWord) -> Optional[word__BracedWordTree]
  """Return a new BracedWordTree if the word looks like a brace expansion.

  Args:
    w: a CompoundWord.
  Returns:
    A BracedWordTree, or None if no valid brace expression was found.

  Grammar:
    # an alternative is a literal, possibly empty, or another brace_expr

    part = <any part except LiteralPart>
    alt = part* | brace_expr

    # a brace_expr is group of at least 2 braced and comma-separated
    # alternatives, with optional prefix and suffix.
    brace_expr = part* '{' alt ',' alt (',' alt)* '}' part*

  Problem with this grammar: it's not LL(1).  Is it indirect left-recursive?
  What's the best way to handle it?  LR(1) parser?

  So instead we use an iterative algorithm with a stack:
  - On '{', push a new frame (the stack asserts there is at least one ','
    between '{' and '}').
  - On ',', finish the current alternative and start a new one.
  - On '}' with at least one ',' seen, finish the group and pop.
  - On '}' without a ',', or any unbalanced brace, abort and return None.
  """
  # Errors:
  # }a{    - stack depth dips below 0
  # {a,b}{ - Stack depth doesn't end at 0
  # {a}    - no comma, and also not an numeric range

  cur_parts = []  # type: List[word_part_t]
  stack = []  # type: List[_StackFrame]

  found = False

  for i, part in enumerate(w.parts):
    append = True
    if isinstance(part, word_part__LiteralPart):
      id_ = part.token.id
      if id_ == Id.Lit_LBrace:
        # Save prefix parts.  Start new parts list.
        new_frame = _StackFrame(cur_parts)
        stack.append(new_frame)
        cur_parts = []
        append = False
        found = True  # assume found, but can early exit with None later

      elif id_ == Id.Lit_Comma:
        # Append a new alternative.
        #print('*** Appending after COMMA', cur_parts)

        # NOTE: Should we allow this:
        # ,{a,b}
        # or force this:
        # \,{a,b}
        # ?  We're forcing braces right now but not commas.
        if stack:
          stack[-1].saw_comma = True
          stack[-1].alt_part.words.append(word.CompoundWord(cur_parts))
          cur_parts = []  # clear
          append = False

      elif id_ == Id.Lit_RBrace:
        # TODO:
        # - Detect lack of , -- abort the whole thing
        # - Detect {1..10} and {1..10..2}
        #   - bash and zsh only -- this is NOT implemented by mksh
        #   - Use a regex on the middle part:
        #     - digit+ '..' digit+  ( '..' digit+ )?
        #   - Char ranges are bash only!
        #
        # word_part.BracedIntRangePart()
        # word_part.CharRangePart()

        if not stack:  # e.g. echo } -- unbalanced {
          return None
        if not stack[-1].saw_comma:  # {foo} is not a real alternative
          return None
        stack[-1].alt_part.words.append(word.CompoundWord(cur_parts))

        frame = stack.pop()
        cur_parts = frame.cur_parts
        cur_parts.append(frame.alt_part)
        append = False

    if append:
      cur_parts.append(part)

  if len(stack) != 0:
    # Unbalanced: some '{' was never closed.
    return None

  if found:
    return word.BracedWordTree(cur_parts)
  else:
    return None
def ErrorWord(fmt, err):
  # type: (str, _ErrorWithLocation) -> word__CompoundWord
  """Build a literal word containing a formatted error message."""
  message = fmt % err.UserErrorString()
  err_token = token(Id.Lit_Chars, message, const.NO_INTEGER)
  return word.CompoundWord([word_part.LiteralPart(err_token)])
def Expr(self, pnode):
    # type: (PNode) -> expr_t
    """Walk the homogeneous parse tree and create a typed AST.

    Non-terminal nodes are dispatched on their grammar symbol; terminal nodes
    on their token id.  Raises AssertionError for unhandled node types.
    """
    typ = pnode.typ
    tok = pnode.tok
    children = pnode.children

    #if typ in self.number2symbol:  # non-terminal
    if ISNONTERMINAL(typ):
      c = '-' if not children else len(children)
      #log('non-terminal %s %s', nt_name, c)

      if typ == grammar_nt.lvalue_list:
        return self._AssocBinary(children)

      if typ == grammar_nt.atom:
        if children[0].tok.id == Id.Op_LParen:
          # Parenthesized expression: unwrap it.
          return self.Expr(children[1])
        else:
          raise NotImplementedError

      if typ == grammar_nt.eval_input:
        # testlist_input: testlist NEWLINE* ENDMARKER
        return self.Expr(children[0])

      if typ == grammar_nt.testlist:
        # testlist: test (',' test)* [',']
        return self._AssocBinary(children)

      elif typ == grammar_nt.arith_expr:
        # expr: term (('+'|'-') term)*
        return self._AssocBinary(children)

      elif typ == grammar_nt.term:
        # term: factor (('*'|'/'|'div'|'mod') factor)*
        return self._AssocBinary(children)

      elif typ == grammar_nt.expr:
        # expr: xor_expr ('|' xor_expr)*
        return self._AssocBinary(children)

      elif typ == grammar_nt.shift_expr:
        # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
        return self._AssocBinary(children)

      elif typ == grammar_nt.comparison:
        # comparison: expr (comp_op expr)*
        return self._AssocBinary(children)

      elif typ == grammar_nt.factor:
        # factor: ('+'|'-'|'~') factor | power
        # the power would have already been reduced
        assert len(children) == 2, children
        op, e = children
        assert isinstance(op.tok, syntax_asdl.token)
        return expr.Unary(op.tok, self.Expr(e))

      elif typ == grammar_nt.atom_expr:
        # atom_expr: ['await'] atom trailer*
        # NOTE: This would be shorter in a recursive style.
        base = self.Expr(children[0])
        n = len(children)
        # Fold each trailer (call, subscript, attribute) onto the base.
        for i in xrange(1, n):
          pnode = children[i]
          tok = pnode.tok
          base = self._Trailer(base, pnode)
        return base

      elif typ == grammar_nt.power:
        # power: atom_expr ['^' factor]
        # This doesn't repeat, so it doesn't matter if it's left or right
        # associative.
        return self._AssocBinary(children)

      elif typ == grammar_nt.array_literal:
        left_tok = children[0].tok

        # Approximation for now.
        tokens = [
            pnode.tok for pnode in children[1:-1]
            if pnode.tok.id == Id.Lit_Chars
        ]
        array_words = [
            word.CompoundWord([word_part.LiteralPart(t)]) for t in tokens
        ]  # type: List[word_t]
        return expr.ArrayLiteral(left_tok, array_words)

      elif typ == grammar_nt.regex_literal:
        left_tok = children[0].tok

        # Approximation for now.
        tokens = [
            pnode.tok for pnode in children[1:-1]
            if pnode.tok.id == Id.Expr_Name
        ]
        parts = [regex.Var(t) for t in tokens]  # type: List[regex_t]
        return expr.RegexLiteral(left_tok, regex.Concat(parts))

      elif typ == grammar_nt.command_sub:
        left_tok = children[0].tok

        # Approximation for now.
        tokens = [
            pnode.tok for pnode in children[1:-1]
            if pnode.tok.id == Id.Lit_Chars
        ]
        words = [
            word.CompoundWord([word_part.LiteralPart(t)]) for t in tokens
        ]  # type: List[word_t]
        return expr.CommandSub(left_tok, command.SimpleCommand(words))

      elif typ == grammar_nt.expr_sub:
        left_tok = children[0].tok
        return expr.ExprSub(left_tok, self.Expr(children[1]))

      elif typ == grammar_nt.var_sub:
        left_tok = children[0].tok
        return expr.VarSub(left_tok, self.Expr(children[1]))

      elif typ == grammar_nt.dq_string:
        left_tok = children[0].tok

        tokens = [
            pnode.tok for pnode in children[1:-1]
            if pnode.tok.id == Id.Lit_Chars
        ]
        parts2 = [oil_word_part.Literal(t)
                  for t in tokens]  # type: List[oil_word_part_t]
        return expr.DoubleQuoted(left_tok, parts2)

      else:
        nt_name = self.number2symbol[typ]
        raise AssertionError(
            "PNode type %d (%s) wasn't handled" % (typ, nt_name))

    else:  # Terminals should have a token
      #log('terminal %s', tok)

      if tok.id == Id.Expr_Name:
        return expr.Var(tok)
      elif tok.id == Id.Expr_Digits:
        return expr.Const(tok)
      else:
        raise AssertionError(tok.id)