def testShellFuncExecution(self):
    """A shell function that assigns COMPREPLY should produce completions.

    Builds the AST for `myfunc() { COMPREPLY=(f1 f2); }` by hand, then
    checks that ShellFuncAction turns COMPREPLY into candidate matches.
    """
    ex = cmd_exec_test.InitExecutor()
    func_node = ast.FuncDef()

    # Two array elements: compound words for 'f1' and 'f2'.
    c1 = ast.CompoundWord()
    t1 = ast.token(Id.Lit_Chars, 'f1')
    c1.parts.append(ast.LiteralPart(t1))

    c2 = ast.CompoundWord()
    t2 = ast.token(Id.Lit_Chars, 'f2')
    c2.parts.append(ast.LiteralPart(t2))

    a = ast.ArrayLiteralPart()
    a.words = [c1, c2]
    w = ast.CompoundWord()
    w.parts.append(a)

    # Set global COMPREPLY=(f1 f2)
    pair = ast.assign_pair(ast.LhsName('COMPREPLY'), assign_op_e.Equal, w)
    pair.spids.append(0)  # dummy
    pairs = [pair]
    body_node = ast.Assignment(Id.Assign_None, [], pairs)

    func_node.name = 'myfunc'
    func_node.body = body_node

    # Run the function through the completion action and collect matches.
    a = completion.ShellFuncAction(ex, func_node)
    matches = list(a.Matches([], 0, 'f'))
    # NOTE(review): expected matches carry a trailing space -- presumably the
    # completion suffix; confirm against ShellFuncAction's implementation.
    self.assertEqual(['f1 ', 'f2 '], matches)
def testPipeline2(self):
    """Run process.Pipeline two ways: external procs, then subprogram thunks."""
    Banner('ls | cut -d . -f 1 | head')
    p = process.Pipeline()
    p.Add(_ExtProc(['ls']))
    p.Add(_ExtProc(['cut', '-d', '.', '-f', '1']))
    p.Add(_ExtProc(['head']))
    print(p.Run(_WAITER))

    ex = InitExecutor()

    # Simulating subshell for each command
    w1 = ast.CompoundWord()
    w1.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls')))
    node1 = ast.SimpleCommand()
    node1.words = [w1]

    w2 = ast.CompoundWord()
    w2.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'head')))
    node2 = ast.SimpleCommand()
    node2.words = [w2]

    w3 = ast.CompoundWord()
    w3.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'sort')))
    w4 = ast.CompoundWord()
    w4.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, '--reverse')))
    node3 = ast.SimpleCommand()
    node3.words = [w3, w4]

    # Each SimpleCommand node runs in its own forked process via
    # SubProgramThunk, i.e. ls | head | sort --reverse.
    p = process.Pipeline()
    p.Add(Process(process.SubProgramThunk(ex, node1)))
    p.Add(Process(process.SubProgramThunk(ex, node2)))
    p.Add(Process(process.SubProgramThunk(ex, node3)))
    print(p.Run(_WAITER))
def _ReadExtGlobPart(self):
    """Read an extended glob part like @(foo|bar).

    Grammar:
      Item         = CompoundWord | EPSILON  # important: @(foo|) is allowed
      LEFT         = '@(' | '*(' | '+(' | '?(' | '!('
      RIGHT        = ')'
      ExtGlob      = LEFT (Item '|')* Item RIGHT  # ITEM may be empty
      CompoundWord includes ExtGlobPart

    Returns:
      ast.ExtGlobPart, or None on an unterminated glob (EOF).
    """
    left_token = self.cur_token
    arms = []
    part = ast.ExtGlobPart(left_token, arms)  # return value
    part.spids.append(left_token.span_id)

    # Translate the closing ) into the dedicated ExtGlob terminator token.
    self.lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)
    self._Next(lex_mode_e.EXTGLOB)  # advance past LEFT

    read_word = False  # did we just read a word?  To handle @(||).

    while True:
        self._Peek()
        #log('t %r', self.cur_token)

        if self.token_type == Id.Right_ExtGlob:
            if not read_word:
                # Trailing empty alternative, e.g. @(foo|)
                arms.append(ast.CompoundWord())
            part.spids.append(self.cur_token.span_id)
            break

        elif self.token_type == Id.Op_Pipe:
            if not read_word:
                # Empty alternative between pipes, e.g. @(||)
                arms.append(ast.CompoundWord())
            read_word = False
            self._Next(lex_mode_e.EXTGLOB)

        # lex mode EXTGLOB should only produce these 4 kinds of tokens
        elif self.token_kind in (Kind.Lit, Kind.Left, Kind.VSub, Kind.ExtGlob):
            w = self._ReadCompoundWord(lex_mode=lex_mode_e.EXTGLOB)
            arms.append(w)
            read_word = True

        elif self.token_kind == Kind.Eof:
            self.AddErrorContext(
                'Unexpected EOF reading extended glob that began here',
                token=left_token)
            return None

        else:
            raise AssertionError('Unexpected token %r' % self.cur_token)

    return part
def testVarOps(self):
    """Evaluate ${unset} and ${x}, then again with a :- default operator."""
    ev = InitEvaluator()  # initializes x=xxx and y=yyy

    def _eval_and_show(sub):
        # Evaluate one word part and print the resulting part values.
        part_vals = []
        ev._EvalWordPart(sub, part_vals)
        print(part_vals)

    unset_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'unset'))
    _eval_and_show(unset_sub)

    set_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'x'))
    _eval_and_show(set_sub)

    # Now add some ops: ${unset:-default} and ${x:-default}.
    part = ast.LiteralPart(ast.token(Id.Lit_Chars, 'default'))
    arg_word = ast.CompoundWord([part])
    test_op = ast.StringUnary(Id.VTest_ColonHyphen, arg_word)
    unset_sub.suffix_op = test_op
    set_sub.suffix_op = test_op

    _eval_and_show(unset_sub)
    _eval_and_show(set_sub)
def EvalPrompt(self, val):
    """Perform the two evaluations that bash does.  Used by $PS1 and ${x@P}.

    Step 1: replace backslash escape codes (\\u, \\h, ...).
    Step 2: parse and evaluate the result like a double-quoted word.

    Args:
      val: runtime value for PS1; anything other than a Str means "unset".
    Returns:
      The prompt string to display.
    """
    if val.tag != value_e.Str:
        return DEFAULT_PS1  # no evaluation necessary

    # BUG FIX: materialize the token stream with list() before caching.
    # If Tokens() yields lazily, caching the raw iterator would leave an
    # exhausted object in the cache after its first use, making every
    # subsequent prompt empty.
    try:
        tokens = self.tokens_cache[val.s]
    except KeyError:
        tokens = list(match.PS1_LEXER.Tokens(val.s))
        self.tokens_cache[val.s] = tokens

    # First replacements.  TODO: Should we cache this too?
    ps1_str = self._ReplaceBackslashCodes(tokens)

    # The prompt is often constant, so we can avoid parsing it.
    # NOTE: This is copied from the PS4 logic in Tracer.
    try:
        ps1_word = self.parse_cache[ps1_str]
    except KeyError:
        w_parser = self.parse_ctx.MakeWordParserForPlugin(ps1_str, self.arena)
        try:
            ps1_word = w_parser.ReadPS()
        except Exception:
            # Degrade to a visible error marker instead of crashing the shell.
            error_str = '<ERROR: cannot parse PS1>'
            t = ast.token(Id.Lit_Chars, error_str, const.NO_INTEGER)
            ps1_word = ast.CompoundWord([ast.LiteralPart(t)])
        self.parse_cache[ps1_str] = ps1_word

    # e.g. "${debian_chroot}\u" -> '\u'
    val2 = self.ex.word_ev.EvalWordToString(ps1_word)
    return val2.s
def EvalPrompt(self, val):
    """Do both evaluations bash performs on a prompt ($PS1 and ${x@P}).

    Step 1: substitute backslash escape codes (cached per source string).
    Step 2: parse the result like a double-quoted word and evaluate it
    (also cached).
    """
    if val.tag != value_e.Str:
        # Unset or non-string PS1: nothing to evaluate.
        return DEFAULT_PS1

    # Step 1: tokenize the backslash escapes, memoized on the raw string.
    tokens = self.tokens_cache.get(val.s)
    if tokens is None:
        tokens = list(match.PS1_LEXER.Tokens(val.s))
        self.tokens_cache[val.s] = tokens

    ps1_str = self._ReplaceBackslashCodes(tokens)

    # Step 2: parse like a double-quoted word, memoized on the replaced
    # string.  The prompt is often constant, so parsing is usually skipped.
    # NOTE: This mirrors the PS4 logic in Tracer.
    ps1_word = self.parse_cache.get(ps1_str)
    if ps1_word is None:
        w_parser = self.parse_ctx.MakeWordParserForPlugin(ps1_str, self.arena)
        try:
            ps1_word = w_parser.ReadPS()
        except Exception as e:
            marker = ast.token(
                Id.Lit_Chars, '<ERROR: cannot parse PS1>', const.NO_INTEGER)
            ps1_word = ast.CompoundWord([ast.LiteralPart(marker)])
        self.parse_cache[ps1_str] = ps1_word

    # Evaluate, e.g. "${debian_chroot}\u" -> '\u'
    return self.ex.word_ev.EvalWordToString(ps1_word).s
def ReadPS(self):
    """Read a prompt word, for $PS1, $PS4, etc.

    Works just like reading a here doc line: "\\n" is allowed, along with
    the typical substitutions ${x}, $(echo hi), and $((1 + 2)).
    """
    word = ast.CompoundWord()
    self._ReadLikeDQ(None, word.parts)
    return word
def _MaybeReadHereDocs(self):
    """Fill in the bodies of all pending here docs.

    For each here doc queued by the parser, reads lines until the delimiter
    (here_end), then either parses the lines (when expansion applies) or
    wraps them as literal parts.

    Returns:
      True on success; False on an unterminated here doc or a parse error.
    """
    for h in self.pending_here_docs:
        lines = []
        #log('HERE %r' % h.here_end)
        while True:
            line_id, line = self.line_reader.GetLine()
            #print("LINE %r %r" % (line, h.here_end))

            if not line:  # EOF
                # An unterminated here doc is just a warning in bash.  We
                # make it fatal because we want to be strict, and because it
                # causes problems reporting other errors.
                # Attribute it to the << in <<EOF for now.
                self.AddErrorContext('Unterminated here doc',
                                     span_id=h.spids[0])
                return False

            # If op is <<-, strip off ALL leading tabs (NOT spaces).
            # (in C++, just bump the start?)
            # NOTE: Could do this at runtime to preserve LST.
            if h.op_id == Id.Redir_DLessDash:
                line = line.lstrip('\t')

            if line.rstrip() == h.here_end:
                break

            lines.append((line_id, line))

    # (fixed: removed a dead `parts = []` that was always shadowed below)
        if h.do_expansion:
            # NOTE: We read all lines at once, instead of doing it
            # line-by-line, because of cases like this:
            # cat <<EOF
            # 1 $(echo 2
            # echo 3) 4
            # EOF
            from osh import parse_lib  # Avoid circular import

            w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena)
            word = w_parser.ReadHereDocBody()
            if not word:
                self.AddErrorContext('Error reading here doc body: %s',
                                     w_parser.Error())
                return False
            h.body = word
            h.was_filled = True
        else:
            # Each line is a single span.  TODO: Add span_id to token.
            tokens = [ast.token(Id.Lit_Chars, line, const.NO_INTEGER)
                      for _, line in lines]
            parts = [ast.LiteralPart(t) for t in tokens]
            h.body = ast.CompoundWord(parts)
            h.was_filled = True

    # No .clear() until Python 3.3.
    del self.pending_here_docs[:]

    return True
def BraceExpandWords(words):
    """Expand any BracedWordTree words into flat CompoundWord instances.

    Words without brace alternatives pass through unchanged; order is
    preserved.
    """
    expanded = []
    for word in words:
        if word.tag != word_e.BracedWordTree:
            expanded.append(word)
            continue
        # One CompoundWord per expansion of the tree's parts.
        for parts in _BraceExpand(word.parts):
            expanded.append(ast.CompoundWord(parts))
    return expanded
def _EvalRedirect(self, n):
    """Evaluate one redirect AST node into a runtime redirect value.

    Args:
      n: a redir_e.Redir or redir_e.HereDoc node.
    Returns:
      runtime.PathRedirect / DescRedirect / HereRedirect, or None when
      evaluation of the argument word fails.
    """
    # Use the explicit fd if given (e.g. the 2 in 2>err); otherwise fall
    # back to the operator's default (e.g. 1 for >).
    fd = REDIR_DEFAULT_FD[n.op.id] if n.fd == const.NO_INTEGER else n.fd
    if n.tag == redir_e.Redir:
        redir_type = REDIR_ARG_TYPES[n.op.id]  # could be static in the LST?

        if redir_type == redir_arg_type_e.Path:
            # NOTE: no globbing.  You can write to a file called '*.py'.
            val = self.word_ev.EvalWordToString(n.arg_word)
            if val.tag != value_e.Str:  # TODO: This error never fires
                util.error("Redirect filename must be a string, got %s", val)
                return None
            filename = val.s
            if not filename:
                # Whether this is fatal depends on errexit.
                util.error("Redirect filename can't be empty")
                return None
            return runtime.PathRedirect(n.op.id, fd, filename)

        elif redir_type == redir_arg_type_e.Desc:  # e.g. 1>&2
            val = self.word_ev.EvalWordToString(n.arg_word)
            if val.tag != value_e.Str:  # TODO: This error never fires
                util.error("Redirect descriptor should be a string, got %s",
                           val)
                return None
            t = val.s
            if not t:
                util.error("Redirect descriptor can't be empty")
                return None
            try:
                target_fd = int(t)
            except ValueError:
                util.error(
                    "Redirect descriptor should look like an integer, got %s",
                    val)
                return None
            return runtime.DescRedirect(n.op.id, fd, target_fd)

        elif redir_type == redir_arg_type_e.Here:  # here word
            val = self.word_ev.EvalWordToString(n.arg_word)
            assert val.tag == value_e.Str, val
            # NOTE: bash and mksh both add \n
            return runtime.HereRedirect(fd, val.s + '\n')
        else:
            raise AssertionError('Unknown redirect op')

    elif n.tag == redir_e.HereDoc:
        # HACK: Wrap the here doc body in a word so the ordinary word
        # evaluator can expand it.
        w = ast.CompoundWord(n.stdin_parts)
        val = self.word_ev.EvalWordToString(w)
        assert val.tag == value_e.Str, val
        return runtime.HereRedirect(fd, val.s)
    else:
        raise AssertionError('Unknown redirect type')
def testBraceExpand(self):
    """Brace-expand several words and check the number of expansions."""
    # A word with no braces expands to itself.
    w = _assertReadWord(self, 'hi')
    results = braces._BraceExpand(w.parts)
    self.assertEqual(1, len(results))
    for parts in results:
        _PrettyPrint(ast.CompoundWord(parts))
    print('')

    # (source, expected parts in detected tree, expected expansion count)
    cases = [
        ('B-{a,b}-E', 3, 2),
        ('B-{a,={b,c,d}=,e}-E', 3, 5),
        ('B-{a,b}-{c,d}-E', 5, 4),
    ]
    for source, num_parts, num_results in cases:
        w = _assertReadWord(self, source)
        tree = braces._BraceDetect(w)
        self.assertEqual(num_parts, len(tree.parts))
        pprint(tree)

        results = braces._BraceExpand(tree.parts)
        self.assertEqual(num_results, len(results))
        for parts in results:
            _PrettyPrint(ast.CompoundWord(parts))
        print('')
def _ReadArithWord(self): """Helper function for ReadArithWord.""" #assert self.token_type != Id.Undefined_Tok self._Peek() #print('_ReadArithWord', self.cur_token) if self.token_kind == Kind.Unknown: self.AddErrorContext("Unknown token in arith context: %s", self.cur_token, token=self.cur_token) return None, False elif self.token_kind == Kind.Eof: # Just return EOF token w = ast.TokenWord(self.cur_token) return w, False #self.AddErrorContext("Unexpected EOF in arith context: %s", # self.cur_token, token=self.cur_token) #return None, False elif self.token_kind == Kind.Ignored: # Space should be ignored. TODO: change this to SPACE_SPACE and # SPACE_NEWLINE? or SPACE_TOK. self._Next(lex_mode_e.ARITH) return None, True # Tell wrapper to try again elif self.token_kind in (Kind.Arith, Kind.Right): # Id.Right_ArithSub IS just a normal token, handled by ArithParser self._Next(lex_mode_e.ARITH) w = ast.TokenWord(self.cur_token) return w, False elif self.token_kind in (Kind.Lit, Kind.Left): w = self._ReadCompoundWord(lex_mode=lex_mode_e.ARITH) if not w: return None, True return w, False elif self.token_kind == Kind.VSub: part = ast.SimpleVarSub(self.cur_token) self._Next(lex_mode_e.ARITH) w = ast.CompoundWord([part]) return w, False else: self._BadToken("Unexpected token parsing arith sub: %s", self.cur_token) return None, False raise AssertionError("Shouldn't get here")
def TildeDetect(word):
    """Detect tilde expansion in a word; return a rewritten word or None.

    A leading Lit_TildeLike part becomes a TildeSubPart when it is either
    the entire word, or is immediately followed by a literal part that
    begins with '/'.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      part changes.  But note that we CANNOT know this during lexing.
    """
    # BracedWordTree, EmptyWord, etc. can't be tilde expanded.
    if word.tag != word_e.CompoundWord:
        return None

    assert word.parts, word
    first = word.parts[0]
    if _LiteralPartId(first) != Id.Lit_TildeLike:
        return None

    # The whole word is tilde-like, e.g. ~ or ~user on its own.
    if len(word.parts) == 1:  # can't be zero
        return ast.CompoundWord([ast.TildeSubPart(first.token)])

    # Otherwise the next part must be a literal that starts with a slash.
    # NOTE: We could inspect the raw tokens.
    second = word.parts[1]
    if (_LiteralPartId(second) == Id.Lit_Chars and
            second.token.val.startswith('/')):
        return ast.CompoundWord(
            [ast.TildeSubPart(first.token)] + word.parts[1:])

    # Something like '~foo:bar', which doesn't have a slash.
    return None
def testMultiLine(self):
    """ReadWord should skip blank lines and comments between commands."""
    # NOTE(review): the blank lines around the comment are reconstructed;
    # the test name says multiple newlines are collapsed -- confirm input.
    w_parser = InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
    print('--MULTI')
    # ls
    w = w_parser.ReadWord(lex_mode_e.OUTER)
    parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls'))]
    test_lib.AssertAsdlEqual(self, ast.CompoundWord(parts), w)

    # foo
    w = w_parser.ReadWord(lex_mode_e.OUTER)
    parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'foo'))]
    test_lib.AssertAsdlEqual(self, ast.CompoundWord(parts), w)

    # A single newline token, despite blank lines and the comment.
    w = w_parser.ReadWord(lex_mode_e.OUTER)
    t = ast.token(Id.Op_Newline, '\n')
    test_lib.AssertAsdlEqual(self, ast.TokenWord(t), w)

    # ls
    w = w_parser.ReadWord(lex_mode_e.OUTER)
    parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls'))]
    test_lib.AssertAsdlEqual(self, ast.CompoundWord(parts), w)

    # bar
    w = w_parser.ReadWord(lex_mode_e.OUTER)
    parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'bar'))]
    test_lib.AssertAsdlEqual(self, ast.CompoundWord(parts), w)

    # Trailing newline, then real EOF.
    w = w_parser.ReadWord(lex_mode_e.OUTER)
    t = ast.token(Id.Op_Newline, '\n')
    test_lib.AssertAsdlEqual(self, ast.TokenWord(t), w)

    w = w_parser.ReadWord(lex_mode_e.OUTER)
    t = ast.token(Id.Eof_Real, '')
    test_lib.AssertAsdlEqual(self, ast.TokenWord(t), w)
def ReadHereDocBody(self):
    """Read a here doc body as a single word.

    Like Read(), except we're in a double-quoted context without the
    actual double quotes.

    Returns:
      CompoundWord on success; False on a parse error.

    NOTE: We could also just use a DoubleQuotedPart for both cases?
    """
    dq_part = self._ReadDoubleQuotedPart(here_doc=True)
    if not dq_part:
        self.AddErrorContext('Error parsing here doc body')
        return False
    word = ast.CompoundWord()
    word.parts.append(dq_part)
    return word
def LooksLikeAssignment(w):
    """Tests whether a word looks like FOO=bar.

    Returns:
      (name, op, rhs) -- a (string, assign_op, CompoundWord) triple -- if
      it looks like FOO=bar; False if it doesn't.

    Accepted forms:
      s=1  s+=1  s[x]=1  s[x]+=1
      a=()  a+=()  a[x]=()  a[x]+=()  # Not valid: arrays can't be nested.

    NOTE: a[ and s[ might be parsed separately?
    """
    assert w.tag == word_e.CompoundWord
    if not w.parts:
        return False

    part0 = w.parts[0]
    if _LiteralPartId(part0) != Id.Lit_VarLike:
        return False

    s = part0.token.val
    assert s.endswith('=')
    # Use endswith() rather than indexing s[-2], so a degenerate
    # single-character '=' token can't raise IndexError.
    if s.endswith('+='):
        op = assign_op_e.PlusEqual
        name = s[:-2]
    else:
        op = assign_op_e.Equal
        name = s[:-1]

    rhs = ast.CompoundWord()
    if len(w.parts) == 1:
        # This fake EmptyPart is necessary so that EmptyUnquoted elision
        # isn't applied.  EMPTY= is like EMPTY=''.
        # TODO: This part doesn't have spids, so it might break some
        # invariants.
        rhs.parts.append(ast.EmptyPart())
    else:
        rhs.parts.extend(w.parts[1:])

    return name, op, rhs
def _EvalPS4(self):
    """For set -x.

    Returns:
      (first_char, prefix): the xtrace marker character and the evaluated
      remainder of $PS4.
    """
    val = self.mem.GetVar('PS4')
    assert val.tag == value_e.Str
    s = val.s
    if s:
        first_char, ps4 = s[0], s[1:]
    else:
        first_char, ps4 = '+', ' '  # default

    try:
        ps4_word = self.parse_cache[ps4]
    except KeyError:
        # We have to parse this at runtime.  PS4 should usually remain
        # constant.
        w_parser = parse_lib.MakeWordParserForPlugin(ps4, self.arena)

        # NOTE: Reading PS4 is just like reading a here doc line.  "\n" is
        # allowed too.  The OUTER mode would stop at spaces, and ReadWord
        # doesn't allow lex_mode_e.DQ.
        ps4_word = w_parser.ReadHereDocBody()
        if not ps4_word:
            # Degrade to a visible error marker on parse failure.
            error_str = '<ERROR: cannot parse PS4>'
            t = ast.token(Id.Lit_Chars, error_str, const.NO_INTEGER)
            ps4_word = ast.CompoundWord([ast.LiteralPart(t)])
        self.parse_cache[ps4] = ps4_word

    #print(ps4_word)

    # TODO: Repeat first character according process stack depth.  Where is
    # that stored?  In the executor itself?  It should be stored along with
    # the PID.  Need some kind of ShellProcessState or something.
    #
    # We should come up with a better mechanism.  Something like $PROC_INDENT
    # and $OIL_XTRACE_PREFIX.

    # TODO: Handle runtime errors!  For example, you could PS4='$(( 1 / 0 ))'
    # <ERROR: cannot evaluate PS4>
    prefix = self.word_ev.EvalWordToString(ps4_word)

    return first_char, prefix.s
def _EvalPS4(self):
    """For set -x.

    Returns:
      (first_char, prefix): the xtrace marker character and the evaluated
      remainder of $PS4.
    """
    val = self.mem.GetVar('PS4')
    assert val.tag == value_e.Str
    s = val.s
    if s:
        first_char, ps4 = s[0], s[1:]
    else:
        first_char, ps4 = '+', ' '  # default

    # NOTE: This cache is slightly broken because aliases are mutable!  I
    # think that is more or less harmless though.
    try:
        ps4_word = self.parse_cache[ps4]
    except KeyError:
        # We have to parse this at runtime.  PS4 should usually remain
        # constant.
        w_parser = self.parse_ctx.MakeWordParserForPlugin(ps4, self.arena)
        try:
            ps4_word = w_parser.ReadPS()
        except util.ParseError as e:
            # Degrade to a visible error marker on parse failure.
            error_str = '<ERROR: cannot parse PS4>'
            t = ast.token(Id.Lit_Chars, error_str, const.NO_INTEGER)
            ps4_word = ast.CompoundWord([ast.LiteralPart(t)])
        self.parse_cache[ps4] = ps4_word

    #print(ps4_word)

    # TODO: Repeat first character according process stack depth.  Where is
    # that stored?  In the executor itself?  It should be stored along with
    # the PID.  Need some kind of ShellProcessState or something.
    #
    # We should come up with a better mechanism.  Something like $PROC_INDENT
    # and $OIL_XTRACE_PREFIX.

    # TODO: Handle runtime errors!  For example, you could PS4='$(( 1 / 0 ))'
    # <ERROR: cannot evaluate PS4>
    prefix = self.word_ev.EvalWordToString(ps4_word)

    return first_char, prefix.s
def _ReadArithWord(self):
    """Helper function for ReadArithWord.

    Returns:
      (word, need_retry): the parsed word (or None), and whether the caller
      should call again (after ignored whitespace was skipped).
    """
    self._Peek()

    if self.token_kind == Kind.Unknown:
        p_die('Unexpected token in arithmetic context', token=self.cur_token)

    elif self.token_kind == Kind.Eof:
        # Just return EOF token
        w = ast.TokenWord(self.cur_token)
        return w, False

    elif self.token_kind == Kind.Ignored:
        # Space should be ignored.  TODO: change this to SPACE_SPACE and
        # SPACE_NEWLINE?  or SPACE_TOK.
        self._Next(lex_mode_e.ARITH)
        return None, True  # Tell wrapper to try again

    elif self.token_kind in (Kind.Arith, Kind.Right):
        # Id.Right_ArithSub IS just a normal token, handled by ArithParser
        self._Next(lex_mode_e.ARITH)
        w = ast.TokenWord(self.cur_token)
        return w, False

    elif self.token_kind in (Kind.Lit, Kind.Left):
        w = self._ReadCompoundWord(lex_mode=lex_mode_e.ARITH)
        return w, False

    elif self.token_kind == Kind.VSub:
        # A bare $var becomes a one-part compound word.
        part = ast.SimpleVarSub(self.cur_token)
        self._Next(lex_mode_e.ARITH)
        w = ast.CompoundWord([part])
        return w, False

    else:
        assert False, ("Unexpected token parsing arith sub: %s" %
                       self.cur_token)

    # Unreachable (every branch returns or raises); kept as a guard for -O.
    raise AssertionError("Shouldn't get here")
def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok,
                      lex_mode=lex_mode_e.OUTER, empty_ok=True):
    """Read a compound word -- the core of word parsing.

    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but
    it could be an operator delimiting a compound word.  Can we change lexer
    modes and remove this special case?

    Returns:
      ast.CompoundWord on success; None/False on a sub-parse error.
    """
    #print('_ReadCompoundWord', lex_mode)
    word = ast.CompoundWord()

    num_parts = 0
    done = False
    while not done:
        allow_done = empty_ok or num_parts != 0
        self._Peek()
        #print('CW',self.cur_token)
        if allow_done and self.token_type == eof_type:
            done = True  # e.g. for ${foo//pat/replace}

        # Keywords like "for" are treated like literals
        elif self.token_kind in (Kind.Lit, Kind.KW, Kind.Assign,
                                 Kind.ControlFlow, Kind.BoolUnary,
                                 Kind.BoolBinary):
            if self.token_type == Id.Lit_EscapedChar:
                part = ast.EscapedLiteralPart(self.cur_token)
            else:
                part = ast.LiteralPart(self.cur_token)
                #part.xspans.append(self.cur_token.span_id)

            word.parts.append(part)

            if self.token_type == Id.Lit_VarLike:
                # Lookahead for an array literal, e.g. name=(...)
                #print('@', self.cursor)
                #print('@', self.cur_token)

                t = self.lexer.LookAhead(lex_mode_e.OUTER)
                if t.id == Id.Op_LParen:
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_ArrayLiteral)
                    part2 = self._ReadArrayLiteralPart()
                    if not part2:
                        self.AddErrorContext('_ReadArrayLiteralPart failed')
                        return False
                    word.parts.append(part2)

        elif self.token_kind == Kind.VSub:
            part = ast.SimpleVarSub(self.cur_token)
            word.parts.append(part)

        elif self.token_kind == Kind.ExtGlob:
            part = self._ReadExtGlobPart()
            if not part:
                return None
            word.parts.append(part)

        elif self.token_kind == Kind.Left:
            #print('_ReadLeftParts')
            part = self._ReadLeftParts()
            if not part:
                return None
            word.parts.append(part)

        # NOT done yet, will advance below
        elif self.token_kind == Kind.Right:
            # Still part of the word; will be done on the next iter.
            if self.token_type == Id.Right_DoubleQuote:
                pass
            elif self.token_type == Id.Right_CommandSub:
                pass
            elif self.token_type == Id.Right_Subshell:
                # LEXER HACK for (case x in x) ;; esac )
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
                    self._Next(lex_mode)
                done = True
            else:
                done = True

        elif self.token_kind == Kind.Ignored:
            done = True

        else:
            # LEXER HACK for unbalanced case clause.  'case foo in esac' is
            # valid, so to test for ESAC, we can read ) before getting a
            # chance to PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we
            # unread one token and do it again.

            # We get Id.Op_RParen at top level:     case x in x) ;; esac
            # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
            if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    if self.token_type == Id.Eof_RParen:
                        # Redo translation
                        self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
                    self._Next(lex_mode)

            done = True  # anything we don't recognize means we're done

        if not done:
            self._Next(lex_mode)
            num_parts += 1

    return word
def _BraceDetect(w): """ Args: CompoundWord Returns: CompoundWord or None? Another option: Grammar: # an alternative is a literal, possibly empty, or another brace_expr part = <any part except LiteralPart> alt = part* | brace_expr # a brace_expr is group of at least 2 braced and comma-separated # alternatives, with optional prefix and suffix. brace_expr = part* '{' alt ',' alt (',' alt)* '}' part* Problem this grammar: it's not LL(1) Is it indirect left-recursive? What's the best way to handle it? LR(1) parser? Iterative algorithm: Parse it with a stack? It's a stack that asserts there is at least one , in between {} Yeah just go through and when you see {, push another list. When you get , append to list When you get } and at least one ',', appendt o list When you get } without, then pop If there is no matching }, then abort with error if not balanced, return error too? """ # Errors: # }a{ - stack depth dips below 0 # {a,b}{ - Stack depth doesn't end at 0 # {a} - no comma, and also not an numeric range cur_parts = [] stack = [] found = False for i, part in enumerate(w.parts): append = True if part.tag == word_part_e.LiteralPart: id_ = part.token.id if id_ == Id.Lit_LBrace: # Save prefix parts. Start new parts list. new_frame = _StackFrame(cur_parts) stack.append(new_frame) cur_parts = [] append = False found = True # assume found, but can early exit with None later elif id_ == Id.Lit_Comma: # Append a new alternative. #print('*** Appending after COMMA', cur_parts) # NOTE: Should we allow this: # ,{a,b} # or force this: # \,{a,b} # ? We're forcing braces right now but not commas. if stack: stack[-1].saw_comma = True stack[-1].alt_part.words.append( ast.CompoundWord(cur_parts)) cur_parts = [] # clear append = False elif id_ == Id.Lit_RBrace: # TODO: # - Detect lack of , -- abort the whole thing # - Detect {1..10} and {1..10..2} # - bash and zsh only -- this is NOT implemented by mksh # - Use a regex on the middle part: # - digit+ '..' digit+ ( '..' digit+ )? 
# - Char ranges are bash only! # # ast.BracedIntRangePart() # ast.CharRangePart() if not stack: # e.g. echo } -- unbalancd { return None if not stack[-1].saw_comma: # {foo} is not a real alternative return None stack[-1].alt_part.words.append(ast.CompoundWord(cur_parts)) frame = stack.pop() cur_parts = frame.cur_parts cur_parts.append(frame.alt_part) append = False if append: cur_parts.append(part) if len(stack) != 0: return None if found: return ast.BracedWordTree(cur_parts) else: return None
def TildeDetect(word):
    """Detect tilde expansion.

    If it needs to include a TildeSubPart, return a new word.  Otherwise
    return None.

    NOTE: This algorithm would be simpler if
    1. We could assume some regex for user names.
    2. We didn't need to do brace expansion first, like {~foo,~bar}
    OR
    - If Lit_Slash were special (it is in the VAROP states, but not OUTER
      state).  We could introduce another lexer mode after you hit
      Lit_Tilde?

    So we have to scan all LiteralPart instances until they contain a '/'.

    http://unix.stackexchange.com/questions/157426/what-is-the-regex-to-validate-linux-users

    "It is usually recommended to only use usernames that begin with a
    lower case letter or an underscore, followed by lower case letters,
    digits, underscores, or dashes.  They can end with a dollar sign.
    In regular expression terms: [a-z_][a-z0-9_-]*[$]?

    On Debian, the only constraints are that usernames must neither start
    with a dash ('-') nor contain a colon (':') or a whitespace (space:
    ' ', end of line: '\\n', tabulation: '\\t', etc.).  Note that using a
    slash ('/') may break the default algorithm for the definition of the
    user's home directory.
    """
    if not word.parts:
        return None
    part0 = word.parts[0]
    if _LiteralPartId(part0) != Id.Lit_Tilde:
        return None

    prefix = ''
    found_slash = False
    # search for the next /
    for i in range(1, len(word.parts)):
        # Not a literal part, and we did NOT find a slash.  So there is no
        # TildeSub applied.  This would be something like ~X$var, ~$var,
        # ~$(echo), etc..  The slash is necessary.
        if word.parts[i].tag != word_part_e.LiteralPart:
            return None

        val = word.parts[i].token.val
        p = val.find('/')

        if p == -1:  # no slash yet
            prefix += val

        elif p >= 0:
            # e.g. for ~foo!bar/baz, extract "bar"
            # NOTE: requires downcast to LiteralPart
            pre, post = val[:p], val[p:]
            prefix += pre
            tilde_part = ast.TildeSubPart(prefix)
            # NOTE: no span_id here.  It would be nicer to use a different
            # algorithm that didn't require this.
            t = ast.token(Id.Lit_Chars, post, const.NO_INTEGER)
            remainder_part = ast.LiteralPart(t)
            found_slash = True
            break

    w = ast.CompoundWord()
    if found_slash:
        # ~user/rest: tilde sub, then the post-slash literal, then any
        # remaining parts after the one where the slash was found.
        w.parts.append(tilde_part)
        w.parts.append(remainder_part)
        j = i + 1
        while j < len(word.parts):
            w.parts.append(word.parts[j])
            j += 1
    else:
        # The whole thing is a tilde sub, e.g. ~foo or ~foo!bar
        w.parts.append(ast.TildeSubPart(prefix))
    return w