def _MakeAssignment(parse_ctx, assign_kw, suffix_words):
    """Build a command.Assignment node from an assignment keyword and words.

    suffix_words[0] is skipped (scanning starts at index 1).  The remaining
    words are consumed in two phases: leading flags such as -r -x -a -A (none
    of which take arguments), then bindings or bare variable names.

    NOTE: Dynamic assignments are not allowed, e.g.

        local $1

    This can be replaced with eval 'local $1'.

    Args:
      parse_ctx: passed through to _MakeAssignPair.
      assign_kw: first argument of the resulting command.Assignment.
      suffix_words: list of words, starting with the keyword word.

    Returns:
      A command.Assignment with the collected flags and pairs.
    """
    num_words = len(suffix_words)

    # Phase 1: flags.  Stop at the first word that can't be statically
    # evaluated, is quoted, or does not start with '-'; the rest are args.
    flags = []
    pos = 1
    while pos < num_words:
        ok, flag_str, quoted = word.StaticEval(suffix_words[pos])
        if not ok or quoted or not flag_str.startswith('-'):
            break
        flags.append(flag_str)
        pos += 1

    # Phase 2: bindings or variable names.
    pairs = []
    for w in suffix_words[pos:]:
        left_token, close_token, part_offset = word.DetectAssignment(w)
        if left_token:
            # A binding.  Note that declare x[y]=1 is valid.
            preparsed = (left_token, close_token, part_offset, w)
            pair = _MakeAssignPair(parse_ctx, preparsed)
        else:
            # In aboriginal in variables/sources: export_if_blank does
            # export "$1".  We should allow that.
            #
            # Parse this differently then?  dynamic-export?  It sets global
            # variables.
            ok, var_name, quoted = word.StaticEval(w)
            if not ok or quoted:
                p_die("Variable names must be unquoted constants", word=w)

            if not match.IsValidVarName(var_name):
                p_die('Invalid variable name %r', var_name, word=w)

            lhs = lhs_expr.LhsName(var_name)
            lhs.spids.append(word.LeftMostSpanForWord(w))

            # No value is equivalent to ''
            pair = syntax_asdl.assign_pair(lhs, assign_op_e.Equal, None)
            pair.spids.append(word.LeftMostSpanForWord(w))
        pairs.append(pair)

    return command.Assignment(assign_kw, flags, pairs)
def _Visit(self, node):
    """Record programs used and functions defined by a single node.

    For a command.SimpleCommand whose first word can be statically evaluated,
    the command name is stored in self.progs_used unless one of
    builtin.ResolveSpecial, builtin.ResolveAssign or builtin.Resolve
    recognizes it (returns something other than builtin_e.NONE).  When the
    command is 'sudo', the second word is recorded as a used program as well.

    For a command.FuncDef, the function name is stored in
    self.funcs_defined.  Other node classes are ignored.
    """
    #log('VISIT %s', node.__class__.__name__)

    # NOTE: The tags are not unique!!!  We would need this:
    # if isinstance(node, ast.command) and node.tag == command_e.SimpleCommand:
    # But it's easier to check the __class__ attribute.
    cls = node.__class__
    if cls is command.SimpleCommand:
        #log('SimpleCommand %s', node.words)
        #log('--')
        #node.PrettyPrint()

        # Things to consider:
        # - source and .
        # - DONE builtins: get a list from builtin.py
        # - DONE functions: have to enter function definitions into a
        #   dictionary
        # - Commands that call others: sudo, su, find, xargs, etc.
        # - builtins that call others: exec, command
        #   - except not command -v!

        if not node.words:
            return

        w = node.words[0]
        ok, argv0, _ = word.StaticEval(w)
        if not ok:
            log("Couldn't statically evaluate %r", w)
            return

        if (builtin.ResolveSpecial(argv0) == builtin_e.NONE and
                builtin.ResolveAssign(argv0) == builtin_e.NONE and
                builtin.Resolve(argv0) == builtin_e.NONE):
            self.progs_used[argv0] = True

        # NOTE: If argv1 is $0, then we do NOT print a warning!
        if argv0 == 'sudo':
            if len(node.words) < 2:
                return
            w1 = node.words[1]
            ok, argv1, _ = word.StaticEval(w1)
            if not ok:
                # Report the word that actually failed (w1), not the
                # command name word.
                log("Couldn't statically evaluate %r", w1)
                return

            # Should we mark them behind 'sudo'?  e.g. "sudo apt install"?
            self.progs_used[argv1] = True

    elif cls is command.FuncDef:
        self.funcs_defined[node.name] = True
def _ParseHereDocBody(parse_ctx, h, line_reader, arena):
    """Populate the body parts and end span of the pending here doc node 'h'.

    Reads the here doc lines from line_reader up to the delimiter, then
    either stores them as literal parts (quoted delimiter) or parses them
    with a here doc word parser (unquoted delimiter).  Finally records a line
    span for the terminator line in h.here_end_span_id.
    """
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded. Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', word=h.here_begin)

    here_lines, last_line = _ReadHereLines(line_reader, h, delimiter)

    if not delim_quoted:
        # << EOF: parse the body; ReadHereDocBody fills in h.stdin_parts.
        body_reader = reader.VirtualLineReader(here_lines, arena)
        body_parser = parse_ctx.MakeWordParserForHereDoc(body_reader)
        body_parser.ReadHereDocBody(h.stdin_parts)
    else:
        # << 'EOF': a LiteralPart for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena)

    # Create a span with the end terminator.  Maintains the invariant that
    # the spans "add up".
    end_line_id, end_line, end_pos = last_line
    h.here_end_span_id = arena.AddLineSpan(
        syntax_asdl.line_span(end_line_id, end_pos, len(end_line)))
def testStaticEvalWord(self):
    """A backslash-escaped word evaluates statically to 'EOF', as quoted."""
    # Same form as a quoted here doc delimiter.
    parser = _InitWordParser(r'\EOF')
    w = parser.ReadWord(lex_mode_e.Outer)

    ok, value, is_quoted = word.StaticEval(w)

    self.assertEqual(True, ok)
    self.assertEqual('EOF', value)
    self.assertEqual(True, is_quoted)
def _ReadPatSubVarOp(self, lex_mode):
    # type: (lex_mode_t) -> suffix_op__PatSub
    """Read the pattern and optional replacement of ${x/pat/replace}.

    Grammar:

      Match     = ('/' | '#' | '%') WORD
      VarSub    = ...
                | VarOf '/' Match '/' WORD

    Args:
      lex_mode: lexer mode passed to _ReadVarOpArg and _Next.

    Returns:
      suffix_op.PatSub(pat, replace, replace_mode).  'replace' is None when
      the substitution closes right after the pattern.  'replace_mode' is
      Id.Undefined_Tok unless the pattern starts with a literal / # or %
      token, in which case it is that token's Id and the token is removed
      from the pattern.

    Parse errors are reported through p_die.
    """
    pat = self._ReadVarOpArg(lex_mode, eof_type=Id.Lit_Slash, empty_ok=False)
    assert isinstance(pat, word__CompoundWord)  # Because empty_ok=False

    if len(pat.parts) == 1:
        ok, s, quoted = word.StaticEval(pat)
        if ok and s == '/' and not quoted:  # Looks like ${a////c}, read again
            # Append the current token after advancing, as a literal part of
            # the pattern.
            self._Next(lex_mode)
            self._Peek()
            p = word_part.LiteralPart(self.cur_token)
            pat.parts.append(p)

    if len(pat.parts) == 0:
        p_die('Pattern in ${x/pat/replace} must not be empty',
              token=self.cur_token)

    replace_mode = Id.Undefined_Tok
    # Check for / # % modifier on pattern.
    first_part = pat.parts[0]
    if isinstance(first_part, word_part__LiteralPart):
        lit_id = first_part.token.id
        if lit_id in (Id.Lit_Slash, Id.Lit_Pound, Id.Lit_Percent):
            # The modifier is not part of the pattern itself.
            pat.parts.pop(0)
            replace_mode = lit_id

    # NOTE: If there is a modifier, the pattern can be empty, e.g.
    # ${s/#/foo} and ${a/%/foo}.

    if self.token_type == Id.Right_VarSub:
        # e.g. ${v/a} is the same as ${v/a/}  -- empty replacement string
        return suffix_op.PatSub(pat, None, replace_mode)

    if self.token_type == Id.Lit_Slash:
        replace = self._ReadVarOpArg(lex_mode)  # do not stop at /

        self._Peek()
        if self.token_type != Id.Right_VarSub:
            # NOTE: I think this never happens.
            # We're either in the VS_ARG_UNQ or VS_ARG_DQ lex state, and
            # everything there is Lit_ or Left_, except for }.
            p_die("Expected } after replacement string, got %s", self.cur_token,
                  token=self.cur_token)

        return suffix_op.PatSub(pat, replace, replace_mode)

    # Happens with ${x//} and ${x///foo}, see test/parse-errors.sh
    p_die("Expected } after pat sub, got %r", self.cur_token.val,
          token=self.cur_token)
def testGitComment(self):
    """Check where '#' starts a comment and where it is part of a word."""
    # ;# is a comment!  Gah.
    # Conclusion: Comments are NOT LEXICAL.  They are part of word parsing.
    git_line = (
        ". \"$TEST_DIRECTORY\"/diff-lib.sh ;# test-lib chdir's into trash\n")
    node = assert_ParseCommandList(self, git_line)
    self.assertEqual(command_e.Sentence, node.tag)
    self.assertEqual(2, len(node.child.words))

    # Each case is (code, expected static value of the second word).
    cases = [
        ('echo foo#bar\n', 'foo#bar'),   # This is NOT a comment
        ('echo foo #comment\n', 'foo'),  # This is a comment
        ('echo foo #\n', 'foo'),         # Empty comment
    ]
    for code_str, expected in cases:
        node = assert_ParseCommandList(self, code_str)
        self.assertEqual(command_e.SimpleCommand, node.tag)
        self.assertEqual(2, len(node.words))

        _, s, _ = word.StaticEval(node.words[1])
        self.assertEqual(expected, s)
def _ParseForEachLoop(self):
    """Parse a for-each loop, starting at the loop variable name.

    self.cur_word must be the loop variable; it has to be an unquoted
    constant that is a valid variable name.  After the name comes either
    'in' followed by the words to iterate over, or ';' / 'do', which mark an
    implicit loop (do_arg_iter is set to True).

    Returns:
      A command.ForEach node with iter_name, do_arg_iter, iter_words (only
      when 'in' is present), body, and spids extended with
      (in_spid, semi_spid).
    """
    loop = command.ForEach()
    loop.do_arg_iter = False

    ok, name, is_quoted = word.StaticEval(self.cur_word)
    if is_quoted or not ok:
        p_die("Loop variable name should be a constant", word=self.cur_word)
    if not match.IsValidVarName(name):
        p_die("Invalid loop variable name", word=self.cur_word)
    loop.iter_name = name

    self._Next()  # skip past name
    self._NewlineOk()

    in_spid = const.NO_INTEGER
    semi_spid = const.NO_INTEGER

    self._Peek()
    if self.c_id == Id.KW_In:
        self._Next()  # skip in

        in_spid = word.LeftMostSpanForWord(self.cur_word) + 1
        iter_words, semi_spid = self.ParseForWords()
        assert iter_words is not None

        # Brace detection first, then tilde detection on its result.
        loop.iter_words = word.TildeDetectAll(
            braces.BraceDetectAll(iter_words))

    elif self.c_id == Id.Op_Semi:
        loop.do_arg_iter = True  # implicit for loop
        self._Next()

    elif self.c_id == Id.KW_Do:
        loop.do_arg_iter = True  # implicit for loop
        # do not advance

    else:  # for foo BAD
        p_die('Unexpected word after for loop variable', word=self.cur_word)

    loop.spids.extend((in_spid, semi_spid))

    body = self.ParseDoGroup()
    assert body is not None
    loop.body = body
    return loop
def DoCommand(self, node, local_symbols, at_top_level=False):
    """Translate one command node, dispatching on node.tag.

    Output is produced by writing replacement text to self.f and by moving
    self.cursor (PrintUntil / SkipUntil) across span IDs taken from the node.
    Compound commands recurse into their children.

    Args:
      node: a command node; node.tag selects the translation.
      local_symbols: symbol table passed through to word, redirect,
        assignment and child-command translation.  The body of each FuncDef
        gets a fresh, empty dict instead.
      at_top_level: forwarded to DoAssignment; propagated only to the
        children of a CommandList.

    Raises:
      RuntimeError: a simple command starts with an unquoted '[' but does
        not end with an unquoted ']'.
      AssertionError: the node tag is not handled, or a case arm has neither
        a ';;' span nor a last span.
    """
    if node.tag == command_e.CommandList:
        # TODO: How to distinguish between echo hi; echo bye; and on separate
        # lines
        for child in node.children:
            self.DoCommand(child, local_symbols, at_top_level=at_top_level)

    elif node.tag == command_e.SimpleCommand:
        # How to preserve spaces between words?  Do you want to do it?
        # Well you need to test this:
        #
        # echo foo \
        #   bar

        # TODO: Need to print until the left most part of the phrase?  The
        # phrase is a word, binding, redirect.
        #self.cursor.PrintUntil()

        if node.more_env:
            (left_spid, ) = node.more_env[0].spids
            self.cursor.PrintUntil(left_spid)
            self.f.write('env ')

            # We only need to transform the right side, not left side.
            for pair in node.more_env:
                self.DoWordInCommand(pair.val, local_symbols)

        # More translations:
        # - . to source
        # - eval to sh-eval

        if node.words:
            first_word = node.words[0]
            ok, val, quoted = word.StaticEval(first_word)
            word0_spid = word.LeftMostSpanForWord(first_word)
            if ok and not quoted:
                if val == '[':
                    last_word = node.words[-1]
                    # Check if last word is ]
                    ok, val, quoted = word.StaticEval(last_word)
                    if ok and not quoted and val == ']':
                        # Replace [ with 'test'
                        self.cursor.PrintUntil(word0_spid)
                        self.cursor.SkipUntil(word0_spid + 1)
                        self.f.write('test')

                        for w in node.words[1:-1]:
                            self.DoWordInCommand(w, local_symbols)

                        # Now omit ]
                        last_spid = word.LeftMostSpanForWord(last_word)
                        # Get the space before
                        self.cursor.PrintUntil(last_spid - 1)
                        # ] takes one spid
                        self.cursor.SkipUntil(last_spid + 1)
                        return
                    else:
                        raise RuntimeError('Got [ without ]')

                elif val == '.':
                    self.cursor.PrintUntil(word0_spid)
                    self.cursor.SkipUntil(word0_spid + 1)
                    self.f.write('source')
                    return

        for w in node.words:
            self.DoWordInCommand(w, local_symbols)

        # NOTE: This will change to "phrase"?  Word or redirect.
        for r in node.redirects:
            self.DoRedirect(r, local_symbols)

        # TODO: Print the terminator.  Could be \n or ;
        # Need to print env like PYTHONPATH = 'foo' && ls
        # Need to print redirects:
        # < > are the same.  << is here string, and >> is assignment.
        # append is >+

        # TODO: static_eval of simple command
        # - [ -> "test".  Eliminate trailing ].
        # - . -> source, etc.

    elif node.tag == command_e.Assignment:
        self.DoAssignment(node, at_top_level, local_symbols)

    elif node.tag == command_e.Pipeline:
        # Obscure: |& turns into |- or |+ for stderr.
        # TODO:
        # if ! true; then -> if not true {

        # if ! echo | grep; then -> if not { echo | grep } {
        # }
        # not is like do {}, but it negates the return value I guess.
        for child in node.children:
            self.DoCommand(child, local_symbols)

    elif node.tag == command_e.AndOr:
        for child in node.children:
            self.DoCommand(child, local_symbols)

    elif node.tag == command_e.Sentence:
        # 'ls &' to 'fork ls'
        # Keep ; the same.
        self.DoCommand(node.child, local_symbols)

    # This has to be different in the function case: FuncDef below walks the
    # children of its BraceGroup body directly, so no 'do' is added there.
    elif node.tag == command_e.BraceGroup:
        # { echo hi; } -> do { echo hi }
        # For now it might be OK to keep 'do { echo hi; }
        #left_spid, right_spid = node.spids
        (left_spid, ) = node.spids

        self.cursor.PrintUntil(left_spid)
        self.cursor.SkipUntil(left_spid + 1)
        self.f.write('do {')

        for child in node.children:
            self.DoCommand(child, local_symbols)

    elif node.tag == command_e.Subshell:
        # (echo hi) -> shell echo hi
        # (echo hi; echo bye) -> shell {echo hi; echo bye}
        (left_spid, right_spid) = node.spids

        self.cursor.PrintUntil(left_spid)
        self.cursor.SkipUntil(left_spid + 1)
        self.f.write('shell {')

        self.DoCommand(node.command_list, local_symbols)

        #self._DebugSpid(right_spid)
        #self._DebugSpid(right_spid + 1)

        #print('RIGHT SPID', right_spid)
        self.cursor.PrintUntil(right_spid)
        self.cursor.SkipUntil(right_spid + 1)
        self.f.write('}')

    elif node.tag == command_e.DParen:
        # (( a == 0 )) is sh-expr ' a == 0 '
        #
        # NOTE: (( n++ )) is auto-translated to sh-expr 'n++', but could be
        # set n++.
        left_spid, right_spid = node.spids
        self.cursor.PrintUntil(left_spid)
        self.cursor.SkipUntil(left_spid + 1)
        self.f.write("sh-expr '")
        self.cursor.PrintUntil(right_spid - 1)  # before ))
        # after )) -- each one is a token
        self.cursor.SkipUntil(right_spid + 1)
        self.f.write("'")

    elif node.tag == command_e.DBracket:
        # [[ 1 -eq 2 ]] to (1 == 2)
        self.DoBoolExpr(node.expr)

    elif node.tag == command_e.FuncDef:
        # TODO: skip name
        #self.f.write('proc %s' % node.name)

        # New symbol table for every function.
        new_local_symbols = {}

        # Should be the left most span, including 'function'
        self.cursor.PrintUntil(node.spids[0])

        self.f.write('proc ')
        self.f.write(node.name)
        self.cursor.SkipUntil(node.spids[1])

        if node.body.tag == command_e.BraceGroup:
            # Don't add "do" like a standalone brace group.  Just use {}.
            for child in node.body.children:
                self.DoCommand(child, new_local_symbols)
        else:
            pass
            # Add {}.
            # proc foo {
            #   shell {echo hi; echo bye}
            # }
            #self.DoCommand(node.body)

    # NOTE: A second 'elif node.tag == command_e.BraceGroup' branch used to
    # follow here.  It could never run because the BraceGroup branch above
    # matches first, so it was removed.

    elif node.tag == command_e.DoGroup:
        do_spid, done_spid = node.spids
        self.cursor.PrintUntil(do_spid)
        self.cursor.SkipUntil(do_spid + 1)
        self.f.write('{')

        for child in node.children:
            self.DoCommand(child, local_symbols)

        self.cursor.PrintUntil(done_spid)
        self.cursor.SkipUntil(done_spid + 1)
        self.f.write('}')

    elif node.tag == command_e.ForEach:
        # Need to preserve spaces between words, because there can be line
        # wrapping.
        # for x in a b c \
        #    d e f; do
        in_spid, semi_spid = node.spids

        if in_spid == const.NO_INTEGER:
            #self.cursor.PrintUntil()  # 'for x' and then space
            self.f.write('for %s in @Argv ' % node.iter_name)
            self.cursor.SkipUntil(node.body.spids[0])
        else:
            # 'for x in' and then space
            self.cursor.PrintUntil(in_spid + 1)
            self.f.write('[')
            for w in node.iter_words:
                self.DoWordInCommand(w, local_symbols)
            self.f.write(']')
            #print("SKIPPING SEMI %d" % semi_spid, file=sys.stderr)

        if semi_spid != const.NO_INTEGER:
            self.cursor.PrintUntil(semi_spid)
            self.cursor.SkipUntil(semi_spid + 1)

        self.DoCommand(node.body, local_symbols)

    elif node.tag == command_e.ForExpr:
        # Change (( )) to ( ), and then _FixDoGroup
        pass

    elif node.tag == command_e.WhileUntil:
        # Skip 'until', and replace it with 'while not'
        if node.keyword.id == Id.KW_Until:
            kw_spid = node.keyword.span_id
            self.cursor.PrintUntil(kw_spid)
            self.f.write('while not')
            self.cursor.SkipUntil(kw_spid + 1)

        cond = node.cond
        # Skip the semi-colon in the condition, which is usually a Sentence
        if len(cond) == 1 and cond[0].tag == command_e.Sentence:
            self.DoCommand(cond[0].child, local_symbols)
            semi_spid = cond[0].terminator.span_id
            self.cursor.SkipUntil(semi_spid + 1)

        self.DoCommand(node.body, local_symbols)

    elif node.tag == command_e.If:
        else_spid, fi_spid = node.spids

        # if foo; then -> if foo {
        # elif foo; then -> } elif foo {
        for arm in node.arms:
            elif_spid, then_spid = arm.spids
            if elif_spid != const.NO_INTEGER:
                self.cursor.PrintUntil(elif_spid)
                self.f.write('} ')

            cond = arm.cond
            if len(cond) == 1 and cond[0].tag == command_e.Sentence:
                sentence = cond[0]
                self.DoCommand(sentence, local_symbols)

                # Remove semi-colon
                semi_spid = sentence.terminator.span_id
                self.cursor.PrintUntil(semi_spid)
                self.cursor.SkipUntil(semi_spid + 1)
            else:
                for child in arm.cond:
                    self.DoCommand(child, local_symbols)

            self.cursor.PrintUntil(then_spid)
            self.cursor.SkipUntil(then_spid + 1)
            self.f.write('{')

            for child in arm.action:
                self.DoCommand(child, local_symbols)

        # else -> } else {
        if node.else_action:
            self.cursor.PrintUntil(else_spid)
            self.f.write('} ')
            self.cursor.PrintUntil(else_spid + 1)
            self.f.write(' {')

            for child in node.else_action:
                self.DoCommand(child, local_symbols)

        # fi -> }
        self.cursor.PrintUntil(fi_spid)
        self.cursor.SkipUntil(fi_spid + 1)
        self.f.write('}')

    elif node.tag == command_e.Case:
        case_spid, in_spid, esac_spid = node.spids
        self.cursor.PrintUntil(case_spid)
        self.cursor.SkipUntil(case_spid + 1)
        self.f.write('match')

        # Reformat "$1" to $1
        self.DoWordInCommand(node.to_match, local_symbols)

        self.cursor.PrintUntil(in_spid)
        self.cursor.SkipUntil(in_spid + 1)
        self.f.write('{')  # matchstr $var {

        # each arm needs the ) and the ;; node to skip over?
        for arm in node.arms:
            left_spid, rparen_spid, dsemi_spid, last_spid = arm.spids
            #print(left_spid, rparen_spid, dsemi_spid)

            self.cursor.PrintUntil(left_spid)
            # Hm maybe keep | because it's semi-deprecated?  You can use
            # reload|force-reload {
            # }
            # e/reload|force-reload/ {
            # }
            # / 'reload' or 'force-reload' / {
            # }
            #
            # Yeah it's the more abbreviated syntax.

            # change | to 'or'  (placeholder: patterns are not rewritten yet)
            for pat in arm.pat_list:
                pass

            self.f.write('with ')
            # Remove the )
            self.cursor.PrintUntil(rparen_spid)
            self.cursor.SkipUntil(rparen_spid + 1)

            for child in arm.action:
                self.DoCommand(child, local_symbols)

            if dsemi_spid != const.NO_INTEGER:
                # Remove ;;
                self.cursor.PrintUntil(dsemi_spid)
                self.cursor.SkipUntil(dsemi_spid + 1)
            elif last_spid != const.NO_INTEGER:
                self.cursor.PrintUntil(last_spid)
            else:
                raise AssertionError(
                    "Expected with dsemi_spid or last_spid in case arm")

        self.cursor.PrintUntil(esac_spid)
        self.cursor.SkipUntil(esac_spid + 1)
        self.f.write('}')  # strmatch $var {

    elif node.tag == command_e.NoOp:
        pass

    elif node.tag == command_e.ControlFlow:
        # No change for break / return / continue
        pass

    elif node.tag == command_e.TimeBlock:
        self.DoCommand(node.pipeline, local_symbols)

    else:
        #log('Command not handled: %s', node)
        raise AssertionError(node.__class__.__name__)
def DoRedirect(self, node, local_symbols):
    """Translate one redirect node (redir_e.Redir or redir_e.HereDoc).

    Everything up to the operator is printed first.  For a Redir, an explicit
    descriptor is written as '!N ', the operators > and >& are rewritten, and
    the argument word is translated.  For a HereDoc, the operator and
    delimiter are replaced by << followed by triple quotes (single quotes
    when the delimiter was quoted, double quotes otherwise), the body parts
    are translated, and the closing triple quote is written.

    Raises:
      AssertionError: node.tag is neither Redir nor HereDoc.
    """
    #print(node, file=sys.stderr)
    redir_op = node.op
    op_spid = redir_op.span_id
    op_id = redir_op.id
    self.cursor.PrintUntil(op_spid)

    # TODO:
    # - Do < and <& the same way.
    # - How to handle here docs and here docs?
    # - >> becomes >+ or >-, or maybe >>>

    if node.tag == redir_e.Redir:
        if node.fd != const.NO_INTEGER:
            # NOTE: Spacing like !2>err.txt vs !2 > err.txt can be done in
            # the formatter.
            self.f.write('!%d ' % node.fd)

        # The operator is handled the same way with or without a descriptor.
        if op_id == Id.Redir_Great:
            self.f.write('>')  # Allow us to replace the operator
            self.cursor.SkipUntil(op_spid + 1)
        elif op_id == Id.Redir_GreatAnd:
            # Replace >& 2 with > !2, and 1>& 2 with !1 > !2
            self.f.write('> !')
            self.cursor.SkipUntil(word.LeftMostSpanForWord(node.arg_word))

        self.DoWordInCommand(node.arg_word, local_symbols)

    elif node.tag == redir_e.HereDoc:
        ok, _, delim_quoted = word.StaticEval(node.here_begin)
        if not ok:
            p_die('Invalid here doc delimiter', word=node.here_begin)

        # Turn everything into <<.  We just change the quotes
        self.f.write('<<')
        self.f.write(" '''" if delim_quoted else ' """')

        delim_end_spid = word.RightMostSpanForWord(node.here_begin)
        self.cursor.SkipUntil(delim_end_spid + 1)

        # Now print the lines.  TODO: Have a flag to indent these to the
        # level of the owning command, e.g.
        #   cat <<EOF
        #     EOF
        # Or since most here docs are the top level, you could just have a
        # hack for a fixed indent?  TODO: Look at real use cases.
        for part in node.stdin_parts:
            self.DoWordPart(part, local_symbols)

        self.cursor.SkipUntil(node.here_end_span_id + 1)
        self.f.write("'''\n" if delim_quoted else '"""\n')

    else:
        raise AssertionError(node.__class__.__name__)

    # Target syntax examples:
    #
    # <<< 'here word'
    # << 'here word'
    #
    # 2> out.txt
    # !2 > out.txt
    #
    # cat 1<< EOF
    # hello $name
    # EOF
    # cat !1 << """
    # hello $name
    # """
    #
    # cat << 'EOF'
    # no expansion
    # EOF
    #   cat <<- 'EOF'
    #   no expansion and indented
    #
    # cat << '''
    # no expansion
    # '''
    #   cat << '''
    #     no expansion and indented
    #     '''
    #
    # Warn about multiple here docs on a line.
    # As an obscure feature, allow
    # cat << \'ONE' << \"TWO"
    # 123
    # ONE
    # 234
    # TWO
    # The _ is an indicator that it's not a string to be piped in.
def ParseSimpleCommand(self, cur_aliases):
    """Parse a simple command: assignment, control flow, or word command.

    Fixed transcription of the POSIX grammar (TODO: port to grammar/Shell.g)

    io_file        : '<'       filename
                   | LESSAND   filename
                     ...

    io_here        : DLESS     here_end
                   | DLESSDASH here_end

    redirect       : IO_NUMBER (io_redirect | io_here)

    prefix_part    : ASSIGNMENT_WORD | redirect
    cmd_part       : WORD | redirect

    assign_kw      : Declare | Export | Local | Readonly

    # Without any words it is parsed as a command, not an assignment
    assign_listing : assign_kw

    # Now we have something to do (might be changing assignment flags too)
    # NOTE: any prefixes should be a warning, but they are allowed in shell.
    assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

    # an external command, a function call, or a builtin -- a "word_command"
    word_command   : prefix_part* cmd_part+

    simple_command : assign_listing
                   | assignment
                   | proc_command

    Simple imperative algorithm:

    1) Read a list of words and redirects.  Append them to separate lists.
    2) Look for the first non-assignment word.  If it's declare, etc., then
       keep parsing words AND assign words.  Otherwise, just parse words.
    3) If there are no non-assignment words, then it's a global assignment.

    { redirects, global assignments } OR
    { redirects, prefix_bindings, words } OR
    { redirects, ERROR_prefix_bindings, keyword, assignments, words }

    THEN CHECK that prefix bindings don't have any array literal parts!
    Global assignments and keyword assignments can have them of course.
    Well actually EXPORT shouldn't have them either -- WARNING

    3 cases we want to warn: prefix_bindings for assignment, and array
    literal in prefix bindings, or export

    A command can be an assignment word, word, or redirect on its own.

        ls
        >out.txt

        >out.txt FOO=bar   # this touches the file, and then

    Or any sequence:
        ls foo bar
        <in.txt ls foo bar >out.txt
        <in.txt ls >out.txt foo bar

    Or add one or more environment bindings:
        VAR=val env
        >out.txt VAR=val env

    here_end vs filename is a matter of whether we test that it's quoted.
    e.g. <<EOF vs <<'EOF'.

    Args:
      cur_aliases: passed to _MaybeExpandAliases.

    Returns:
      A command node: SimpleCommand, Assignment, ControlFlow, or whatever
      node the alias expansion parsed to.
    """
    redirects, words = self._ScanSimpleCommand()

    if not words:
        # e.g.  >out.txt  # redirect without words
        node = command.SimpleCommand()
        node.redirects = redirects
        return node

    preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)

    if not suffix_words:
        # ONE=1 a[x]=1 TWO=2  (with no other words)
        if redirects:
            left_token, _, _, _ = preparsed_list[0]
            p_die("Global assignment shouldn't have redirects",
                  token=left_token)

        pairs = [_MakeAssignPair(self.parse_ctx, preparsed)
                 for preparsed in preparsed_list]

        node = command.Assignment(Id.Assign_None, [], pairs)
        # no keyword spid to skip past
        node.spids.append(word.LeftMostSpanForWord(words[0]))
        return node

    kind, kw_token = word.KeywordToken(suffix_words[0])

    if kind == Kind.Assign:
        # Here we StaticEval suffix_words[1] to see if we have an ASSIGNMENT
        # COMMAND like 'typeset -p', which lists variables -- a
        # SimpleCommand rather than an Assignment.
        #
        # Note we're not handling duplicate flags like 'typeset -pf'.  I see
        # this in bashdb (bash debugger) but it can just be changed to
        # 'typeset -p -f'.
        if len(suffix_words) > 1:
            ok, val, _ = word.StaticEval(suffix_words[1])
            if ok and (kw_token.id, val) in self._ASSIGN_COMMANDS:
                # declare -f, declare -p, typeset -p, etc.
                return _MakeSimpleCommand(preparsed_list, suffix_words,
                                          redirects)

        if redirects:
            # Attach the error location to the keyword.
            p_die("Assignments shouldn't have redirects", token=kw_token)

        if preparsed_list:
            # FOO=bar local spam=eggs not allowed
            # Use the location of the first value.  TODO: Use the whole word
            # before splitting.
            left_token, _, _, _ = preparsed_list[0]
            p_die("Assignments shouldn't have environment bindings",
                  token=left_token)

        # declare str='', declare -a array=()
        node = _MakeAssignment(self.parse_ctx, kw_token.id, suffix_words)
        node.spids.append(kw_token.span_id)
        return node

    if kind == Kind.ControlFlow:
        if redirects:
            p_die("Control flow shouldn't have redirects", token=kw_token)

        if preparsed_list:
            # FOO=bar local spam=eggs not allowed
            # TODO: Change location as above
            left_token, _, _, _ = preparsed_list[0]
            p_die("Control flow shouldn't have environment bindings",
                  token=left_token)

        # Attach the token for errors.  (Assignment may not need it.)
        num_words = len(suffix_words)
        if num_words == 1:
            arg_word = None
        elif num_words == 2:
            arg_word = suffix_words[1]
        else:
            p_die('Unexpected argument to %r', kw_token.val,
                  word=suffix_words[2])

        return command.ControlFlow(kw_token, arg_word)

    # If any expansions were detected, then parse again.
    expanded_node = self._MaybeExpandAliases(suffix_words, cur_aliases)
    if expanded_node:
        # NOTE: There are other types of nodes with redirects.  Do they
        # matter?
        if expanded_node.tag == command_e.SimpleCommand:
            expanded_node.redirects = redirects
            _AppendMoreEnv(preparsed_list, expanded_node.more_env)
        return expanded_node

    # TODO check that we don't have env1=x x[1]=y env2=z here.

    # FOO=bar printenv.py FOO
    return _MakeSimpleCommand(preparsed_list, suffix_words, redirects)
def _MaybeExpandAliases(self, words, cur_aliases):
    """Try to expand aliases at the start of a list of words.

    Our implementation of alias has two design choices:
    - Where to insert it in parsing.  We do it at the end of
      ParseSimpleCommand.
    - What grammar rule to parse the expanded alias buffer with.  In our
      case it's ParseCommand().

    This doesn't quite match what other shells do, but I can't figure out a
    better place.

    Most test cases pass, except for ones like:

    alias LBRACE='{'
    LBRACE echo one; echo two; }

    alias MULTILINE='echo 1
    echo 2
    echo 3'
    MULTILINE

    NOTE: dash handles aliases in a totally different way.  It has a global
    variable checkkwd in parser.c.  It assigns it all over the grammar, like
    this:

    checkkwd = CHKNL | CHKKWD | CHKALIAS;

    The readtoken() function checks (checkkwd & CHKALIAS) and then calls
    lookupalias().  This seems to provide a consistent behavior among
    shells, but it's less modular and testable.

    Bash also uses a global 'parser_state & PST_ALEXPNEXT'.

    Args:
      words: the words of the command; must be non-empty.
      cur_aliases: list of (alias name, word index) pairs already expanded.
        Appended to in place, and passed on to the nested ParseCommand.

    Returns:
      A command node if any aliases were expanded, or None otherwise.
    """
    # The last char that we might parse.
    right_spid = word.RightMostSpanForWord(words[-1])

    first_word_str = None  # for error message
    pieces = []
    num_words = len(words)
    i = 0

    while i < num_words:
        ok, word_str, quoted = word.StaticEval(words[i])
        if not ok or quoted:
            break

        alias_exp = self.aliases.get(word_str)
        if alias_exp is None:
            break

        # Prevent infinite loops.  This is subtle: we want to prevent
        # infinite expansion of alias echo='echo x'.  But we don't want to
        # prevent expansion of the second word in 'echo echo', so we add 'i'
        # to "cur_aliases".
        alias_key = (word_str, i)
        if alias_key in cur_aliases:
            break

        if i == 0:
            first_word_str = word_str  # for error message

        #log('%r -> %r', word_str, alias_exp)
        cur_aliases.append(alias_key)
        pieces.append(alias_exp)
        i += 1

        if not alias_exp.endswith(' '):
            # alias e='echo [ ' is the same expansion as
            # alias e='echo ['
            # The trailing space indicates whether we should continue to
            # expand aliases; it's not part of it.
            pieces.append(' ')
            break  # No more expansions

    if not pieces:  # No expansions; caller does parsing.
        return None

    # We got some expansion.  Now copy the rest of the words.

    # We need each NON-REDIRECT word separately!  For example:
    # $ echo one >out two
    # dash/mksh/zsh go beyond the first redirect!
    for w in words[i:]:
        left_spid = word.LeftMostSpanForWord(w)
        right_spid = word.RightMostSpanForWord(w)

        # Adapted from tools/osh2oil.py Cursor.PrintUntil
        for span_id in xrange(left_spid, right_spid + 1):
            span = self.arena.GetLineSpan(span_id)
            src_line = self.arena.GetLine(span.line_id)
            pieces.append(src_line[span.col : span.col + span.length])

        pieces.append(' ')  # Put space back between words.

    code_str = ''.join(pieces)
    lines = code_str.splitlines(True)  # Keep newlines

    line_info = []
    # TODO: Add location information
    self.arena.PushSource(
        '<expansion of alias %r at line %d of %s>' %
        (first_word_str, -1, 'TODO'))
    try:
        for line_num, line in enumerate(lines, 1):
            line_id = self.arena.AddLine(line, line_num)
            line_info.append((line_id, line, 0))
    finally:
        self.arena.PopSource()

    line_reader = reader.VirtualLineReader(line_info, self.arena)
    cp = self.parse_ctx.MakeOshParser(line_reader)

    try:
        node = cp.ParseCommand(cur_aliases=cur_aliases)
    except util.ParseError:
        # Failure to parse alias expansion is a fatal error.
        # We don't need more handling here.
        raise

    if 0:
        log('AFTER expansion:')
        from osh import ast_lib
        ast_lib.PrettyPrint(node)
    return node
def testPatSub(self):
    """Check parsing of ${var/pat/replace} and its modifier variants."""
    # (source, expected replace_mode).  All four have pattern 'pat' and
    # replacement 'replace', both unquoted.
    mode_cases = [
        ('${var/pat/replace}', Id.Undefined_Tok),
        ('${var//pat/replace}', Id.Lit_Slash),    # sub all
        ('${var/%pat/replace}', Id.Lit_Percent),  # % anchors at the end
        ('${var/#pat/replace}', Id.Lit_Pound),    # # anchors at the start
    ]
    for code_str, expected_mode in mode_cases:
        w = _assertReadWord(self, code_str)
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted('replace', op.replace)
        self.assertEqual(expected_mode, op.replace_mode)

    # No replacement.  The first form only has to be readable.
    _assertReadWord(self, '${var/pat}')
    w = _assertReadWord(self, '${var//pat}')
    op = _GetSuffixOp(self, w)
    self.assertUnquoted('pat', op.pat)
    self.assertEqual(None, op.replace)
    self.assertEqual(Id.Lit_Slash, op.replace_mode)

    # Replace with one slash, then with two unquoted slashes.
    for code_str, expected_replace in [('${var/pat//}', '/'),
                                       ('${var/pat///}', '//')]:
        w = _assertReadWord(self, code_str)
        op = _GetSuffixOp(self, w)
        self.assertUnquoted('pat', op.pat)
        self.assertUnquoted(expected_replace, op.replace)

    # Replace with two slashes, quoted.
    w = _assertReadWord(self, '${var/pat/"//"}')
    op = _GetSuffixOp(self, w)
    self.assertUnquoted('pat', op.pat)

    ok, s, quoted = word.StaticEval(op.replace)
    self.assertTrue(ok)
    self.assertEqual('//', s)
    self.assertTrue(quoted)

    # Real example found in the wild!
    # http://www.oilshell.org/blog/2016/11/07.html
    w = _assertReadWord(self, r'${var////\\/}')
    op = _GetSuffixOp(self, w)
    self.assertEqual(Id.Lit_Slash, op.replace_mode)
    self.assertUnquoted('/', op.pat)

    ok, s, quoted = word.StaticEval(op.replace)
    self.assertTrue(ok)
    self.assertEqual(r'\/', s)
def assertUnquoted(self, expected, w):
    """Assert that word 'w' statically evaluates to 'expected', unquoted."""
    ok, value, is_quoted = word.StaticEval(w)
    self.assertTrue(ok)
    self.assertEqual(expected, value)
    self.assertFalse(is_quoted)