def _Visit(self, node):
  """ """
  #log('VISIT %s', node.__class__.__name__)

  # NOTE: The tags are not unique!!!  We would need this:
  # if isinstance(node, ast.command) and node.tag == command_e.SimpleCommand:
  # But it's easier to check the __class__ attribute.
  cls = node.__class__
  if cls is ast.SimpleCommand:
    #log('SimpleCommand %s', node.words)
    #log('--')
    #ast_lib.PrettyPrint(node)

    # Things to consider:
    # - source and .
    # - DONE builtins: get a list from builtin.py
    # - DONE functions: have to enter function definitions into a dictionary
    # - Commands that call others: sudo, su, find, xargs, etc.
    # - builtins that call others: exec, command
    #   - except not command -v!

    if not node.words:
      return

    w = node.words[0]
    ok, argv0, _ = word.StaticEval(w)
    if not ok:
      log("Couldn't statically evaluate %r", w)
      return

    if (builtin.ResolveSpecial(argv0) == builtin_e.NONE and
        builtin.Resolve(argv0) == builtin_e.NONE):
      self.progs_used[argv0] = True

    # NOTE: If argv1 is $0, then we do NOT print a warning!
    if argv0 == 'sudo':
      if len(node.words) < 2:
        return
      w1 = node.words[1]
      ok, argv1, _ = word.StaticEval(w1)
      if not ok:
        log("Couldn't statically evaluate %r", w1)
        return

      # Should we mark them behind 'sudo'?  e.g. "sudo apt install"?
      self.progs_used[argv1] = True

  elif cls is ast.FuncDef:
    self.funcs_defined[node.name] = True
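
# --- Illustrative sketch (not Oil's code): the dependency visitor above boils
# down to "record statically-known argv0 strings that aren't builtins, plus the
# word after sudo".  BUILTINS and collect_progs_used are hypothetical names;
# the real code uses builtin.Resolve*/word.StaticEval and skips words it can't
# evaluate statically.
BUILTINS = {'echo', 'cd', 'read', 'export', 'eval', 'exec', 'command'}

def collect_progs_used(commands):
  """commands: a list of argv lists (already statically evaluated)."""
  progs_used = set()
  for argv in commands:
    if not argv:
      continue
    argv0 = argv[0]
    if argv0 not in BUILTINS:
      progs_used.add(argv0)          # external program dependency
    if argv0 == 'sudo' and len(argv) >= 2:
      progs_used.add(argv[1])        # 'sudo apt install' also records 'apt'
  return progs_used

print(collect_progs_used([['echo', 'hi'], ['grep', 'foo'], ['sudo', 'apt']]))
# {'grep', 'sudo', 'apt'}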
def ParseRedirect(self):
  """
  Problem: You don't know which kind of redir_node to instantiate before
  this?  You could stuff them all in one node, and then have a switch() on
  the type.

  You need different types.
  """
  if not self._Peek(): return None
  assert self.c_kind == Kind.Redir, self.cur_word

  left_spid = self.cur_word.token.span_id

  # For now only supporting single digit descriptor
  first_char = self.cur_word.token.val[0]
  if first_char.isdigit():
    fd = int(first_char)
  else:
    fd = -1

  if self.c_id in (Id.Redir_DLess, Id.Redir_DLessDash):  # here doc
    node = ast.HereDoc()
    node.op_id = self.c_id
    node.arg_word = None  # not read yet
    node.fd = fd
    node.was_filled = False
    node.spids.append(left_spid)
    self._Next()

    if not self._Peek(): return None
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded.  Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, node.here_end, quoted = word.StaticEval(self.cur_word)
    if not ok:
      self._BadWord('Error evaluating here doc delimiter: %s', self.cur_word)
      return None
    node.do_expansion = not quoted
    self._Next()

  else:
    node = ast.Redirect()
    node.op_id = self.c_id
    node.fd = fd
    node.spids.append(left_spid)
    self._Next()

    if not self._Peek(): return None
    if self.c_kind != Kind.Word:
      self.AddErrorContext(
          'Expected word after redirect operator', word=self.cur_word)
      return None

    new_word = word.TildeDetect(self.cur_word)
    node.arg_word = new_word or self.cur_word
    self._Next()

  return node
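
# --- Illustrative sketch (not Oil's code) of the POSIX rule quoted above: the
# here-doc delimiter is the word after quote removal, and the body is expanded
# only if no character of the word was quoted.  The (text, quoted) pairs are a
# toy stand-in for what word.StaticEval computes from real word parts.
def here_doc_delimiter(parts):
  here_end = ''.join(text for text, _ in parts)         # quote removal
  do_expansion = not any(quoted for _, quoted in parts)
  return here_end, do_expansion

print(here_doc_delimiter([('EOF', False)]))              # ('EOF', True)   EOF
print(here_doc_delimiter([('EOF', True)]))               # ('EOF', False)  'EOF'
print(here_doc_delimiter([('E', True), ('OF', False)]))  # ('EOF', False)  \EOF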
def testStaticEvalWord(self):
  expr = r'\EOF'  # Quoted here doc delimiter
  w_parser = InitWordParser(expr)
  w = w_parser.ReadWord(LexMode.OUTER)
  ok, s, quoted = word.StaticEval(w)
  self.assertEqual(True, ok)
  self.assertEqual('EOF', s)
  self.assertEqual(True, quoted)
def _ReadPatSubVarOp(self, lex_mode):
  """
  Match  = ('/' | '#' | '%') WORD
  VarSub = ...
         | VarOf '/' Match '/' WORD
  """
  do_all = False
  do_prefix = False
  do_suffix = False

  pat = self._ReadVarOpArg(lex_mode, eof_type=Id.Lit_Slash, empty_ok=False)
  if not pat: return None

  if len(pat.parts) == 1:
    ok, s, quoted = word.StaticEval(pat)
    if ok and s == '/' and not quoted:  # Looks like ${a////c}, read again
      self._Next(lex_mode)
      self._Peek()
      p = ast.LiteralPart(self.cur_token)
      pat.parts.append(p)

  if len(pat.parts) == 0:
    self._BadToken("Pattern must not be empty: %r", token=self.cur_token)
    return None
  else:
    first_part = pat.parts[0]
    if first_part.tag == word_part_e.LiteralPart:
      lit_id = first_part.token.id
      if lit_id == Id.Lit_Slash:
        do_all = True
        pat.parts.pop(0)
      elif lit_id == Id.Lit_Pound:
        do_prefix = True
        pat.parts.pop(0)
      elif lit_id == Id.Lit_Percent:
        do_suffix = True
        pat.parts.pop(0)

  #self._Peek()
  if self.token_type == Id.Right_VarSub:
    # e.g. ${v/a} is the same as ${v/a/}  -- empty replacement string
    return ast.PatSub(pat, None, do_all, do_prefix, do_suffix)

  elif self.token_type == Id.Lit_Slash:
    replace = self._ReadVarOpArg(lex_mode)  # do not stop at /
    if not replace: return None

    self._Peek()
    if self.token_type == Id.Right_VarSub:
      return ast.PatSub(pat, replace, do_all, do_prefix, do_suffix)
    else:
      self._BadToken("Expected } after pat sub, got %s", self.cur_token)
      return None

  else:
    self._BadToken("Expected } after pat sub, got %s", self.cur_token)
    return None
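
# --- Illustrative sketch (not Oil's code): what the leading '/', '#', or '%'
# modifiers parsed above mean at evaluation time, reduced to literal,
# non-empty patterns on plain strings (real patterns are globs).
def pat_sub(s, pat, rep, do_all=False, do_prefix=False, do_suffix=False):
  if do_prefix:                       # ${var/#pat/rep}: match at the start
    return rep + s[len(pat):] if s.startswith(pat) else s
  if do_suffix:                       # ${var/%pat/rep}: match at the end
    return s[:-len(pat)] + rep if s.endswith(pat) else s
  if do_all:                          # ${var//pat/rep}: replace every match
    return s.replace(pat, rep)
  return s.replace(pat, rep, 1)       # ${var/pat/rep}: replace first match

print(pat_sub('aXbXc', 'X', '-'))                 # a-bXc
print(pat_sub('aXbXc', 'X', '-', do_all=True))    # a-b-c
print(pat_sub('Xabc', 'X', '-', do_prefix=True))  # -abc
print(pat_sub('abcX', 'X', '-', do_suffix=True))  # abc-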
def _ParseForEachLoop(self):
  node = ast.ForEach()
  node.do_arg_iter = False

  ok, iter_name, quoted = word.StaticEval(self.cur_word)
  if not ok or quoted:
    self.AddErrorContext(
        "Invalid for loop variable", word=self.cur_word)
    return None
  if not VAR_NAME_RE.match(iter_name):
    self.AddErrorContext(
        "Invalid for loop variable name", word=self.cur_word)
    return None
  node.iter_name = iter_name
  self._Next()  # skip past name

  if not self._NewlineOk(): return None

  in_spid = const.NO_INTEGER
  semi_spid = const.NO_INTEGER

  if not self._Peek(): return None
  if self.c_id == Id.KW_In:
    self._Next()  # skip in

    in_spid = word.LeftMostSpanForWord(self.cur_word) + 1
    x = self.ParseForWords()
    if x is None:
      return None
    iter_words, semi_spid = x
    words2 = braces.BraceDetectAll(iter_words)
    words3 = word.TildeDetectAll(words2)

    if iter_words is None:  # empty list of words is OK
      return None
    node.iter_words = words3

  elif self.c_id == Id.Op_Semi:
    node.do_arg_iter = True  # implicit for loop
    self._Next()

  elif self.c_id == Id.KW_Do:
    node.do_arg_iter = True  # implicit for loop
    # do not advance

  else:
    self.AddErrorContext("Unexpected word in for loop: %s", self.cur_word,
                         word=self.cur_word)
    return None

  node.spids.extend((in_spid, semi_spid))

  body_node = self.ParseDoGroup()
  if not body_node: return None
  node.body = body_node
  return node
def _ReadPatSubVarOp(self, lex_mode):
  """
  Match  = ('/' | '#' | '%') WORD
  VarSub = ...
         | VarOf '/' Match '/' WORD
  """
  pat = self._ReadVarOpArg(lex_mode, eof_type=Id.Lit_Slash, empty_ok=False)

  if len(pat.parts) == 1:
    ok, s, quoted = word.StaticEval(pat)
    if ok and s == '/' and not quoted:  # Looks like ${a////c}, read again
      self._Next(lex_mode)
      self._Peek()
      p = ast.LiteralPart(self.cur_token)
      pat.parts.append(p)

  if len(pat.parts) == 0:
    p_die('Pattern in ${x/pat/replace} must not be empty',
          token=self.cur_token)

  replace_mode = Id.Undefined_Tok
  # Check for / # % modifier on pattern.
  first_part = pat.parts[0]
  if first_part.tag == word_part_e.LiteralPart:
    lit_id = first_part.token.id
    if lit_id in (Id.Lit_Slash, Id.Lit_Pound, Id.Lit_Percent):
      pat.parts.pop(0)
      replace_mode = lit_id

  # NOTE: If there is a modifier, the pattern can be empty, e.g.
  # ${s/#/foo} and ${a/%/foo}.

  if self.token_type == Id.Right_VarSub:
    # e.g. ${v/a} is the same as ${v/a/}  -- empty replacement string
    return ast.PatSub(pat, None, replace_mode)

  if self.token_type == Id.Lit_Slash:
    replace = self._ReadVarOpArg(lex_mode)  # do not stop at /

    self._Peek()
    if self.token_type != Id.Right_VarSub:
      # NOTE: I think this never happens.
      # We're either in the VS_ARG_UNQ or VS_ARG_DQ lex state, and everything
      # there is Lit_ or Left_, except for }.
      p_die("Expected } after replacement string, got %s", self.cur_token,
            token=self.cur_token)

    return ast.PatSub(pat, replace, replace_mode)

  # Happens with ${x//} and ${x///foo}, see test/parse-errors.sh
  p_die("Expected } after pat sub, got %r", self.cur_token.val,
        token=self.cur_token)
def testGitComment(self):
  # ;# is a comment!  Gah.
  # Conclusion: Comments are NOT LEXICAL.  They are part of word parsing.

  node = assert_ParseCommandList(self, """\
. "$TEST_DIRECTORY"/diff-lib.sh ;# test-lib chdir's into trash
""")
  self.assertEqual(command_e.Sentence, node.tag)
  self.assertEqual(2, len(node.child.words))

  # This is NOT a comment
  node = assert_ParseCommandList(self, """\
echo foo#bar
""")
  self.assertEqual(command_e.SimpleCommand, node.tag)
  self.assertEqual(2, len(node.words))
  _, s, _ = word.StaticEval(node.words[1])
  self.assertEqual('foo#bar', s)

  # This is a comment
  node = assert_ParseCommandList(self, """\
echo foo #comment
""")
  self.assertEqual(command_e.SimpleCommand, node.tag)
  self.assertEqual(2, len(node.words))
  _, s, _ = word.StaticEval(node.words[1])
  self.assertEqual('foo', s)

  # Empty comment
  node = assert_ParseCommandList(self, """\
echo foo #
""")
  self.assertEqual(command_e.SimpleCommand, node.tag)
  self.assertEqual(2, len(node.words))
  _, s, _ = word.StaticEval(node.words[1])
  self.assertEqual('foo', s)
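
# --- Illustrative sketch (not Oil's code, and deliberately cruder): the test
# above shows '#' starts a comment only when it begins a new word.  A rough
# line-level approximation of that rule, ignoring quoting -- which is exactly
# why the real answer is "comments are part of word parsing, not lexical".
def strip_comment(line):
  for i, ch in enumerate(line):
    if ch == '#' and (i == 0 or line[i - 1] in ' \t;'):
      return line[:i]
  return line

print(strip_comment('echo foo#bar'))       # 'echo foo#bar'  (not a comment)
print(strip_comment('echo foo #comment'))  # 'echo foo '
print(strip_comment('. lib.sh ;# note'))   # '. lib.sh ;'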
def _ParseForEachLoop(self):
  node = ast.ForEach()
  node.do_arg_iter = False

  ok, value, quoted = word.StaticEval(self.cur_word)
  if not ok or quoted:
    self.AddErrorContext(
        "Invalid for loop variable: %s", self.cur_word, word=self.cur_word)
    return None
  node.iter_name = value
  self._Next()  # skip past name

  if not self._NewlineOk(): return None

  in_spid = -1
  semi_spid = -1

  if not self._Peek(): return None
  if self.c_id == Id.KW_In:
    self._Next()  # skip in

    in_spid = word.LeftMostSpanForWord(self.cur_word) + 1
    iter_words, semi_spid = self.ParseForWords()
    if iter_words is None:  # empty list of words is OK
      return None
    node.iter_words = iter_words

  elif self.c_id == Id.Op_Semi:
    node.do_arg_iter = True  # implicit for loop
    self._Next()

  elif self.c_id == Id.KW_Do:
    node.do_arg_iter = True  # implicit for loop
    # do not advance

  else:
    self.AddErrorContext("Unexpected word in for loop: %s", self.cur_word,
                         word=self.cur_word)
    return None

  node.spids.extend((in_spid, semi_spid))

  body_node = self.ParseDoGroup()
  if not body_node: return None
  node.body = body_node
  return node
def _MakeAssignment(self, assign_kw, suffix_words):
  bindings = []
  for i, w in enumerate(suffix_words):
    if i == 0:
      continue  # skip over local, export, etc.

    left_spid = word.LeftMostSpanForWord(w)

    kv = word.LooksLikeAssignment(w)
    if kv:
      k, v = kv
      t = word.TildeDetect(v)
      if t:
        # t is an unevaluated word with TildeSubPart
        pair = (k, t, left_spid)
      else:
        pair = (k, v, left_spid)  # v is unevaluated word
    else:
      # In aboriginal in variables/sources: export_if_blank does export "$1".
      # We should allow that.
      ok, value, quoted = word.StaticEval(w)
      if not ok or quoted:
        self.AddErrorContext(
            'Variable names must be constant strings, got %s', w, word=w)
        return None
      pair = (value, None, left_spid)  # No value is equivalent to ''
    bindings.append(pair)

  pairs = []
  for lhs, rhs, spid in bindings:
    p = ast.assign_pair(ast.LeftVar(lhs), rhs)
    p.spids.append(spid)
    pairs.append(p)

  node = ast.Assignment(assign_kw, pairs)
  return node
def ParseFactor(self):
  """
  Factor  : WORD
          | UNARY_OP WORD
          | WORD BINARY_OP WORD
          | '(' Expr ')'
  """
  #print('ParseFactor %s %s' % (self.b_kind, IdName(self.op_id)))
  if self.b_kind == Kind.BoolUnary:
    # Just save the type and not the token itself?
    op = self.op_id
    if not self._Next(): return None
    w = self.cur_word
    if not self._Next(): return None
    node = ast.BoolUnary(op, w)
    return node

  if self.b_kind == Kind.Word:
    # Peek ahead another token.
    t2 = self._LookAhead()
    t2_op_id = word.BoolId(t2)
    t2_b_kind = LookupKind(t2_op_id)

    # Redir PUN for < and >
    if t2_b_kind in (Kind.BoolBinary, Kind.Redir):
      left = self.cur_word

      if not self._Next(): return None
      op = self.op_id

      # TODO: Need to change to LexMode.BASH_REGEX.
      # _Next(lex_mode) then?
      is_regex = t2_op_id == Id.BoolBinary_EqualTilde
      if is_regex:
        if not self._Next(lex_mode=LexMode.BASH_REGEX): return None
      else:
        if not self._Next(): return None

      right = self.cur_word
      if is_regex:
        ok, regex_str, unused_quoted = word.StaticEval(right)
        # doesn't contain $foo, etc.
        if ok and not libc.regex_parse(regex_str):
          self.AddErrorContext("Invalid regex: %r" % regex_str, word=right)
          return None

      if not self._Next(): return None
      return ast.BoolBinary(op, left, right)
    else:
      # [[ foo ]]
      w = self.cur_word
      if not self._Next(): return None
      return ast.WordTest(w)

  if self.op_id == Id.Op_LParen:
    if not self._Next(): return None
    node = self.ParseExpr()
    if self.op_id != Id.Op_RParen:
      raise RuntimeError("Expected ), got %s", self.cur_word)
    if not self._Next(): return None
    return node

  # TODO: A proper error, e.g. for "&&"
  raise AssertionError("Unexpected token: %s" % self.cur_word)
def ParseSimpleCommand(self):
  """
  Fixed transcription of the POSIX grammar (TODO: port to grammar/Shell.g)

  io_file        : '<'       filename
                 | LESSAND   filename
                   ...

  io_here        : DLESS     here_end
                 | DLESSDASH here_end

  redirect       : IO_NUMBER (io_redirect | io_here)

  prefix_part    : ASSIGNMENT_WORD | redirect
  cmd_part       : WORD | redirect

  assign_kw      : Declare | Export | Local | Readonly

  # Without any words it is parsed as a command, not an assignment
  assign_listing : assign_kw

  # Now we have something to do (might be changing assignment flags too)
  # NOTE: any prefixes should be a warning, but they are allowed in shell.
  assignment     : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

  # an external command, a function call, or a builtin -- a "word_command"
  word_command   : prefix_part* cmd_part+

  simple_command : assign_listing
                 | assignment
                 | proc_command

  Simple imperative algorithm:

  1) Read a list of words and redirects.  Append them to separate lists.
  2) Look for the first non-assignment word.  If it's declare, etc., then
     keep parsing words AND assign words.  Otherwise, just parse words.
  3) If there are no non-assignment words, then it's a global assignment.

  { redirects, global assignments } OR
  { redirects, prefix_bindings, words } OR
  { redirects, ERROR_prefix_bindings, keyword, assignments, words }

  THEN CHECK that prefix bindings don't have any array literal parts!
  Global assignment and keyword assignments can have them of course.
  Well actually EXPORT shouldn't have them either -- WARNING

  3 cases we want to warn: prefix_bindings for assignment, and array literal
  in prefix bindings, or export

  A command can be an assignment word, word, or redirect on its own.

      ls
      >out.txt

      >out.txt FOO=bar   # this touches the file, and then

  Or any sequence:
      ls foo bar
      <in.txt ls foo bar >out.txt
      <in.txt ls >out.txt foo bar

  Or add one or more environment bindings:
      VAR=val env
      >out.txt VAR=val env

  here_end vs filename is a matter of whether we test that it's quoted.
  e.g. <<EOF vs <<'EOF'.
  """
  result = self._ScanSimpleCommand()
  if not result: return None
  redirects, words = result

  if not words:  # e.g.  >out.txt  # redirect without words
    node = ast.SimpleCommand()
    node.redirects = redirects
    return node

  prefix_bindings, suffix_words = self._SplitSimpleCommandPrefix(words)

  if not suffix_words:  # ONE=1 TWO=2  (with no other words)
    if redirects:
      binding1 = prefix_bindings[0]
      _, _, _, spid = binding1
      self.AddErrorContext('Got redirects in global assignment',
                           span_id=spid)
      return None

    pairs = []
    for lhs, op, rhs, spid in prefix_bindings:
      p = ast.assign_pair(ast.LhsName(lhs), op, rhs)
      p.spids.append(spid)
      pairs.append(p)

    node = ast.Assignment(Id.Assign_None, [], pairs)
    left_spid = word.LeftMostSpanForWord(words[0])
    node.spids.append(left_spid)  # no keyword spid to skip past
    return node

  kind, kw_token = word.KeywordToken(suffix_words[0])

  if kind == Kind.Assign:
    # Here we StaticEval suffix_words[1] to see if it's a command like
    # 'typeset -p'.  Then it becomes a SimpleCommand node instead of an
    # Assignment.  Note we're not handling duplicate flags like 'typeset
    # -pf'.  I see this in bashdb (bash debugger) but it can just be changed
    # to 'typeset -p -f'.
    is_command = False
    if len(suffix_words) > 1:
      ok, val, _ = word.StaticEval(suffix_words[1])
      if ok and (kw_token.id, val) in self._ASSIGN_COMMANDS:
        is_command = True

    if is_command:  # declare -f, declare -p, typeset -p, etc.
      node = self._MakeSimpleCommand(prefix_bindings, suffix_words,
                                     redirects)
      return node

    else:  # declare str='', declare -a array=()
      if redirects:
        # Attach the error location to the keyword.  It would be more precise
        # to attach it to the
        self.AddErrorContext('Got redirects in assignment', token=kw_token)
        return None

      if prefix_bindings:  # FOO=bar local spam=eggs not allowed
        # Use the location of the first value.  TODO: Use the whole word
        # before splitting.
        _, _, v0, _ = prefix_bindings[0]
        self.AddErrorContext(
            'Invalid prefix bindings in assignment: %s', prefix_bindings,
            word=v0)
        return None

      node = self._MakeAssignment(kw_token.id, suffix_words)
      if not node: return None
      node.spids.append(kw_token.span_id)
      return node

  elif kind == Kind.ControlFlow:
    if redirects:
      self.AddErrorContext('Got redirects in control flow: %s', redirects)
      return None

    if prefix_bindings:  # FOO=bar local spam=eggs not allowed
      # Use the location of the first value.  TODO: Use the whole word
      # before splitting.
      _, _, v0, _ = prefix_bindings[0]
      self.AddErrorContext(
          'Invalid prefix bindings in control flow: %s', prefix_bindings,
          word=v0)
      return None

    # Attach the token for errors.  (Assignment may not need it.)
    if len(suffix_words) == 1:
      arg_word = None
    elif len(suffix_words) == 2:
      arg_word = suffix_words[1]
    else:
      self.AddErrorContext('Too many arguments')
      return None

    return ast.ControlFlow(kw_token, arg_word)

  else:
    node = self._MakeSimpleCommand(prefix_bindings, suffix_words, redirects)
    return node
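
# --- Illustrative sketch (not Oil's code): the classification that
# ParseSimpleCommand performs, reduced to plain strings.  VAR_NAME,
# ASSIGN_KEYWORDS, ASSIGN_COMMANDS and classify are hypothetical names; the
# real code works on unevaluated words and span IDs.
import re

VAR_NAME = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
ASSIGN_KEYWORDS = {'declare', 'typeset', 'export', 'local', 'readonly'}
ASSIGN_COMMANDS = {('declare', '-f'), ('declare', '-p'), ('typeset', '-p')}

def classify(words):
  # 1) Split off leading NAME=value prefix bindings.
  prefix_bindings, i = [], 0
  while i < len(words) and '=' in words[i]:
    name, _, value = words[i].partition('=')
    if not VAR_NAME.match(name):
      break
    prefix_bindings.append((name, value))
    i += 1
  suffix_words = words[i:]

  # 2) No remaining words: it's a global assignment like ONE=1 TWO=2.
  if not suffix_words:
    return 'global-assignment', prefix_bindings, suffix_words

  # 3) Assignment keyword, unless it's really a command like 'declare -p'.
  kw = suffix_words[0]
  if kw in ASSIGN_KEYWORDS:
    if len(suffix_words) > 1 and (kw, suffix_words[1]) in ASSIGN_COMMANDS:
      return 'command', prefix_bindings, suffix_words
    return 'assignment', prefix_bindings, suffix_words

  return 'command', prefix_bindings, suffix_words

print(classify(['FOO=1', 'TWO=2']))    # ('global-assignment', ...)
print(classify(['local', 'x=1']))      # ('assignment', ...)
print(classify(['declare', '-p']))     # ('command', ...)
print(classify(['PATH=/bin', 'env']))  # ('command', [('PATH', '/bin')], ['env'])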
def _MakeAssignment(self, assign_kw, suffix_words):
  # First parse flags, e.g. -r -x -a -A.  None of the flags have arguments.
  flags = []
  n = len(suffix_words)
  i = 1
  while i < n:
    w = suffix_words[i]
    ok, static_val, quoted = word.StaticEval(w)
    if not ok or quoted:
      break  # can't statically evaluate

    if static_val.startswith('-'):
      flags.append(static_val)
    else:
      break  # not a flag, rest are args
    i += 1

  # Now parse bindings or variable names
  assignments = []
  while i < n:
    w = suffix_words[i]
    left_spid = word.LeftMostSpanForWord(w)
    kov = word.LooksLikeAssignment(w)
    if kov:
      k, op, v = kov
      t = word.TildeDetect(v)
      if t:
        # t is an unevaluated word with TildeSubPart
        a = (k, op, t, left_spid)
      else:
        a = (k, op, v, left_spid)  # v is unevaluated word
    else:
      # In aboriginal in variables/sources: export_if_blank does export "$1".
      # We should allow that.

      # Parse this differently then?
      # dynamic-export?
      # It sets global variables.
      ok, static_val, quoted = word.StaticEval(w)
      if not ok or quoted:
        self.AddErrorContext(
            'Variable names must be constant strings, got %s', w, word=w)
        return None

      # No value is equivalent to ''
      m = VAR_NAME_RE.match(static_val)
      if not m:
        self.AddErrorContext('Invalid variable name %r', static_val, word=w)
        return None
      a = (static_val, assign_op_e.Equal, None, left_spid)

    assignments.append(a)
    i += 1

  # TODO: Also make with LhsIndexedName
  pairs = []
  for lhs, op, rhs, spid in assignments:
    p = ast.assign_pair(ast.LhsName(lhs), op, rhs)
    p.spids.append(spid)
    pairs.append(p)

  node = ast.Assignment(assign_kw, flags, pairs)
  return node
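
# --- Illustrative sketch (not Oil's code): the two passes above on plain
# strings -- consume leading '-' flags (none take arguments), then parse each
# remaining word as NAME=value or a bare name.  parse_assignment is a
# hypothetical helper.
import re

VAR_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')

def parse_assignment(words):
  flags, pairs = [], []
  i = 1                                          # words[0] is declare/local/...
  while i < len(words) and words[i].startswith('-'):
    flags.append(words[i])
    i += 1
  for w in words[i:]:
    name, eq, value = w.partition('=')
    if not VAR_NAME_RE.match(name):
      raise ValueError('Invalid variable name %r' % name)
    pairs.append((name, value if eq else None))  # bare name: no value
  return flags, pairs

print(parse_assignment(['declare', '-r', '-x', 'FOO=bar', 'BAZ']))
# (['-r', '-x'], [('FOO', 'bar'), ('BAZ', None)])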
def testPatSub(self):
  w = _assertReadWord(self, '${var/pat/replace}')
  op = _GetSuffixOp(self, w)
  self.assertFalse(op.do_all)
  self.assertFalse(op.do_prefix)
  self.assertFalse(op.do_suffix)
  self.assertUnquoted('pat', op.pat)
  self.assertUnquoted('replace', op.replace)

  w = _assertReadWord(self, '${var//pat/replace}')  # sub all
  op = _GetSuffixOp(self, w)
  self.assertTrue(op.do_all)
  self.assertUnquoted('pat', op.pat)
  self.assertUnquoted('replace', op.replace)

  w = _assertReadWord(self, '${var/#pat/replace}')  # prefix
  op = _GetSuffixOp(self, w)
  self.assertTrue(op.do_prefix)
  self.assertUnquoted('pat', op.pat)
  self.assertUnquoted('replace', op.replace)

  w = _assertReadWord(self, '${var/%pat/replace}')  # suffix
  op = _GetSuffixOp(self, w)
  self.assertTrue(op.do_suffix)
  self.assertUnquoted('pat', op.pat)
  self.assertUnquoted('replace', op.replace)

  w = _assertReadWord(self, '${var/pat}')  # no replacement
  w = _assertReadWord(self, '${var//pat}')  # no replacement
  op = _GetSuffixOp(self, w)
  self.assertTrue(op.do_all)
  self.assertUnquoted('pat', op.pat)
  self.assertEqual(None, op.replace)

  # replace with slash
  w = _assertReadWord(self, '${var/pat//}')
  op = _GetSuffixOp(self, w)
  self.assertUnquoted('pat', op.pat)
  self.assertUnquoted('/', op.replace)

  # replace with two slashes unquoted
  w = _assertReadWord(self, '${var/pat///}')
  op = _GetSuffixOp(self, w)
  self.assertUnquoted('pat', op.pat)
  self.assertUnquoted('//', op.replace)

  # replace with two slashes quoted
  w = _assertReadWord(self, '${var/pat/"//"}')
  op = _GetSuffixOp(self, w)
  self.assertUnquoted('pat', op.pat)

  ok, s, quoted = word.StaticEval(op.replace)
  self.assertTrue(ok)
  self.assertEqual('//', s)
  self.assertTrue(quoted)

  # Real example found in the wild!
  # http://www.oilshell.org/blog/2016/11/07.html
  w = _assertReadWord(self, r'${var////\\/}')
  op = _GetSuffixOp(self, w)
  self.assertTrue(op.do_all)
  self.assertUnquoted('/', op.pat)

  ok, s, quoted = word.StaticEval(op.replace)
  self.assertTrue(ok)
  self.assertEqual(r'\/', s)
def assertUnquoted(self, expected, w):
  ok, s, quoted = word.StaticEval(w)
  self.assertTrue(ok)
  self.assertEqual(expected, s)
  self.assertFalse(quoted)
def ParseFactor(self):
  """
  Factor  : WORD
          | UNARY_OP WORD
          | WORD BINARY_OP WORD
          | '(' Expr ')'
  """
  if self.b_kind == Kind.BoolUnary:
    # Just save the type and not the token itself?
    op = self.op_id
    if not self._Next(): return None
    w = self.cur_word
    if not self._Next(): return None
    node = ast.BoolUnary(op, w)
    return node

  if self.b_kind == Kind.Word:
    # Peek ahead another token.
    t2 = self._LookAhead()
    t2_op_id = word.BoolId(t2)
    t2_b_kind = LookupKind(t2_op_id)

    #log('t2 %s / t2_op_id %s / t2_b_kind %s', t2, t2_op_id, t2_b_kind)
    # Redir pun for < and >, -a and -o pun
    if t2_b_kind in (Kind.BoolBinary, Kind.Redir):
      left = self.cur_word

      if not self._Next(): return None
      op = self.op_id

      # TODO: Need to change to lex_mode_e.BASH_REGEX.
      # _Next(lex_mode) then?
      is_regex = t2_op_id == Id.BoolBinary_EqualTilde
      if is_regex:
        if not self._Next(lex_mode=lex_mode_e.BASH_REGEX): return None
      else:
        if not self._Next(): return None

      right = self.cur_word
      if is_regex:
        # TODO: Quoted parts need to be regex-escaped, e.g. [[ $a =~ "{" ]].
        # I don't think libc has a function to do this.  Escape these
        # characters:
        # https://www.gnu.org/software/sed/manual/html_node/ERE-syntax.html
        ok, regex_str, unused_quoted = word.StaticEval(right)
        # doesn't contain $foo, etc.
        if ok and not libc.regex_parse(regex_str):
          self.AddErrorContext("Invalid regex: %r" % regex_str, word=right)
          return None

      if not self._Next(): return None
      return ast.BoolBinary(op, left, right)
    else:
      # [[ foo ]]
      w = self.cur_word
      if not self._Next(): return None
      return ast.WordTest(w)

  if self.op_id == Id.Op_LParen:
    if not self._Next(): return None
    node = self.ParseExpr()
    if self.op_id != Id.Op_RParen:
      self.AddErrorContext('Expected ), got %s', self.cur_word,
                           word=self.cur_word)
      return None
    if not self._Next(): return None
    return node

  # TODO: A proper error, e.g. for [[ && ]] or [[ ]]
  self.AddErrorContext('Unexpected token: %s' % self.cur_word,
                       word=self.cur_word)
  return None
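
# --- Illustrative sketch (not Oil's code) of the TODO above: quoted parts of
# the =~ right-hand side should be escaped so they match literally, and a
# statically-known pattern can be validated up front.  Python's re module
# stands in for libc.regex_parse(); POSIX ERE and Python regexes differ in
# detail, so this is only an analogy.
import re

def build_regex(parts):
  """parts: list of (text, quoted); quoted parts match literally."""
  pattern = ''.join(re.escape(t) if q else t for t, q in parts)
  try:
    re.compile(pattern)
  except re.error as e:
    raise ValueError('Invalid regex %r: %s' % (pattern, e))
  return pattern

print(build_regex([('a+', False)]))                 # a+      unquoted: regex
print(build_regex([('{', True)]))                   # \{      [[ $a =~ "{" ]]
print(build_regex([('^v', False), ('1.0', True)]))  # ^v1\.0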
def DoCommand(self, node, local_symbols, at_top_level=False):
  if node.tag == command_e.CommandList:
    # TODO: How to distinguish between echo hi; echo bye; and on separate
    # lines
    for child in node.children:
      self.DoCommand(child, local_symbols, at_top_level=at_top_level)

  elif node.tag == command_e.SimpleCommand:
    # How to preserve spaces between words?  Do you want to do it?
    # Well you need to test this:
    #
    # echo foo \
    #   bar

    # TODO: Need to print until the left most part of the phrase?  the phrase
    # is a word, binding, redirect.
    #self.cursor.PrintUntil()

    if node.more_env:
      (left_spid,) = node.more_env[0].spids
      self.cursor.PrintUntil(left_spid)
      self.f.write('env ')

      # We only need to transform the right side, not left side.
      for pair in node.more_env:
        self.DoWordInCommand(pair.val, local_symbols)

    # More translations:
    # - . to source
    # - eval to sh-eval

    if node.words:
      first_word = node.words[0]
      ok, val, quoted = word.StaticEval(first_word)
      word0_spid = word.LeftMostSpanForWord(first_word)
      if ok and not quoted:
        if val == '[':
          last_word = node.words[-1]
          # Check if last word is ]
          ok, val, quoted = word.StaticEval(last_word)
          if ok and not quoted and val == ']':
            # Replace [ with 'test'
            self.cursor.PrintUntil(word0_spid)
            self.cursor.SkipUntil(word0_spid + 1)
            self.f.write('test')

            for w in node.words[1:-1]:
              self.DoWordInCommand(w, local_symbols)

            # Now omit ]
            last_spid = word.LeftMostSpanForWord(last_word)
            self.cursor.PrintUntil(last_spid - 1)  # Get the space before
            self.cursor.SkipUntil(last_spid + 1)  # ] takes one spid
            return
          else:
            raise RuntimeError('Got [ without ]')

        elif val == '.':
          self.cursor.PrintUntil(word0_spid)
          self.cursor.SkipUntil(word0_spid + 1)
          self.f.write('source')
          return

    for w in node.words:
      self.DoWordInCommand(w, local_symbols)

    # NOTE: This will change to "phrase"?  Word or redirect.
    for r in node.redirects:
      self.DoRedirect(r, local_symbols)

    # TODO: Print the terminator.  Could be \n or ;
    # Need to print env like PYTHONPATH = 'foo' && ls
    # Need to print redirects:
    # < > are the same.  << is here string, and >> is assignment.
    # append is >+

    # TODO: static_eval of simple command
    # - [ -> "test".  Eliminate trailing ].
    # - . -> source, etc.

  elif node.tag == command_e.Assignment:
    self.DoAssignment(node, at_top_level, local_symbols)

  elif node.tag == command_e.Pipeline:
    # Obscure: |& turns into |- or |+ for stderr.
    # TODO:
    # if ! true; then -> if not true {
    # if ! echo | grep; then -> if not { echo | grep } {
    # }
    # not is like do {}, but it negates the return value I guess.

    for child in node.children:
      self.DoCommand(child, local_symbols)

  elif node.tag == command_e.AndOr:
    for child in node.children:
      self.DoCommand(child, local_symbols)

  elif node.tag == command_e.Sentence:
    # 'ls &' to 'fork ls'
    # Keep ; the same.
    self.DoCommand(node.child, local_symbols)

  # This has to be different in the function case.
  elif node.tag == command_e.BraceGroup:
    # { echo hi; } -> do { echo hi }
    # For now it might be OK to keep 'do { echo hi; }
    #left_spid, right_spid = node.spids
    (left_spid,) = node.spids

    self.cursor.PrintUntil(left_spid)
    self.cursor.SkipUntil(left_spid + 1)
    self.f.write('do {')

    for child in node.children:
      self.DoCommand(child, local_symbols)

  elif node.tag == command_e.Subshell:
    # (echo hi) -> shell echo hi
    # (echo hi; echo bye) -> shell {echo hi; echo bye}

    (left_spid, right_spid) = node.spids

    self.cursor.PrintUntil(left_spid)
    self.cursor.SkipUntil(left_spid + 1)
    self.f.write('shell {')

    self.DoCommand(node.child, local_symbols)

    #self._DebugSpid(right_spid)
    #self._DebugSpid(right_spid + 1)

    #print('RIGHT SPID', right_spid)
    self.cursor.PrintUntil(right_spid)
    self.cursor.SkipUntil(right_spid + 1)
    self.f.write('}')

  elif node.tag == command_e.DParen:
    # (( a == 0 )) is sh-expr ' a == 0 '
    #
    # NOTE: (( n++ )) is auto-translated to sh-expr 'n++', but could be set
    # n++.
    left_spid, right_spid = node.spids
    self.cursor.PrintUntil(left_spid)
    self.cursor.SkipUntil(left_spid + 1)
    self.f.write("sh-expr '")
    self.cursor.PrintUntil(right_spid - 1)  # before ))
    self.cursor.SkipUntil(right_spid + 1)  # after )) -- each one is a token
    self.f.write("'")

  elif node.tag == command_e.DBracket:
    # [[ 1 -eq 2 ]] to (1 == 2)
    self.DoBoolExpr(node.expr)

  elif node.tag == command_e.FuncDef:
    # TODO: skip name
    #self.f.write('proc %s' % node.name)

    # New symbol table for every function.
    new_local_symbols = {}

    # Should be the left most span, including 'function'
    self.cursor.PrintUntil(node.spids[0])

    self.f.write('proc ')
    self.f.write(node.name)
    self.cursor.SkipUntil(node.spids[1])

    if node.body.tag == command_e.BraceGroup:
      # Don't add "do" like a standalone brace group.  Just use {}.
      for child in node.body.children:
        self.DoCommand(child, new_local_symbols)
    else:
      pass
      # Add {}.
      # proc foo {
      #   shell {echo hi; echo bye}
      # }
      #self.DoCommand(node.body)

  elif node.tag == command_e.BraceGroup:
    for child in node.children:
      self.DoCommand(child, local_symbols)

  elif node.tag == command_e.DoGroup:
    do_spid, done_spid = node.spids
    self.cursor.PrintUntil(do_spid)
    self.cursor.SkipUntil(do_spid + 1)
    self.f.write('{')

    for child in node.children:
      self.DoCommand(child, local_symbols)

    self.cursor.PrintUntil(done_spid)
    self.cursor.SkipUntil(done_spid + 1)
    self.f.write('}')

  elif node.tag == command_e.ForEach:
    # Need to preserve spaces between words, because there can be line
    # wrapping.
    # for x in a b c \
    #    d e f; do

    in_spid, semi_spid = node.spids

    if in_spid == const.NO_INTEGER:
      #self.cursor.PrintUntil()  # 'for x' and then space
      self.f.write('for %s in @Argv ' % node.iter_name)
      self.cursor.SkipUntil(node.body.spids[0])
    else:
      self.cursor.PrintUntil(in_spid + 1)  # 'for x in' and then space
      self.f.write('[')
      for w in node.iter_words:
        self.DoWordInCommand(w, local_symbols)
      self.f.write(']')

    #print("SKIPPING SEMI %d" % semi_spid, file=sys.stderr)
    if semi_spid != const.NO_INTEGER:
      self.cursor.PrintUntil(semi_spid)
      self.cursor.SkipUntil(semi_spid + 1)

    self.DoCommand(node.body, local_symbols)

  elif node.tag == command_e.ForExpr:
    # Change (( )) to ( ), and then _FixDoGroup
    pass

  elif node.tag == command_e.WhileUntil:
    # Skip 'until', and replace it with 'while not'
    if node.keyword.id == Id.KW_Until:
      kw_spid = node.keyword.span_id
      self.cursor.PrintUntil(kw_spid)
      self.f.write('while not')
      self.cursor.SkipUntil(kw_spid + 1)

    cond = node.cond
    # Skip the semi-colon in the condition, which is usually a Sentence
    if len(cond) == 1 and cond[0].tag == command_e.Sentence:
      self.DoCommand(cond[0].child, local_symbols)
      semi_spid = cond[0].terminator.span_id
      self.cursor.SkipUntil(semi_spid + 1)

    self.DoCommand(node.body, local_symbols)

  elif node.tag == command_e.If:
    else_spid, fi_spid = node.spids

    # if foo; then -> if foo {
    # elif foo; then -> } elif foo {
    for arm in node.arms:
      elif_spid, then_spid = arm.spids
      if elif_spid != const.NO_INTEGER:
        self.cursor.PrintUntil(elif_spid)
        self.f.write('} ')

      cond = arm.cond
      if len(cond) == 1 and cond[0].tag == command_e.Sentence:
        sentence = cond[0]
        self.DoCommand(sentence, local_symbols)

        # Remove semi-colon
        semi_spid = sentence.terminator.span_id
        self.cursor.PrintUntil(semi_spid)
        self.cursor.SkipUntil(semi_spid + 1)
      else:
        for child in arm.cond:
          self.DoCommand(child, local_symbols)

      self.cursor.PrintUntil(then_spid)
      self.cursor.SkipUntil(then_spid + 1)
      self.f.write('{')

      for child in arm.action:
        self.DoCommand(child, local_symbols)

    # else -> } else {
    if node.else_action:
      self.cursor.PrintUntil(else_spid)
      self.f.write('} ')
      self.cursor.PrintUntil(else_spid + 1)
      self.f.write(' {')

      for child in node.else_action:
        self.DoCommand(child, local_symbols)

    # fi -> }
    self.cursor.PrintUntil(fi_spid)
    self.cursor.SkipUntil(fi_spid + 1)
    self.f.write('}')

  elif node.tag == command_e.Case:
    case_spid, in_spid, esac_spid = node.spids
    self.cursor.PrintUntil(case_spid)
    self.cursor.SkipUntil(case_spid + 1)
    self.f.write('match')

    # Reformat "$1" to $1
    self.DoWordInCommand(node.to_match, local_symbols)

    self.cursor.PrintUntil(in_spid)
    self.cursor.SkipUntil(in_spid + 1)
    self.f.write('{')  # matchstr $var {

    # each arm needs the ) and the ;; node to skip over?
    for arm in node.arms:
      left_spid, rparen_spid, dsemi_spid, last_spid = arm.spids
      #print(left_spid, rparen_spid, dsemi_spid)

      self.cursor.PrintUntil(left_spid)
      # Hm maybe keep | because it's semi-deprecated?  You can use
      # reload|force-reload {
      # }
      # e/reload|force-reload/ {
      # }
      # / 'reload' or 'force-reload' / {
      # }
      #
      # Yeah it's the more abbreviated syntax.

      # change | to 'or'
      for pat in arm.pat_list:
        pass

      self.f.write('with ')
      # Remove the )
      self.cursor.PrintUntil(rparen_spid)
      self.cursor.SkipUntil(rparen_spid + 1)

      for child in arm.action:
        self.DoCommand(child, local_symbols)

      if dsemi_spid != const.NO_INTEGER:
        # Remove ;;
        self.cursor.PrintUntil(dsemi_spid)
        self.cursor.SkipUntil(dsemi_spid + 1)
      elif last_spid != const.NO_INTEGER:
        self.cursor.PrintUntil(last_spid)
      else:
        raise AssertionError(
            "Expected with dsemi_spid or last_spid in case arm")

    self.cursor.PrintUntil(esac_spid)
    self.cursor.SkipUntil(esac_spid + 1)
    self.f.write('}')  # strmatch $var {

  elif node.tag == command_e.NoOp:
    pass

  elif node.tag == command_e.ControlFlow:
    # No change for break / return / continue
    pass

  elif node.tag == command_e.TimeBlock:
    self.DoCommand(node.pipeline, local_symbols)

  else:
    #log('Command not handled: %s', node)
    raise AssertionError(node.__class__.__name__)
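
# --- Illustrative sketch (not Oil's code): the translation above leans on a
# Cursor over the original source, where PrintUntil copies text up to a span
# and SkipUntil drops it so replacement text can be written.  Here plain
# character offsets stand in for Oil's token span IDs.
class Cursor:
  def __init__(self, src, out):
    self.src, self.out, self.pos = src, out, 0

  def PrintUntil(self, offset):     # copy original text up to offset
    self.out.append(self.src[self.pos:offset])
    self.pos = offset

  def SkipUntil(self, offset):      # drop original text up to offset
    self.pos = offset

src = 'until false; do echo hi; done'
out = []
c = Cursor(src, out)
c.PrintUntil(0)                     # nothing before the keyword
c.SkipUntil(len('until'))           # drop 'until'
out.append('while not')             # write the replacement
c.PrintUntil(len(src))              # copy the rest unchanged
print(''.join(out))                 # while not false; do echo hi; done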
def DoRedirect(self, node, local_symbols):
  #print(node, file=sys.stderr)
  op_spid = node.op.span_id
  op_id = node.op.id
  self.cursor.PrintUntil(op_spid)

  # TODO:
  # - Do < and <& the same way.
  # - How to handle here docs and here docs?
  # - >> becomes >+ or >-, or maybe >>>

  if node.tag == redir_e.Redir:
    if node.fd == const.NO_INTEGER:
      if op_id == Id.Redir_Great:
        self.f.write('>')  # Allow us to replace the operator
        self.cursor.SkipUntil(op_spid + 1)
      elif op_id == Id.Redir_GreatAnd:
        self.f.write('> !')  # Replace >& 2 with > !2
        spid = word.LeftMostSpanForWord(node.arg_word)
        self.cursor.SkipUntil(spid)
        #self.DoWordInCommand(node.arg_word)

    else:
      # NOTE: Spacing like !2>err.txt vs !2 > err.txt can be done in the
      # formatter.
      self.f.write('!%d ' % node.fd)
      if op_id == Id.Redir_Great:
        self.f.write('>')
        self.cursor.SkipUntil(op_spid + 1)
      elif op_id == Id.Redir_GreatAnd:
        self.f.write('> !')  # Replace 1>& 2 with !1 > !2
        spid = word.LeftMostSpanForWord(node.arg_word)
        self.cursor.SkipUntil(spid)

    self.DoWordInCommand(node.arg_word, local_symbols)

  elif node.tag == redir_e.HereDoc:
    ok, delimiter, delim_quoted = word.StaticEval(node.here_begin)
    if not ok:
      p_die('Invalid here doc delimiter', word=node.here_begin)

    # Turn everything into <<.  We just change the quotes
    self.f.write('<<')

    #here_begin_spid2 = word.RightMostSpanForWord(node.here_begin)
    if delim_quoted:
      self.f.write(" '''")
    else:
      self.f.write(' """')

    delim_end_spid = word.RightMostSpanForWord(node.here_begin)
    self.cursor.SkipUntil(delim_end_spid + 1)

    #self.cursor.SkipUntil(here_begin_spid + 1)

    # Now print the lines.  TODO: Have a flag to indent these to the level of
    # the owning command, e.g.
    #   cat <<EOF
    # EOF
    # Or since most here docs are the top level, you could just have a hack
    # for a fixed indent?  TODO: Look at real use cases.
    for part in node.stdin_parts:
      self.DoWordPart(part, local_symbols)

    self.cursor.SkipUntil(node.here_end_span_id + 1)
    if delim_quoted:
      self.f.write("'''\n")
    else:
      self.f.write('"""\n')

    # Need
    #self.cursor.SkipUntil(here_end_spid2)

  else:
    raise AssertionError(node.__class__.__name__)

  # <<< 'here word'
  # << 'here word'
  #
  # 2> out.txt
  # !2 > out.txt

  # cat 1<< EOF
  # hello $name
  # EOF
  # cat !1 << """
  # hello $name
  # """
  #
  # cat << 'EOF'  # no expansion
  # EOF
  # cat <<- 'EOF'  # no expansion and indented
  #
  # cat << '''  # no expansion
  # '''
  # cat << '''  # no expansion and indented
  # '''

  # Warn about multiple here docs on a line.
  # As an obscure feature, allow
  # cat << \'ONE' << \"TWO"
  # 123
  # ONE
  # 234
  # TWO
  # The _ is an indicator that it's not a string to be piped in.
  pass