def testShellFuncExecution(self):
    """A shell function that sets COMPREPLY should yield completion matches."""
    ex = cmd_exec_test.InitExecutor()

    # Build two literal words, f1 and f2, for the array literal (f1 f2).
    words = []
    for s in ('f1', 'f2'):
        w = ast.CompoundWord()
        w.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, s)))
        words.append(w)

    array_part = ast.ArrayLiteralPart()
    array_part.words = words
    rhs = ast.CompoundWord()
    rhs.parts.append(array_part)

    # The function body is the global assignment COMPREPLY=(f1 f2).
    pair = ast.assign_pair(ast.LhsName('COMPREPLY'), assign_op_e.Equal, rhs)
    pair.spids.append(0)  # dummy span id

    func_node = ast.FuncDef()
    func_node.name = 'myfunc'
    func_node.body = ast.Assignment(Id.Assign_None, [], [pair])

    action = completion.ShellFuncAction(ex, func_node)
    self.assertEqual(['f1 ', 'f2 '], list(action.Matches([], 0, 'f')))
def testPipeline2(self):
    """Run a pipeline of external procs, then one of subprogram thunks."""
    Banner('ls | cut -d . -f 1 | head')
    pipeline = process.Pipeline()
    for argv in (['ls'], ['cut', '-d', '.', '-f', '1'], ['head']):
        pipeline.Add(_ExtProc(argv))
    print(pipeline.Run(_WAITER))

    ex = InitExecutor()

    # Simulating subshell for each command: build a SimpleCommand node per
    # argv, with one literal CompoundWord per argument.
    nodes = []
    for argv in (['ls'], ['head'], ['sort', '--reverse']):
        cmd_words = []
        for arg in argv:
            w = ast.CompoundWord()
            w.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, arg)))
            cmd_words.append(w)
        node = ast.SimpleCommand()
        node.words = cmd_words
        nodes.append(node)

    pipeline = process.Pipeline()
    for node in nodes:
        pipeline.Add(Process(process.SubProgramThunk(ex, node)))
    print(pipeline.Run(_WAITER))
def testVarOps(self):
    """Evaluate ${unset} and ${x}, then again with a :- default operator."""
    ev = InitEvaluator()  # initializes x=xxx and y=yyy

    unset_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'unset'))
    set_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'x'))

    # Bare substitutions: one unset variable, one set variable.
    for sub in (unset_sub, set_sub):
        results = []
        ev._EvalWordPart(sub, results)
        print(results)

    # Attach the ${var:-default} suffix op to both substitutions.
    default_word = ast.CompoundWord(
        [ast.LiteralPart(ast.token(Id.Lit_Chars, 'default'))])
    op = ast.StringUnary(Id.VTest_ColonHyphen, default_word)
    unset_sub.suffix_op = op
    set_sub.suffix_op = op

    for sub in (unset_sub, set_sub):
        results = []
        ev._EvalWordPart(sub, results)
        print(results)
def _MaybeReadHereDocs(self):
    """Fill in the bodies of all pending here docs.

    For each pending here doc, reads lines from self.line_reader until the
    delimiter (h.here_end) is seen.  If the here doc allows expansion, the
    collected lines are parsed as a word; otherwise each line becomes a
    literal token.  Clears self.pending_here_docs when done.

    Returns:
      True on success; False if a here doc is unterminated or its body fails
      to parse (an error context is added in both cases).
    """
    for h in self.pending_here_docs:
        lines = []
        #log('HERE %r' % h.here_end)
        while True:
            # If op is <<-, strip off all leading tabs (NOT spaces).
            # (in C++, just bump the start?)
            line_id, line = self.line_reader.GetLine()

            #print("LINE %r %r" % (line, h.here_end))
            if not line:  # EOF
                # An unterminated here doc is just a warning in bash.  We make
                # it fatal because we want to be strict, and because it causes
                # problems reporting other errors.
                # Attribute it to the << in <<EOF for now.
                self.AddErrorContext('Unterminated here doc',
                                     span_id=h.spids[0])
                return False

            # NOTE: Could do this runtime to preserve LST.
            if h.op_id == Id.Redir_DLessDash:
                line = line.lstrip('\t')
            # The delimiter line itself is not part of the body.
            if line.rstrip() == h.here_end:
                break

            lines.append((line_id, line))

        parts = []
        if h.do_expansion:
            # NOTE: We read all lines at once, instead of doing it
            # line-by-line, because of cases like this:
            # cat <<EOF
            # 1 $(echo 2
            # echo 3) 4
            # EOF
            from osh import parse_lib  # Avoid circular import
            w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena)
            word = w_parser.ReadHereDocBody()
            if not word:
                self.AddErrorContext(
                    'Error reading here doc body: %s', w_parser.Error())
                return False
            h.body = word
            h.was_filled = True
        else:
            # No expansion (quoted delimiter): each line is a single literal
            # span.  TODO: Add span_id to token.
            tokens = [
                ast.token(Id.Lit_Chars, line, const.NO_INTEGER)
                for _, line in lines]
            parts = [ast.LiteralPart(t) for t in tokens]
            h.body = ast.CompoundWord(parts)
            h.was_filled = True

    # No .clear() until Python 3.3.
    del self.pending_here_docs[:]

    return True
def _ReadPatSubVarOp(self, lex_mode):
    """Parse a pattern-substitution operator, e.g. the /pat/replace in
    ${v/pat/replace}.

    Match  = ('/' | '#' | '%') WORD
    VarSub = ... | VarOf '/' Match '/' WORD

    Returns:
      ast.PatSub on success, or None on a parse error (an error context is
      added via _ReadVarOpArg or _BadToken).
    """
    do_all = False
    do_prefix = False
    do_suffix = False

    pat = self._ReadVarOpArg(lex_mode, eof_type=Id.Lit_Slash, empty_ok=False)
    if not pat:
        return None

    if len(pat.parts) == 1:
        ok, s, quoted = word.StaticEval(pat)
        if ok and s == '/' and not quoted:  # Looks like ${a////c}, read again
            self._Next(lex_mode)
            self._Peek()
            p = ast.LiteralPart(self.cur_token)
            pat.parts.append(p)

    # Check for a leading modifier on the pattern.
    # BUG FIX: '#' and '%' were swapped.  In bash, ${v/#pat} anchors the
    # match at the START of the value (prefix) and ${v/%pat} anchors it at
    # the END (suffix) -- matching the plain ${v#pat} / ${v%pat} operators.
    first_part = pat.parts[0]
    if first_part.tag == word_part_e.LiteralPart:
        lit_id = first_part.token.id
        if lit_id == Id.Lit_Slash:  # ${v//pat}: replace every match
            do_all = True
            pat.parts.pop(0)
        elif lit_id == Id.Lit_Pound:  # ${v/#pat}: anchor at the start
            do_prefix = True
            pat.parts.pop(0)
        elif lit_id == Id.Lit_Percent:  # ${v/%pat}: anchor at the end
            do_suffix = True
            pat.parts.pop(0)

    #self._Peek()
    if self.token_type == Id.Right_VarSub:
        # No replacement word, e.g. ${v/pat}: deletes matches.
        return ast.PatSub(pat, None, do_all, do_prefix, do_suffix)

    elif self.token_type == Id.Lit_Slash:
        replace = self._ReadVarOpArg(lex_mode)  # do not stop at /
        if not replace:
            return None

        self._Peek()
        if self.token_type == Id.Right_VarSub:
            return ast.PatSub(pat, replace, do_all, do_prefix, do_suffix)
        else:
            self._BadToken("Expected } after pat sub, got %s", self.cur_token)
            return None

    else:
        self._BadToken("Expected } after pat sub, got %s", self.cur_token)
        return None
def _MaybeReadHereDocs(self, node): here_docs = _GetHereDocsToFill(node) #print('') #print('--> FILLING', here_docs) #print('') for h in here_docs: lines = [] #print(h.here_end) while True: # If op is <<-, strip off all leading tabs (NOT spaces). # (in C++, just bump the start?) line_id, line = self.line_reader.GetLine() #print("LINE %r %r" % (line, h.here_end)) if not line: # EOF print('WARNING: unterminated here doc', file=sys.stderr) break if h.op_id == Id.Redir_DLessDash: line = line.lstrip('\t') if line.rstrip() == h.here_end: break lines.append((line_id, line)) parts = [] if h.do_expansion: # NOTE: We read all lines at once, instead of doing it line-by-line, # because of cases like this: # cat <<EOF # 1 $(echo 2 # echo 3) 4 # EOF # TODO: Move this import from osh import parse_lib # TODO: Thread arena. need self.arena w_parser = parse_lib.MakeWordParserForHereDoc(lines) word = w_parser.ReadHereDocBody() if not word: self.AddErrorContext('Error reading here doc body: %s', w_parser.Error()) return False h.arg_word = word h.was_filled = True else: # TODO: Add span_id to token # Each line is a single span. tokens = [ast.token(Id.Lit_Chars, line) for _, line in lines] parts = [ast.LiteralPart(t) for t in tokens] h.arg_word = ast.CompoundWord(parts) h.was_filled = True #print('') #print('--> FILLED', here_docs) #print('') return True
def testMultiLine(self):
    """Multiple newlines and comments between commands collapse to newline
    tokens."""
    w_parser = InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
    print('--MULTI')

    # Expected word sequence: literal words interleaved with newline tokens,
    # terminated by a real EOF token.
    expected = [
        ast.CompoundWord([ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls'))]),
        ast.CompoundWord([ast.LiteralPart(ast.token(Id.Lit_Chars, 'foo'))]),
        ast.TokenWord(ast.token(Id.Op_Newline, '\n')),
        ast.CompoundWord([ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls'))]),
        ast.CompoundWord([ast.LiteralPart(ast.token(Id.Lit_Chars, 'bar'))]),
        ast.TokenWord(ast.token(Id.Op_Newline, '\n')),
        ast.TokenWord(ast.token(Id.Eof_Real, '')),
    ]
    for want in expected:
        got = w_parser.ReadWord(LexMode.OUTER)
        self.assertEqual(want, got)
def testVarOps(self):
    """EvalVarSub on ${unset} and ${x}, then with the :- default operator."""
    ev = InitEvaluator()  # initializes x=xxx and y=yyy

    unset_sub = ast.BracedVarSub(ast.token(Id.Lit_Chars, 'unset'))
    set_sub = ast.BracedVarSub(ast.token(Id.Lit_Chars, 'x'))

    # Bare substitutions first.
    print(ev.EvalVarSub(unset_sub))
    print(ev.EvalVarSub(set_sub))

    # Attach the ${var:-default} suffix op to both substitutions.
    default_word = ast.CompoundWord(
        [ast.LiteralPart(ast.token(Id.Lit_Chars, 'default'))])
    op = ast.StringUnary(Id.VTest_ColonHyphen, default_word)
    unset_sub.suffix_op = op
    set_sub.suffix_op = op

    print(ev.EvalVarSub(unset_sub))
    print(ev.EvalVarSub(set_sub))
def TildeDetect(word):
    """Detect tilde expansion.

    If it needs to include a TildeSubPart, return a new word.  Otherwise
    return None.

    NOTE: This algorithm would be a simpler if
    1. We could assume some regex for user names.
    2. We didn't need to do brace expansion first, like {~foo,~bar}
    OR
    - If Lit_Slash were special (it is in the VAROP states, but not OUTER
    state).  We could introduce another lexer mode after you hit Lit_Tilde?

    So we have to scan all LiteralPart instances until they contain a '/'.

    http://unix.stackexchange.com/questions/157426/what-is-the-regex-to-validate-linux-users
    "It is usually recommended to only use usernames that begin with a lower
    case letter or an underscore, followed by lower case letters, digits,
    underscores, or dashes.  They can end with a dollar sign.  In regular
    expression terms: [a-z_][a-z0-9_-]*[$]?

    On Debian, the only constraints are that usernames must neither start
    with a dash ('-') nor contain a colon (':') or a whitespace (space: ' ',
    end of line: '\n', tabulation: '\t', etc.).  Note that using a slash
    ('/') may break the default algorithm for the definition of the user's
    home directory."
    """
    if not word.parts:
        return None
    part0 = word.parts[0]
    # Only words that begin with a ~ token are candidates.
    if _LiteralPartId(part0) != Id.Lit_Tilde:
        return None

    prefix = ''  # accumulated user name, e.g. 'foo' in ~foo/bar
    found_slash = False
    # search for the next /
    for i in range(1, len(word.parts)):
        # Not a literal part, and we did NOT find a slash.  So there is no
        # TildeSub applied.  This would be something like ~X$var, ~$var,
        # ~$(echo), etc..  The slash is necessary.
        if word.parts[i].tag != word_part_e.LiteralPart:
            return None

        val = word.parts[i].token.val
        p = val.find('/')

        if p == -1:  # no slash yet
            prefix += val

        elif p >= 0:
            # e.g. for ~foo!bar/baz, extract "bar"
            # NOTE: requires downcast to LiteralPart
            pre, post = val[:p], val[p:]
            prefix += pre
            tilde_part = ast.TildeSubPart(prefix)
            # TODO: Need a span_id here.  Or use different algorithm.
            #print('SPLITTING %s p = %d' % (word.parts[i], p), file=sys.stderr)
            remainder_part = ast.LiteralPart(ast.token(Id.Lit_Chars, post))
            found_slash = True
            break

    w = ast.CompoundWord()
    if found_slash:
        # ~user/rest: tilde sub, then the /rest literal, then any parts after
        # the one we split (loop variable i is still bound from the break).
        w.parts.append(tilde_part)
        w.parts.append(remainder_part)
        j = i + 1
        while j < len(word.parts):
            w.parts.append(word.parts[j])
            j += 1
    else:
        # The whole thing is a tilde sub, e.g. ~foo or ~foo!bar
        # (for a bare ~, prefix is '' since the loop body never ran)
        w.parts.append(ast.TildeSubPart(prefix))
    return w
def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok,
                      lex_mode=LexMode.OUTER, empty_ok=True):
    """Read a CompoundWord: a sequence of adjacent word parts.

    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    Args:
      eof_type: token type that terminates the word, e.g. } inside ${}
      lex_mode: lexer mode to advance with
      empty_ok: whether a word with zero parts may terminate at eof_type

    Returns:
      ast.CompoundWord on success.
      NOTE(review): error paths are inconsistent -- the array-literal failure
      returns False while _ReadLeftParts failure returns None; callers appear
      to only truth-test the result.  Confirm before relying on the type.
    """
    #print('_ReadCompoundWord', lex_mode)
    word = ast.CompoundWord()

    num_parts = 0
    done = False
    while not done:
        allow_done = empty_ok or num_parts != 0
        self._Peek()
        #print('CW',self.cur_token)
        if allow_done and self.token_type == eof_type:
            done = True  # e.g. for ${}

        # Keywords like "for" are treated like literals
        elif self.token_kind in (
                Kind.Lit, Kind.KW, Kind.Assign, Kind.ControlFlow,
                Kind.BoolUnary, Kind.BoolBinary):
            if self.token_type == Id.Lit_EscapedChar:
                part = ast.EscapedLiteralPart(self.cur_token)
            else:
                part = ast.LiteralPart(self.cur_token)
                #part.xspans.append(self.cur_token.span_id)
            word.parts.append(part)

            if self.token_type == Id.Lit_VarLike:
                # Might be an array literal: name=( ... )
                #print('@', self.lexer.LookAhead())
                #print('@', self.cursor)
                #print('@', self.cur_token)

                t = self.lexer.LookAhead(LexMode.OUTER)
                if t.id == Id.Op_LParen:
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_ArrayLiteral)
                    part2 = self._ReadArrayLiteralPart()
                    if not part2:
                        self.AddErrorContext('_ReadArrayLiteralPart failed')
                        return False
                    word.parts.append(part2)

        elif self.token_kind == Kind.VSub:
            part = ast.SimpleVarSub(self.cur_token)
            word.parts.append(part)

        elif self.token_kind == Kind.Left:
            #print('_ReadLeftParts')
            part = self._ReadLeftParts()
            if not part:
                return None
            word.parts.append(part)

        # NOT done yet, will advance below
        elif self.token_kind == Kind.Right:
            # Still part of the word; will be done on the next iter.
            if self.token_type == Id.Right_DoubleQuote:
                pass
            elif self.token_type == Id.Right_CommandSub:
                pass
            elif self.token_type == Id.Right_Subshell:
                # LEXER HACK for (case x in x) ;; esac )
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
                    self._Next(lex_mode)
                done = True
            else:
                done = True

        elif self.token_kind == Kind.Ignored:
            done = True

        else:
            # LEXER HACK for unbalanced case clause.  'case foo in esac' is
            # valid, so to test for ESAC, we can read ) before getting a
            # chance to PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we
            # unread one token and do it again.

            # We get Id.Op_RParen at top level:      case x in x) ;; esac
            # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
            if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
                assert self.next_lex_mode is None  # Rewind before it's used
                if self.lexer.MaybeUnreadOne():
                    if self.token_type == Id.Eof_RParen:
                        # Redo translation
                        self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
                    self._Next(lex_mode)

            done = True  # anything we don't recognize means we're done

        if not done:
            self._Next(lex_mode)
            num_parts += 1
    return word
def _ReadDoubleQuotedPart(self, eof_type=Id.Undefined_Tok, here_doc=False):
    """Read the interior of a double-quoted string as a DoubleQuotedPart.

    Args:
      eof_type: for stopping at }, Id.Lit_RBrace
      here_doc: Whether we are reading in a here doc context

    Also ${foo%%a b c}  # treat this as double quoted.  until you hit

    Returns:
      ast.DoubleQuotedPart on success; None if a left part fails to parse;
      False on unexpected EOF outside a here doc.
      NOTE(review): None vs False inconsistency -- callers appear to only
      truth-test the result; confirm before relying on the type.
    """
    quoted_part = ast.DoubleQuotedPart()
    left_spid = -1
    right_spid = -1  # gets set later

    if self.cur_token is not None:  # None in here doc case
        left_spid = self.cur_token.span_id

    done = False
    while not done:
        self._Next(LexMode.DQ)
        self._Peek()
        #print(self.cur_token)

        if self.token_type == eof_type:  # e.g. stop at }
            done = True
            continue

        elif self.token_kind == Kind.Lit:
            if self.token_type == Id.Lit_EscapedChar:
                part = ast.EscapedLiteralPart(self.cur_token)
            else:
                part = ast.LiteralPart(self.cur_token)
            quoted_part.parts.append(part)

        elif self.token_kind == Kind.Left:
            # $(...), ${...}, etc. nested inside the double quotes.
            part = self._ReadDoubleQuotedLeftParts()
            if not part:
                return None
            quoted_part.parts.append(part)

        elif self.token_kind == Kind.VSub:
            part = ast.SimpleVarSub(self.cur_token)
            quoted_part.parts.append(part)

        elif self.token_kind == Kind.Right:
            assert self.token_type == Id.Right_DoubleQuote
            if here_doc:
                # Turn Id.Right_DoubleQuote into a literal part
                quoted_part.parts.append(ast.LiteralPart(self.cur_token))
            else:
                done = True  # assume Id.Right_DoubleQuote
                right_spid = self.cur_token.span_id

        elif self.token_kind == Kind.Eof:
            if here_doc:  # here docs will have an EOF in their token stream
                done = True
            else:
                self.AddErrorContext(
                    'Unexpected EOF in double-quoted string')
                return False

        else:
            raise AssertionError(self.cur_token)

    # right_spid stays -1 in the here-doc / eof_type cases.
    quoted_part.spids.extend((left_spid, right_spid))
    return quoted_part