def testRead(self):
  """Reads CMD token by token in ShCommand mode, then checks EOF is sticky."""
  lexer = _InitLexer(CMD)

  expected = [
      (Id.Lit_Chars, 'ls'),
      (Id.WS_Space, ' '),
      (Id.Lit_Chars, '/'),
      (Id.Op_Newline, '\n'),
      # Line two
      (Id.Lit_Chars, 'ls'),
      (Id.WS_Space, ' '),
      (Id.Lit_Chars, '/home/'),
      (Id.Op_Newline, '\n'),
      (Id.Eof_Real, ''),
      # Another EOF gives EOF
      (Id.Eof_Real, ''),
  ]
  for tok_id, tok_val in expected:
    t = lexer.Read(lex_mode_e.ShCommand)
    self.assertTokensEqual(token(tok_id, tok_val), t)
def testVarOps(self):
  """Smoke test for _EvalWordPart on set/unset vars, with and without ops."""
  ev = InitEvaluator()  # initializes x=xxx and y=yyy

  unset_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'unset'))
  set_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'x'))

  # First evaluate both without any suffix op.
  for sub in (unset_sub, set_sub):
    part_vals = []
    ev._EvalWordPart(sub, part_vals)
    print(part_vals)

  # Now add some ops: ${unset:-default} and ${x:-default}
  part = word_part.Literal(token(Id.Lit_Chars, 'default'))
  arg_word = word.Compound([part])
  test_op = suffix_op.Unary(Id.VTest_ColonHyphen, arg_word)
  unset_sub.suffix_op = test_op
  set_sub.suffix_op = test_op

  for sub in (unset_sub, set_sub):
    part_vals = []
    ev._EvalWordPart(sub, part_vals)
    print(part_vals)
def Read(self, lex_mode):
  # type: (lex_mode_t) -> token
  """Match one token at the current position and advance past it."""
  # Inner loop optimization: bind instance fields to locals.
  buf = self.line
  start = self.line_pos

  tok_type, end_pos = self.match_func(lex_mode, buf, start)
  if tok_type == Id.Eol_Tok:
    # Do NOT add a span for this sentinel!
    return token(tok_type, '', const.NO_INTEGER)

  tok_val = buf[start:end_pos]

  # NOTE: We're putting the arena hook in LineLexer and not Lexer because we
  # want it to be "low level".  The only thing fabricated here is a newline
  # added at the last line, so we don't end with \0.
  if self.arena_skip:  # make another token from the last span
    assert self.last_span_id != const.NO_INTEGER
    span_id = self.last_span_id
    self.arena_skip = False
  else:
    span_id = self.arena.AddLineSpan(self.line_id, start, len(tok_val))
    self.last_span_id = span_id

  #log('LineLexer.Read() span ID %d for %s', span_id, tok_type)
  self.line_pos = end_pos
  return token(tok_type, tok_val, span_id)
def LookAhead(self, lex_mode):
  # type: (lex_mode_t) -> token
  """Look ahead for a non-space token, using the given lexer mode.

  Does NOT advance self.line_pos.

  Called with at least the following modes:
    lex_mode_e.Arith -- for ${a[@]} vs ${a[1+2]}
    lex_mode_e.VS_1
    lex_mode_e.Outer
  """
  pos = self.line_pos
  line = self.line
  n = len(line)
  #print('Look ahead from pos %d, line %r' % (pos, line))

  while pos != n:
    tok_type, end_pos = self.match_func(lex_mode, line, pos)
    # NOTE: Instead of hard-coding this token, we could pass it in.  This
    # one only appears in OUTER state!  LookAhead(lex_mode, past_token_type)
    if tok_type != Id.WS_Space:
      return token(tok_type, line[pos:end_pos], const.NO_INTEGER)
    pos = end_pos  # skip whitespace and keep scanning

  # We don't allow lookahead while already at end of line, because it
  # would involve interacting with the line reader, and we never need
  # it.  In the OUTER mode, there is an explicit newline token, but
  # ARITH doesn't have it.
  return token(Id.Unknown_Tok, '', const.NO_INTEGER)
def testToken(self):
  """Constructs a few tokens and prints their reprs."""
  # This redundancy is OK I guess.
  for t in (token(Id.Lit_Chars, 'abc'),
            token(Id.Lit_LBrace, '{'),
            token(Id.Op_Semi, ';')):
    print(t)
def testMode_DollarSq(self):
  """DollarSQ mode splits $'...' contents into literal runs and escapes."""
  lexer = _InitLexer(r'foo bar\n \x00 \000 \u0065')

  for tok_id, tok_val in [(Id.Char_Literals, 'foo bar'),
                          (Id.Char_OneChar, r'\n')]:
    t = lexer.Read(lex_mode_e.DollarSQ)
    print(t)
    self.assertTokensEqual(token(tok_id, tok_val), t)
def testLookAhead(self):
  """LineLexer.LookAhead skips spaces but never advances the cursor."""
  # Lines always end with '\n'
  # Each case: (line, tokens consumed via Read first, expected lookahead).
  cases = [
      ('', [], token(Id.Unknown_Tok, '')),
      ('foo', [token(Id.Lit_Chars, 'foo')], token(Id.Unknown_Tok, '')),
      ('foo bar', [token(Id.Lit_Chars, 'foo')], token(Id.Lit_Chars, 'bar')),
      # No lookahead; using the cursor!
      ('fun(', [token(Id.Lit_Chars, 'fun')], token(Id.Op_LParen, '(')),
      ('fun (', [token(Id.Lit_Chars, 'fun')], token(Id.Op_LParen, '(')),
  ]
  for line, reads, expected in cases:
    l = LineLexer(match.MATCHER, line, self.arena)
    for expected_tok in reads:
      self.assertTokensEqual(expected_tok, l.Read(lex_mode_e.ShCommand))
    self.assertTokensEqual(expected, l.LookAhead(lex_mode_e.ShCommand))
def testMode_ExtGlob(self):
  """ExtGlob mode tokenizes @(foo|bar) and the individual operators."""
  lexer = _InitLexer('@(foo|bar)')

  # The opener is read in ShCommand mode; the rest in ExtGlob mode.
  t = lexer.Read(lex_mode_e.ShCommand)
  self.assertTokensEqual(token(Id.ExtGlob_At, '@('), t)

  for tok_id, tok_val in [(Id.Lit_Chars, 'foo'),
                          (Id.Op_Pipe, '|'),
                          (Id.Lit_Chars, 'bar'),
                          (Id.Op_RParen, ')')]:
    t = lexer.Read(lex_mode_e.ExtGlob)
    self.assertTokensEqual(token(tok_id, tok_val), t)

  # Individual cases
  for line, tok_id, tok_val in [('@(', Id.ExtGlob_At, '@('),
                                ('*(', Id.ExtGlob_Star, '*('),
                                ('?(', Id.ExtGlob_QMark, '?('),
                                ('$', Id.Lit_Other, '$')]:
    lexer = _InitLexer(line)
    t = lexer.Read(lex_mode_e.ExtGlob)
    self.assertTokensEqual(token(tok_id, tok_val), t)
def testMode_BashRegex(self):
  """BashRegex mode treats ( and | as literal regex characters."""
  lexer = _InitLexer('(foo|bar)')

  for tok_id, tok_val in [(Id.Lit_Other, '('),
                          (Id.Lit_Chars, 'foo'),
                          (Id.Lit_Other, '|')]:
    t = lexer.Read(lex_mode_e.BashRegex)
    self.assertTokensEqual(token(tok_id, tok_val), t)
def testPushHint(self):
  """PushHint retargets a single Op_RParen to Right_ExtGlob."""
  # Extglob use case
  lexer = _InitLexer('@()')
  lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)

  for tok_id, tok_val in [(Id.ExtGlob_At, '@('),
                          (Id.Right_ExtGlob, ')'),
                          (Id.Eof_Real, '')]:
    t = lexer.Read(lex_mode_e.ShCommand)
    self.assertTokensEqual(token(tok_id, tok_val), t)
def EvalPrompt(self, val):
  """Perform the two evaluations that bash does.  Used by $PS1 and ${x@P}.

  Args:
    val: a value_t; anything other than value.Str yields the default prompt.

  Returns:
    The prompt string after backslash-code replacement and word evaluation.
  """
  if val.tag != value_e.Str:
    return self.default_prompt  # no evaluation necessary

  # Parse backslash escapes (cached)
  try:
    tokens = self.tokens_cache[val.s]
  except KeyError:
    tokens = list(match.PS1_LEXER.Tokens(val.s))
    self.tokens_cache[val.s] = tokens

  # Replace values.
  ps1_str = self._ReplaceBackslashCodes(tokens)

  # Parse it like a double-quoted word (cached).
  # NOTE: This is copied from the PS4 logic in Tracer.
  try:
    ps1_word = self.parse_cache[ps1_str]
  except KeyError:
    w_parser = self.parse_ctx.MakeWordParserForPlugin(
        ps1_str, self.arena)
    try:
      ps1_word = w_parser.ReadForPlugin()
    # FIX: the exception object was bound as 'e' but never used.
    # TODO(review): narrow this to the parser's ParseError, like the PS4
    # code does -- a blanket Exception also hides programming errors.
    except Exception:
      error_str = '<ERROR: cannot parse PS1>'
      t = token(Id.Lit_Chars, error_str, const.NO_INTEGER)
      ps1_word = word.CompoundWord([word_part.LiteralPart(t)])
    self.parse_cache[ps1_str] = ps1_word

  # Evaluate, e.g. "${debian_chroot}\u" -> '\u'
  # TODO: Handle runtime errors like unset variables, etc.
  val2 = self.ex.word_ev.EvalWordToString(ps1_word)
  return val2.s
def _Read(self, lex_mode):
  # type: (lex_mode_t) -> token
  """Read from the normal line buffer, not an alias."""
  t = self.line_lexer.Read(lex_mode)

  if t.id == Id.Eol_Tok:  # hit \0: pull the next line from the reader
    line_id, line, line_pos = self.line_reader.GetLine()

    if line is None:  # no more lines
      # Emit the completion dummy once if requested, otherwise real EOF.
      eof_id = Id.Eof_Real
      if self.emit_comp_dummy:
        eof_id = Id.Lit_CompDummy
        self.emit_comp_dummy = False  # emit EOF the next time
      return token(eof_id, '', self.line_lexer.GetSpanIdForEof())

    self.line_lexer.Reset(line, line_id, line_pos)  # fill with a new line
    t = self.line_lexer.Read(lex_mode)

  # e.g. translate ) or ` into EOF
  if self.translation_stack and t.id == self.translation_stack[-1][0]:
    #print('==> TRANSLATING %s ==> %s' % (t, new_s))
    _, new_id = self.translation_stack.pop()
    t.id = new_id

  return t
def _Read(self, lex_mode):
  # type: (lex_mode_t) -> token
  """Read from the normal line buffer, not an alias.

  Reads one token in the given mode; when the line lexer reports the
  Eol_Tok sentinel, refills it from the line reader and reads again.
  """
  t = self.line_lexer.Read(lex_mode)
  if t.id == Id.Eol_Tok:  # hit \0, read a new line
    line_id, line, line_pos = self.line_reader.GetLine()
    if line is None:  # no more lines
      # NOTE: Eof_Real has no contents, but it has a span_id because we want
      # to retrieve the path and line number in ui.PrettyPrintError().
      # The line_id might be -1.
      span_id = self.line_lexer.GetSpanIdForEof()
      if self.emit_comp_dummy:
        # One-shot flag: emit a dummy token for completion, then real EOF.
        id_ = Id.Lit_CompDummy
        self.emit_comp_dummy = False  # emit EOF the next time
      else:
        id_ = Id.Eof_Real
      t = token(id_, '', span_id)
      return t
    self.line_lexer.Reset(line, line_id, line_pos)  # fill with a new line
    t = self.line_lexer.Read(lex_mode)
  # e.g. translate ) or ` into EOF
  # Each stack entry is an (old_id, new_id) pair; only the top is consulted,
  # and it's popped once it fires.
  if self.translation_stack:
    old_id, new_id = self.translation_stack[-1]  # top
    if t.id == old_id:
      #print('==> TRANSLATING %s ==> %s' % (t, new_s))
      self.translation_stack.pop()
      t.id = new_id
  return t
def testRangePartDetect(self):
  """_RangePartDetect parses 'a..b' and 'a..b..step'; anything else is None."""
  CASES = [
      ('', None),
      ('1', None),
      ('1..', None),
      ('1..3', ('1', '3')),
      ('3..-10..-2', ('3', '-10', -2)),
      ('3..-10..-2..', None),  # nope! unexpected trailing tokens
      ('a', None),
      ('a..', None),
      ('a..z', ('a', 'z')),
      ('a..z..', None),
      ('z..a..-1', ('z', 'a', -1)),
  ]
  for case_str, expected in CASES:
    tok = token(Id.Lit_Chars, case_str)
    part = braces._RangePartDetect(tok)
    if expected is None:
      self.assert_(part is None)
    elif len(expected) == 2:
      start, end = expected
      self.assertEqual(start, part.start)
      self.assertEqual(end, part.end)
      #self.assertEqual(const.NO_INTEGER, part.step)
    elif len(expected) == 3:
      start, end, step = expected
      self.assertEqual(start, part.start)
      self.assertEqual(end, part.end)
      self.assertEqual(step, part.step)
    else:
      raise AssertionError

    # BUG FIX: the original unpacked 'expected' into a variable named 's',
    # shadowing the loop's input string -- so this log line printed the
    # expected start value instead of the case being tested.
    log('%r\t%s', case_str, part)
def testEmitCompDummy(self):
  """EmitCompDummy injects Lit_CompDummy right before Eof_Real."""
  lexer = _InitLexer('echo ')
  lexer.EmitCompDummy()

  expected = [
      (Id.Lit_Chars, 'echo'),
      (Id.WS_Space, ' '),
      # Right before EOF
      (Id.Lit_CompDummy, ''),
      (Id.Eof_Real, ''),
  ]
  for tok_id, tok_val in expected:
    t = lexer.Read(lex_mode_e.ShCommand)
    self.assertTokensEqual(token(tok_id, tok_val), t)
def _assertReadWordWithArena(test, w_parser):
  """Read one word, pretty-print it, and require that Eof_Real follows."""
  w = w_parser.ReadWord(lex_mode_e.ShCommand)
  assert w is not None
  w.PrettyPrint()

  # Next word must be Eof_Real
  eof = w_parser.ReadWord(lex_mode_e.ShCommand)
  test.assertTrue(
      test_lib.TokenWordsEqual(word.Token(token(Id.Eof_Real, '')), eof), eof)

  return w
def testTokens(self):
  """Prints Kind/Id inventory, then spot-checks LookupKind on a few tokens."""
  print(Id.Op_Newline)
  print(syntax_asdl.token(Id.Op_Newline, '\n'))
  print(Id.Op_Newline)

  print(Kind.Eof)
  print(Kind.Left)
  print('--')

  num_kinds = 0
  for name in dir(Kind):
    if name[0].isupper():
      print(name, getattr(Kind, name))
      num_kinds += 1
  print('Number of Kinds:', num_kinds)

  # 233 out of 256 tokens now
  print('Number of IDs:', len(ID_SPEC.id_str2int))

  # Make sure we're not exporting too much
  print(dir(id_kind))

  for tok_id, tok_val, expected_kind in [
      (Id.Arith_Plus, '+', Kind.Arith),
      (Id.Arith_CaretEqual, '^=', Kind.Arith),
      (Id.Arith_RBrace, '}', Kind.Arith),
      (Id.BoolBinary_GlobDEqual, '==', Kind.BoolBinary),
      (Id.BoolBinary_Equal, '=', Kind.BoolBinary),
  ]:
    t = syntax_asdl.token(tok_id, tok_val)
    self.assertEqual(expected_kind, LookupKind(t.id))
def testLookAhead(self):
  """Lexer.LookAhead peeks past the cursor without consuming tokens."""
  # I think this is the usage pattern we care about.  Peek and Next() past
  # the function; then Peek() the next token.  Then Lookahead in that state.
  lexer = _InitLexer('fun()')

  self.assertTokensEqual(token(Id.Lit_Chars, 'fun'),
                         lexer.Read(lex_mode_e.ShCommand))
  #self.assertEqual(Id.Op_LParen, lexer.LookAhead())
  self.assertTokensEqual(token(Id.Op_LParen, '('),
                         lexer.Read(lex_mode_e.ShCommand))
  self.assertTokensEqual(token(Id.Op_RParen, ')'),
                         lexer.LookAhead(lex_mode_e.ShCommand))

  lexer = _InitLexer('fun ()')

  self.assertTokensEqual(token(Id.Lit_Chars, 'fun'),
                         lexer.Read(lex_mode_e.ShCommand))
  self.assertTokensEqual(token(Id.WS_Space, ' '),
                         lexer.Read(lex_mode_e.ShCommand))
  self.assertTokensEqual(token(Id.Op_LParen, '('),
                         lexer.LookAhead(lex_mode_e.ShCommand))
def _ExpandPart(
    parts,  # type: List[word_part_t]
    first_alt_index,  # type: int
    suffixes,  # type: List[List[word_part_t]]
):
  # type: (...) -> List[List[word_part_t]]
  """Mutually recursive with _BraceExpand.

  Expands the alternation/range at first_alt_index and crosses each
  expansion with every suffix, keeping the common prefix.

  Args:
    parts: input parts
    first_alt_index: index of the first BracedTuple
    suffixes: List of suffixes to append.
  """
  out = []
  prefix = parts[:first_alt_index]
  expand_part = parts[first_alt_index]

  if isinstance(expand_part, word_part__BracedTuple):
    # Call _BraceExpand on each of the inner words too!
    alternatives = []  # type: List[List[word_part_t]]
    for w in expand_part.words:
      assert isinstance(w, word__Compound)  # for MyPy
      alternatives.extend(_BraceExpand(w.parts))

    for alt in alternatives:
      for suffix in suffixes:
        out.append(prefix + alt + suffix)

  elif isinstance(expand_part, word_part__BracedRange):
    # Not mutually recursive with _BraceExpand
    for s in _RangeStrings(expand_part):
      # Preserve span_id from the original
      t = token(Id.Lit_Chars, s, expand_part.spids[0])
      middle = [word_part.Literal(t)]  # type: List[word_part_t]
      for suffix in suffixes:
        out.append(prefix + middle + suffix)

  else:
    raise AssertionError

  return out
def Read(self, lex_mode):
  # type: (lex_mode_t) -> token
  """Match one token at the current position and advance past it."""
  # Inner loop optimization: bind instance fields to locals.
  buf = self.line
  start = self.line_pos

  tok_type, end_pos = self.match_func(lex_mode, buf, start)
  if tok_type == Id.Eol_Tok:
    # Do NOT add a span for this sentinel!
    return token(tok_type, '', const.NO_INTEGER)

  tok_val = buf[start:end_pos]

  # NOTE: tok_val is redundant, but even in osh.asdl we have some separation
  # between data needed for formatting and data needed for execution.  Could
  # revisit this later.

  # TODO: Add this back once arena is threaded everywhere
  span = line_span(self.line_id, start, len(tok_val))

  # NOTE: We're putting the arena hook in LineLexer and not Lexer because we
  # want it to be "low level".  The only thing fabricated here is a newline
  # added at the last line, so we don't end with \0.
  if self.arena_skip:  # make another token from the last span
    assert self.last_span_id != const.NO_INTEGER
    span_id = self.last_span_id
    self.arena_skip = False
  else:
    span_id = self.arena.AddLineSpan(span)
    self.last_span_id = span_id

  #log('LineLexer.Read() span ID %d for %s', span_id, tok_type)
  self.line_pos = end_pos
  return token(tok_type, tok_val, span_id)
def _assertReadWordWithArena(test, word_str):
  """Parse one word from word_str and require Eof_Real to follow.

  Returns (arena, word) so callers can inspect the spans that were added.
  """
  print('\n---', word_str)

  arena = test_lib.MakeArena('word_parse_test.py')
  w_parser = _InitWordParser(word_str, arena=arena)

  w = w_parser.ReadWord(lex_mode_e.Outer)
  assert w is not None
  w.PrettyPrint()

  # Next word must be Eof_Real
  eof = w_parser.ReadWord(lex_mode_e.Outer)
  test.assertTrue(
      test_lib.TokenWordsEqual(osh_word.TokenWord(token(Id.Eof_Real, '')),
                               eof), eof)

  return arena, w
def _EvalPS4(self):
  """For set -x.

  Returns:
    (first_char, prefix): the first character of $PS4 (intended to be
    repeated per nesting depth) and the evaluated remainder.
  """
  val = self.mem.GetVar('PS4')
  assert val.tag == value_e.Str

  s = val.s
  if s:
    first_char, ps4 = s[0], s[1:]
  else:
    first_char, ps4 = '+', ' '  # default

  # NOTE: This cache is slightly broken because aliases are mutable!  I think
  # that is more or less harmless though.
  try:
    ps4_word = self.parse_cache[ps4]
  except KeyError:
    # We have to parse this at runtime.  PS4 should usually remain constant.
    w_parser = self.parse_ctx.MakeWordParserForPlugin(ps4, self.arena)
    try:
      ps4_word = w_parser.ReadForPlugin()
    # FIX: the exception object was bound as 'e' but never used.
    except util.ParseError:
      error_str = '<ERROR: cannot parse PS4>'
      t = token(Id.Lit_Chars, error_str, const.NO_INTEGER)
      ps4_word = osh_word.CompoundWord([word_part.LiteralPart(t)])
    self.parse_cache[ps4] = ps4_word

  #print(ps4_word)

  # TODO: Repeat first character according process stack depth.  Where is
  # that stored?  In the executor itself?  It should be stored along with
  # the PID.  Need some kind of ShellProcessState or something.
  #
  # We should come up with a better mechanism.  Something like $PROC_INDENT
  # and $OIL_XTRACE_PREFIX.

  # TODO: Handle runtime errors!  For example, you could PS4='$(( 1 / 0 ))'
  # <ERROR: cannot evaluate PS4>
  prefix = self.word_ev.EvalWordToString(ps4_word)

  return first_char, prefix.s
def testTokens(self):
  """Prints the Kind/Id inventory, then spot-checks LookupKind."""
  print(Id.Op_Newline)
  print(syntax_asdl.token(Id.Op_Newline, '\n'))
  print(Id.Op_Newline)

  print(Kind.Eof)
  print(Kind.Left)
  print('--')

  num_kinds = 0
  for name in dir(Kind):
    if name[0].isupper():
      kind = getattr(Kind, name)
      print('%-20s %s' % (name, kind))
      num_kinds += 1

  print()
  print('Number of Kinds:', num_kinds)
  print()

  for name in dir(Id):
    if name[0].isupper():
      id_ = getattr(Id, name)
      print('%-30s %s' % (name, id_))

  # 309 out of 256 tokens now
  print()
  print('Number of IDs:', len(ID_SPEC.id_str2int))

  for tok_id, tok_val, expected_kind in [
      (Id.Arith_Plus, '+', Kind.Arith),
      (Id.Arith_CaretEqual, '^=', Kind.Arith),
      (Id.Arith_RBrace, '}', Kind.Arith),
      (Id.BoolBinary_GlobDEqual, '==', Kind.BoolBinary),
      (Id.BoolBinary_Equal, '=', Kind.BoolBinary),
  ]:
    t = syntax_asdl.token(tok_id, tok_val)
    self.assertEqual(expected_kind, LookupKind(t.id))
def testMultiLine(self):
  """Blank lines and comments are skipped; words and newlines come through."""
  # NOTE(review): the exact blank-line layout of this heredoc was
  # reconstructed from the collapsed source -- confirm against history.
  w_parser = _InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
  print('--MULTI')

  expected_words = [
      osh_word.CompoundWord(
          [word_part.LiteralPart(token(Id.Lit_Chars, 'ls'))]),
      osh_word.CompoundWord(
          [word_part.LiteralPart(token(Id.Lit_Chars, 'foo'))]),
      osh_word.TokenWord(token(Id.Op_Newline, '\n')),
      osh_word.CompoundWord(
          [word_part.LiteralPart(token(Id.Lit_Chars, 'ls'))]),
      osh_word.CompoundWord(
          [word_part.LiteralPart(token(Id.Lit_Chars, 'bar'))]),
      osh_word.TokenWord(token(Id.Op_Newline, '\n')),
      osh_word.TokenWord(token(Id.Eof_Real, '')),
  ]
  for expected in expected_words:
    w = w_parser.ReadWord(lex_mode_e.Outer)
    test_lib.AssertAsdlEqual(self, expected, w)
def testMode_DBracket(self):
  """'-z' lexes as a unary boolean operator in DBracket mode."""
  t = _InitLexer('-z foo').Read(lex_mode_e.DBracket)
  self.assertTokensEqual(token(Id.BoolUnary_z, '-z'), t)
  self.assertEqual(Kind.BoolUnary, LookupKind(t.id))
def testReadOuter(self):
  """A bare newline lexes as Op_Newline in ShCommand mode."""
  lexer = LineLexer(match.MATCHER, '\n', self.arena)
  t = lexer.Read(lex_mode_e.ShCommand)
  self.assertTokensEqual(token(Id.Op_Newline, '\n'), t)
def ErrorWord(fmt, err):
  # type: (str, _ErrorWithLocation) -> word__Compound
  """Build a literal Compound word wrapping a formatted error message."""
  msg = fmt % err.UserErrorString()
  err_token = token(Id.Lit_Chars, msg, const.NO_INTEGER)
  return word.Compound([word_part.Literal(err_token)])