Example #1
    def Read(self, lex_mode):
        #assert self.line_pos <= len(self.line), (self.line, self.line_pos)
        tok_type, end_pos = self.match_func(lex_mode, self.line, self.line_pos)
        #assert end_pos <= len(self.line)
        if tok_type == Id.Eol_Tok:  # Do NOT add a span for this sentinel!
            return syntax.token(tok_type, '', const.NO_INTEGER)

        tok_val = self.line[self.line_pos:end_pos]

        # NOTE: tok_val is redundant, but even in osh.asdl we have some separation
        # between data needed for formatting and data needed for execution.  Could
        # revisit this later.

        # TODO: Add this back once arena is threaded everywhere
        #assert self.line_id != -1
        line_span = syntax.line_span(self.line_id, self.line_pos, len(tok_val))

        # NOTE: We're putting the arena hook in LineLexer and not Lexer because we
        # want it to be "low level".  The only thing fabricated here is a newline
        # added at the last line, so we don't end with \0.

        if self.arena_skip:
            assert self.last_span_id != const.NO_INTEGER
            span_id = self.last_span_id
            self.arena_skip = False
        else:
            span_id = self.arena.AddLineSpan(line_span)
            self.last_span_id = span_id

        #log('LineLexer.Read() span ID %d for %s', span_id, tok_type)
        t = syntax.token(tok_type, tok_val, span_id)

        self.line_pos = end_pos
        return t
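
The least obvious part of Read() is the arena_skip branch: when the lexer re-reads a token after backing up, it reuses the previous span_id instead of recording a duplicate line_span in the arena. A minimal sketch of just that reuse logic; ArenaStub and SpanRecorder are hypothetical stand-ins, not names from the real codebase:

class ArenaStub(object):
    """Hypothetical stand-in for the real arena; only AddLineSpan is sketched."""

    def __init__(self):
        self.spans = []

    def AddLineSpan(self, line_span):
        self.spans.append(line_span)
        return len(self.spans) - 1  # the new span_id


class SpanRecorder(object):
    """Isolates the arena_skip / last_span_id bookkeeping from Read()."""

    def __init__(self, arena):
        self.arena = arena
        self.last_span_id = -1
        self.arena_skip = False

    def Record(self, line_span):
        if self.arena_skip:  # re-reading the same token: reuse the old span
            self.arena_skip = False
            return self.last_span_id
        self.last_span_id = self.arena.AddLineSpan(line_span)
        return self.last_span_id
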
Example #2
  def testRead(self):
    lexer = _InitLexer(CMD)

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Chars, 'ls'), t)
    t = lexer.Read(lex_mode_e.Outer)

    self.assertTokensEqual(syntax_asdl.token(Id.WS_Space, ' '), t)

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Chars, '/'), t)

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Op_Newline, '\n'), t)

    # Line two
    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Chars, 'ls'), t)

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.WS_Space, ' '), t)

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Chars, '/home/'), t)

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Op_Newline, '\n'), t)

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Eof_Real, ''), t)

    # Another EOF gives EOF
    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Eof_Real, ''), t)
Example #3
    def LookAhead(self, lex_mode):
        # type: (lex_mode_t) -> token
        """Look ahead for a non-space token, using the given lexer mode.

        Does NOT advance self.line_pos.

        Called with at least the following modes:
          lex_mode_e.Arith -- for ${a[@]} vs ${a[1+2]}
          lex_mode_e.VS_1
          lex_mode_e.Outer
        """
        pos = self.line_pos
        n = len(self.line)
        #print('Look ahead from pos %d, line %r' % (pos,self.line))
        while True:
            if pos == n:
                # We don't allow lookahead while already at end of line, because it
                # would involve interacting with the line reader, and we never need
                # it.  In the OUTER mode, there is an explicit newline token, but
                # ARITH doesn't have it.
                t = syntax.token(Id.Unknown_Tok, '', const.NO_INTEGER)
                return t

            tok_type, end_pos = self.match_func(lex_mode, self.line, pos)
            tok_val = self.line[pos:end_pos]
            # NOTE: Instead of hard-coding this token, we could pass it in.  This
            # one only appears in OUTER state!  LookAhead(lex_mode, past_token_type)
            if tok_type != Id.WS_Space:
                break
            pos = end_pos

        return syntax.token(tok_type, tok_val, const.NO_INTEGER)
Example #4
    def testVarOps(self):
        ev = InitEvaluator()  # initializes x=xxx and y=yyy
        unset_sub = word_part.BracedVarSub(
            syntax_asdl.token(Id.VSub_Name, 'unset'))
        part_vals = []
        ev._EvalWordPart(unset_sub, part_vals)
        print(part_vals)

        set_sub = word_part.BracedVarSub(syntax_asdl.token(Id.VSub_Name, 'x'))
        part_vals = []
        ev._EvalWordPart(set_sub, part_vals)
        print(part_vals)

        # Now add some ops
        part = word_part.LiteralPart(syntax_asdl.token(Id.Lit_Chars,
                                                       'default'))
        arg_word = osh_word.CompoundWord([part])
        test_op = suffix_op.StringUnary(Id.VTest_ColonHyphen, arg_word)
        unset_sub.suffix_op = test_op
        set_sub.suffix_op = test_op

        part_vals = []
        ev._EvalWordPart(unset_sub, part_vals)
        print(part_vals)

        part_vals = []
        ev._EvalWordPart(set_sub, part_vals)
        print(part_vals)
Example #5
  def testDollarSqState(self):
    lexer = _InitLexer(r'foo bar\n \x00 \000 \u0065')

    t = lexer.Read(lex_mode_e.DollarSQ)
    print(t)
    self.assertTokensEqual(syntax_asdl.token(Id.Char_Literals, 'foo bar'), t)

    t = lexer.Read(lex_mode_e.DollarSQ)
    print(t)
    self.assertTokensEqual(syntax_asdl.token(Id.Char_OneChar, r'\n'), t)
Example #6
  def testToken(self):
    t = syntax_asdl.token(Id.Lit_Chars, 'abc')
    print(t)

    # This redundancy is OK I guess.
    t = syntax_asdl.token(Id.Lit_LBrace, '{')
    print(t)

    t = syntax_asdl.token(Id.Op_Semi, ';')
    print(t)
Example #7
  def testLookAhead(self):
    # Lines always end with '\n'
    l = LineLexer(match.MATCHER, '', self.arena)
    self.assertTokensEqual(
        syntax_asdl.token(Id.Unknown_Tok, ''), l.LookAhead(lex_mode_e.Outer))

    l = LineLexer(match.MATCHER, 'foo', self.arena)
    self.assertTokensEqual(
        syntax_asdl.token(Id.Lit_Chars, 'foo'), l.Read(lex_mode_e.Outer))
    self.assertTokensEqual(
        syntax_asdl.token(Id.Unknown_Tok, ''), l.LookAhead(lex_mode_e.Outer))

    l = LineLexer(match.MATCHER, 'foo  bar', self.arena)
    self.assertTokensEqual(
        syntax_asdl.token(Id.Lit_Chars, 'foo'), l.Read(lex_mode_e.Outer))
    self.assertTokensEqual(
        syntax_asdl.token(Id.Lit_Chars, 'bar'), l.LookAhead(lex_mode_e.Outer))

    # No lookahead; using the cursor!
    l = LineLexer(match.MATCHER, 'func(', self.arena)
    self.assertTokensEqual(
        syntax_asdl.token(Id.Lit_Chars, 'func'), l.Read(lex_mode_e.Outer))
    self.assertTokensEqual(
        syntax_asdl.token(Id.Op_LParen, '('), l.LookAhead(lex_mode_e.Outer))

    l = LineLexer(match.MATCHER, 'func  (', self.arena)
    self.assertTokensEqual(
        syntax_asdl.token(Id.Lit_Chars, 'func'), l.Read(lex_mode_e.Outer))
    self.assertTokensEqual(
        syntax_asdl.token(Id.Op_LParen, '('), l.LookAhead(lex_mode_e.Outer))
Example #8
  def testExtGlob(self):
    lexer = _InitLexer('@(foo|bar)')

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.ExtGlob_At, '@('), t)

    t = lexer.Read(lex_mode_e.ExtGlob)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Chars, 'foo'), t)

    t = lexer.Read(lex_mode_e.ExtGlob)
    self.assertTokensEqual(syntax_asdl.token(Id.Op_Pipe, '|'), t)

    t = lexer.Read(lex_mode_e.ExtGlob)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Chars, 'bar'), t)

    t = lexer.Read(lex_mode_e.ExtGlob)
    self.assertTokensEqual(syntax_asdl.token(Id.Op_RParen, ')'), t)

    # Individual cases

    lexer = _InitLexer('@(')
    t = lexer.Read(lex_mode_e.ExtGlob)
    self.assertTokensEqual(syntax_asdl.token(Id.ExtGlob_At, '@('), t)

    lexer = _InitLexer('*(')
    t = lexer.Read(lex_mode_e.ExtGlob)
    self.assertTokensEqual(syntax_asdl.token(Id.ExtGlob_Star, '*('), t)

    lexer = _InitLexer('?(')
    t = lexer.Read(lex_mode_e.ExtGlob)
    self.assertTokensEqual(syntax_asdl.token(Id.ExtGlob_QMark, '?('), t)

    lexer = _InitLexer('$')
    t = lexer.Read(lex_mode_e.ExtGlob)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Other, '$'), t)
Example #9
  def testBashRegexState(self):
    lexer = _InitLexer('(foo|bar)')

    t = lexer.Read(lex_mode_e.BashRegex)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Other, '('), t)

    t = lexer.Read(lex_mode_e.BashRegex)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Chars, 'foo'), t)

    t = lexer.Read(lex_mode_e.BashRegex)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Other, '|'), t)
Example #10
    def _Read(self, lex_mode):
        # type: (lex_mode_t) -> token
        """Read from the normal line buffer, not an alias."""
        t = self.line_lexer.Read(lex_mode)
        if t.id == Id.Eol_Tok:  # hit \0, read a new line
            line_id, line, line_pos = self.line_reader.GetLine()

            if line is None:  # no more lines
                # NOTE: Eof_Real has no contents, but it has a span_id because we want
                # to retrieve the path and line number in ui.PrettyPrintError().
                # The line_id might be -1.
                span_id = self.line_lexer.GetSpanIdForEof()
                if self.emit_comp_dummy:
                    id_ = Id.Lit_CompDummy
                    self.emit_comp_dummy = False  # emit EOF the next time
                else:
                    id_ = Id.Eof_Real
                t = syntax.token(id_, '', span_id)
                return t

            self.line_lexer.Reset(line, line_id,
                                  line_pos)  # fill with a new line
            t = self.line_lexer.Read(lex_mode)

        # e.g. translate ) or ` into EOF
        if self.translation_stack:
            old_id, new_id = self.translation_stack[-1]  # top
            if t.id == old_id:
                #print('==> TRANSLATING %s ==> %s' % (t, new_s))
                self.translation_stack.pop()
                t.id = new_id

        return t
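
The translation_stack check at the end rewrites a token's id when it matches the pair on top of the stack, e.g. turning the ) or ` that closes a command substitution into an EOF-like id. A hedged sketch of that mechanism in isolation; FakeTok and TranslateIds are illustrative names, and plain strings stand in for the real Id values:

class FakeTok(object):
    def __init__(self, id_, val):
        self.id = id_
        self.val = val


def TranslateIds(tokens, translation_stack):
    """Rewrite the id of the token matching the top (old_id, new_id) pair."""
    for t in tokens:
        if translation_stack:
            old_id, new_id = translation_stack[-1]  # peek at the top
            if t.id == old_id:
                translation_stack.pop()
                t.id = new_id
        yield t


stack = [('Op_RParen', 'Eof_RParen')]  # pushed by the parser before reading
toks = [FakeTok('Lit_Chars', 'echo'), FakeTok('Op_RParen', ')')]
print([t.id for t in TranslateIds(toks, stack)])
# ['Lit_Chars', 'Eof_RParen']
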
Example #11
    def EvalPrompt(self, val):
        """Perform the two evaluations that bash does.  Used by $PS1 and ${x@P}."""
        if val.tag != value_e.Str:
            return self.default_prompt  # no evaluation necessary

        # Parse backslash escapes (cached)
        try:
            tokens = self.tokens_cache[val.s]
        except KeyError:
            tokens = list(match.PS1_LEXER.Tokens(val.s))
            self.tokens_cache[val.s] = tokens

        # Replace values.
        ps1_str = self._ReplaceBackslashCodes(tokens)

        # Parse it like a double-quoted word (cached).
        # NOTE: This is copied from the PS4 logic in Tracer.
        try:
            ps1_word = self.parse_cache[ps1_str]
        except KeyError:
            w_parser = self.parse_ctx.MakeWordParserForPlugin(
                ps1_str, self.arena)
            try:
                ps1_word = w_parser.ReadForPlugin()
            except Exception as e:
                error_str = '<ERROR: cannot parse PS1>'
                t = syntax_asdl.token(Id.Lit_Chars, error_str,
                                      const.NO_INTEGER)
                ps1_word = word.CompoundWord([word_part.LiteralPart(t)])
            self.parse_cache[ps1_str] = ps1_word

        # Evaluate, e.g. "${debian_chroot}\u" -> '\u'
        # TODO: Handle runtime errors like unset variables, etc.
        val2 = self.ex.word_ev.EvalWordToString(ps1_word)
        return val2.s
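
Both caches in EvalPrompt use the same try/except KeyError memoization keyed on the raw string, with a fallback value when parsing fails. A minimal sketch of that idiom on its own; CachedParse and parse_func are hypothetical names, not part of the real code:

def CachedParse(cache, ps1_str, parse_func, fallback):
    try:
        return cache[ps1_str]
    except KeyError:
        try:
            w = parse_func(ps1_str)
        except Exception:
            w = fallback  # e.g. a word wrapping '<ERROR: cannot parse PS1>'
        cache[ps1_str] = w  # the fallback is cached too, so we only fail once
        return w


cache = {}
w1 = CachedParse(cache, r'\u@\h \$ ', lambda s: s.upper(), '<ERROR>')
w2 = CachedParse(cache, r'\u@\h \$ ', lambda s: s.upper(), '<ERROR>')
assert w1 is w2  # the second call hits the cache
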
Example #12
def _MakeLiteralHereLines(here_lines, arena):
  """Create a line_span and a token for each line."""
  tokens = []
  for line_id, line, start_offset in here_lines:
    line_span = syntax_asdl.line_span(line_id, start_offset, len(line))
    span_id = arena.AddLineSpan(line_span)
    t = syntax_asdl.token(Id.Lit_Chars, line[start_offset:], span_id)
    tokens.append(t)
  return [word_part.LiteralPart(t) for t in tokens]
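
Each element of here_lines is a (line_id, line, start_offset) tuple; start_offset lets the caller skip characters that were already stripped, e.g. leading tabs for <<-. A hedged usage sketch with made-up line ids and offsets, assuming an arena built the same way as in Example #15:

arena = test_lib.MakeArena('here_doc_example')  # the label string is arbitrary
here_lines = [
    (0, 'hello\n', 0),
    (1, '\tworld\n', 1),  # offset 1 skips the leading tab
]
parts = _MakeLiteralHereLines(here_lines, arena)
# One word_part.LiteralPart per line; each wraps an Id.Lit_Chars token whose
# span_id points back at the line_span recorded in the arena.
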
Example #13
    def testTokens(self):
        print(Id.Op_Newline)
        print(syntax_asdl.token(Id.Op_Newline, '\n'))

        print(Id.Op_Newline)

        print(Kind.Eof)
        print(Kind.Left)
        print('--')
        num_kinds = 0
        for name in dir(Kind):
            if name[0].isupper():
                print(name, getattr(Kind, name))
                num_kinds += 1

        print('Number of Kinds:', num_kinds)
        # 233 out of 256 tokens now
        print('Number of IDs:', len(ID_SPEC.id_str2int))

        # Make sure we're not exporting too much
        print(dir(id_kind))

        t = syntax_asdl.token(Id.Arith_Plus, '+')
        self.assertEqual(Kind.Arith, LookupKind(t.id))
        t = syntax_asdl.token(Id.Arith_CaretEqual, '^=')
        self.assertEqual(Kind.Arith, LookupKind(t.id))
        t = syntax_asdl.token(Id.Arith_RBrace, '}')
        self.assertEqual(Kind.Arith, LookupKind(t.id))

        t = syntax_asdl.token(Id.BoolBinary_GlobDEqual, '==')
        self.assertEqual(Kind.BoolBinary, LookupKind(t.id))

        t = syntax_asdl.token(Id.BoolBinary_Equal, '=')
        self.assertEqual(Kind.BoolBinary, LookupKind(t.id))
Example #14
  def testLookAhead(self):
    # I think this is the usage pattern we care about.  Peek and Next() past
    # the function; then Peek() the next token.  Then Lookahead in that state.
    lexer = _InitLexer('func()')

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Chars, 'func'), t)

    #self.assertEqual(Id.Op_LParen, lexer.LookAhead())

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Op_LParen, '('), t)

    self.assertTokensEqual(
        syntax_asdl.token(Id.Op_RParen, ')'), lexer.LookAhead(lex_mode_e.Outer))

    lexer = _InitLexer('func ()')

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.Lit_Chars, 'func'), t)

    t = lexer.Read(lex_mode_e.Outer)
    self.assertTokensEqual(syntax_asdl.token(Id.WS_Space, ' '), t)

    self.assertTokensEqual(
        syntax_asdl.token(Id.Op_LParen, '('), lexer.LookAhead(lex_mode_e.Outer))
Example #15
def _assertReadWordWithArena(test, word_str):
    print('\n---', word_str)
    arena = test_lib.MakeArena('word_parse_test.py')
    w_parser = _InitWordParser(word_str, arena=arena)
    w = w_parser.ReadWord(lex_mode_e.Outer)
    assert w is not None
    ast_lib.PrettyPrint(w)

    # Next word must be Eof_Real
    w2 = w_parser.ReadWord(lex_mode_e.Outer)
    test.assertTrue(
        test_lib.TokenWordsEqual(
            osh_word.TokenWord(syntax_asdl.token(Id.Eof_Real, '')), w2), w2)

    return arena, w
Example #16
    def testMultiLine(self):
        w_parser = _InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
        print('--MULTI')
        w = w_parser.ReadWord(lex_mode_e.Outer)
        parts = [word_part.LiteralPart(syntax_asdl.token(Id.Lit_Chars, 'ls'))]
        test_lib.AssertAsdlEqual(self, osh_word.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        parts = [word_part.LiteralPart(syntax_asdl.token(Id.Lit_Chars, 'foo'))]
        test_lib.AssertAsdlEqual(self, osh_word.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        t = syntax_asdl.token(Id.Op_Newline, '\n')
        test_lib.AssertAsdlEqual(self, osh_word.TokenWord(t), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        parts = [word_part.LiteralPart(syntax_asdl.token(Id.Lit_Chars, 'ls'))]
        test_lib.AssertAsdlEqual(self, osh_word.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        parts = [word_part.LiteralPart(syntax_asdl.token(Id.Lit_Chars, 'bar'))]
        test_lib.AssertAsdlEqual(self, osh_word.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        t = syntax_asdl.token(Id.Op_Newline, '\n')
        test_lib.AssertAsdlEqual(self, osh_word.TokenWord(t), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        t = syntax_asdl.token(Id.Eof_Real, '')
        test_lib.AssertAsdlEqual(self, osh_word.TokenWord(t), w)
Example #17
  def testDBracketState(self):
    lexer = _InitLexer('-z foo')
    t = lexer.Read(lex_mode_e.DBracket)
    self.assertTokensEqual(syntax_asdl.token(Id.BoolUnary_z, '-z'), t)
    self.assertEqual(Kind.BoolUnary, LookupKind(t.id))
Example #18
  def testReadOuter(self):
    l = LineLexer(match.MATCHER, '\n', self.arena)
    self.assertTokensEqual(
        syntax_asdl.token(Id.Op_Newline, '\n'), l.Read(lex_mode_e.Outer))