Example #1
    def Read(self, lex_mode):
        #assert self.line_pos <= len(self.line), (self.line, self.line_pos)
        tok_type, end_pos = self.match_func(lex_mode, self.line, self.line_pos)
        #assert end_pos <= len(self.line)
        if tok_type == Id.Eol_Tok:  # Do NOT add a span for this sentinel!
            return ast.token(tok_type, '', const.NO_INTEGER)

        tok_val = self.line[self.line_pos:end_pos]

        # NOTE: tok_val is redundant, but even in osh.asdl we have some separation
        # between data needed for formatting and data needed for execution.  Could
        # revisit this later.

        # TODO: Add this back once arena is threaded everywhere
        #assert self.line_id != -1
        line_span = ast.line_span(self.line_id, self.line_pos, len(tok_val))

        # NOTE: We're putting the arena hook in LineLexer and not Lexer because we
        # want it to be "low level".  The only thing fabricated here is a newline
        # added at the last line, so we don't end with \0.

        if self.arena_skip:
            assert self.last_span_id != const.NO_INTEGER
            span_id = self.last_span_id
            self.arena_skip = False
        else:
            span_id = self.arena.AddLineSpan(line_span)
            self.last_span_id = span_id

        #log('LineLexer.Read() span ID %d for %s', span_id, tok_type)
        t = ast.token(tok_type, tok_val, span_id)

        self.line_pos = end_pos
        return t
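For illustration, the arena_skip / last_span_id handshake in Read() above can be shown in isolation. The sketch below is a toy, not the osh API; SpanRecorder and its fields are hypothetical stand-ins for the arena, arena_skip, and last_span_id used by LineLexer.

class SpanRecorder:
    """Toy span recorder: add a span and get an ID, or reuse the last ID."""

    def __init__(self):
        self.spans = []
        self.last_span_id = -1
        self.arena_skip = False

    def Add(self, span):
        if self.arena_skip:            # caller asked to reuse the previous ID
            self.arena_skip = False
            return self.last_span_id
        self.spans.append(span)
        self.last_span_id = len(self.spans) - 1
        return self.last_span_id


r = SpanRecorder()
print(r.Add((0, 0, 2)))   # 0
r.arena_skip = True
print(r.Add((0, 2, 1)))   # 0 again; no new span was recorded
print(r.Add((0, 3, 4)))   # 1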
Example #2
    def testPipeline2(self):
        Banner('ls | cut -d . -f 1 | head')
        p = process.Pipeline()
        p.Add(_ExtProc(['ls']))
        p.Add(_ExtProc(['cut', '-d', '.', '-f', '1']))
        p.Add(_ExtProc(['head']))

        print(p.Run(_WAITER))

        ex = InitExecutor()

        # Simulating subshell for each command
        w1 = ast.CompoundWord()
        w1.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls')))
        node1 = ast.SimpleCommand()
        node1.words = [w1]

        w2 = ast.CompoundWord()
        w2.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'head')))
        node2 = ast.SimpleCommand()
        node2.words = [w2]

        w3 = ast.CompoundWord()
        w3.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'sort')))
        w4 = ast.CompoundWord()
        w4.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, '--reverse')))
        node3 = ast.SimpleCommand()
        node3.words = [w3, w4]

        p = process.Pipeline()
        p.Add(Process(process.SubProgramThunk(ex, node1)))
        p.Add(Process(process.SubProgramThunk(ex, node2)))
        p.Add(Process(process.SubProgramThunk(ex, node3)))

        print(p.Run(_WAITER))
Example #3
    def testRead(self):
        lexer = _InitLexer(CMD)

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'ls'), t)
        t = lexer.Read(LexMode.OUTER)

        self.assertTokensEqual(ast.token(Id.WS_Space, ' '), t)

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, '/'), t)

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Op_Newline, '\n'), t)

        # Line two
        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'ls'), t)

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.WS_Space, ' '), t)

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, '/home/'), t)

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Op_Newline, '\n'), t)

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Eof_Real, ''), t)

        # Another EOF gives EOF
        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Eof_Real, ''), t)
Example #4
    def testTokens(self):
        print(Id.Op_Newline)
        print(ast.token(Id.Op_Newline, '\n'))

        print(IdName(Id.Op_Newline))

        print(Kind.Eof)
        print(Kind.Left)
        print('--')
        for name in dir(Kind):
            if name[0].isupper():
                print(name, getattr(Kind, name))

        # Make sure we're not exporting too much
        print(dir(id_kind))

        # 206 out of 256 tokens now
        print(len(id_kind._ID_NAMES))

        t = ast.token(Id.Arith_Plus, '+')
        self.assertEqual(Kind.Arith, LookupKind(t.id))
        t = ast.token(Id.Arith_CaretEqual, '^=')
        self.assertEqual(Kind.Arith, LookupKind(t.id))
        t = ast.token(Id.Arith_RBrace, '}')
        self.assertEqual(Kind.Arith, LookupKind(t.id))

        t = ast.token(Id.BoolBinary_DEqual, '==')
        self.assertEqual(Kind.BoolBinary, LookupKind(t.id))
Example #5
    def testShellFuncExecution(self):
        ex = cmd_exec_test.InitExecutor()
        func_node = ast.FuncDef()

        c1 = ast.CompoundWord()
        t1 = ast.token(Id.Lit_Chars, 'f1')
        c1.parts.append(ast.LiteralPart(t1))

        c2 = ast.CompoundWord()
        t2 = ast.token(Id.Lit_Chars, 'f2')
        c2.parts.append(ast.LiteralPart(t2))

        a = ast.ArrayLiteralPart()
        a.words = [c1, c2]
        w = ast.CompoundWord()
        w.parts.append(a)

        # Set global COMPREPLY=(f1 f2)
        pair = ast.assign_pair(ast.LhsName('COMPREPLY'), assign_op_e.Equal, w)
        pair.spids.append(0)  # dummy
        pairs = [pair]
        body_node = ast.Assignment(Id.Assign_None, [], pairs)

        func_node.name = 'myfunc'
        func_node.body = body_node

        a = completion.ShellFuncAction(ex, func_node)
        matches = list(a.Matches([], 0, 'f'))
        self.assertEqual(['f1 ', 'f2 '], matches)
Example #6
    def LookAhead(self, lex_mode):
        """Look ahead for a non-space token, using the given lexer mode.

        Does NOT advance self.line_pos.

        Called with at least the following modes:
          lex_mode_e.ARITH -- for ${a[@]} vs ${a[1+2]}
          lex_mode_e.VS_1
          lex_mode_e.OUTER
        """
        pos = self.line_pos
        #print('Look ahead from pos %d, line %r' % (pos,self.line))
        while True:
            if pos == len(self.line):
                # We don't allow lookahead while already at end of line, because it
                # would involve interacting with the line reader, and we never need
                # it.  In the OUTER mode, there is an explicit newline token, but
                # ARITH doesn't have it.
                t = ast.token(Id.Unknown_Tok, '', const.NO_INTEGER)
                return t

            tok_type, end_pos = self.match_func(lex_mode, self.line, pos)
            tok_val = self.line[pos:end_pos]
            # NOTE: Instead of hard-coding this token, we could pass it in.  This
            # one only appears in OUTER state!  LookAhead(lex_mode, past_token_type)
            if tok_type != Id.WS_Space:
                break
            pos = end_pos

        return ast.token(tok_type, tok_val, const.NO_INTEGER)
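The docstring's contract above (skip WS_Space tokens without advancing line_pos) can be illustrated with a small standalone sketch. The token table and names below are invented for illustration; they are not the osh lexer definitions.

import re

# Hypothetical token table, standing in for the real lexer_def / match_func.
TOKENS = [
    (re.compile(r'[ \t]+'), 'WS_Space'),
    (re.compile(r'[a-zA-Z_]+'), 'Lit_Chars'),
    (re.compile(r'\('), 'Op_LParen'),
]

def match_one(line, pos):
    for pat, tok_type in TOKENS:
        m = pat.match(line, pos)
        if m:
            return tok_type, m.end()
    return 'Unknown_Tok', pos

def look_ahead(line, line_pos):
    pos = line_pos                      # local copy; line_pos is never mutated
    while pos != len(line):
        tok_type, end_pos = match_one(line, pos)
        if tok_type != 'WS_Space':      # first non-space token wins
            return tok_type, line[pos:end_pos]
        pos = end_pos
    return 'Unknown_Tok', ''            # at end of line, as in the code above

print(look_ahead('func  (', 0))   # ('Lit_Chars', 'func')
print(look_ahead('func  (', 4))   # ('Op_LParen', '(')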
Example #7
    def testVarOps(self):
        ev = InitEvaluator()  # initializes x=xxx and y=yyy
        unset_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'unset'))
        part_vals = []
        ev._EvalWordPart(unset_sub, part_vals)
        print(part_vals)

        set_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'x'))
        part_vals = []
        ev._EvalWordPart(set_sub, part_vals)
        print(part_vals)

        # Now add some ops
        part = ast.LiteralPart(ast.token(Id.Lit_Chars, 'default'))
        arg_word = ast.CompoundWord([part])
        test_op = ast.StringUnary(Id.VTest_ColonHyphen, arg_word)
        unset_sub.suffix_op = test_op
        set_sub.suffix_op = test_op

        part_vals = []
        ev._EvalWordPart(unset_sub, part_vals)
        print(part_vals)

        part_vals = []
        ev._EvalWordPart(set_sub, part_vals)
        print(part_vals)
Example #8
    def testDollarSqState(self):
        lexer = _InitLexer(r'foo bar\n \x00 \000 \u0065')

        t = lexer.Read(lex_mode_e.DOLLAR_SQ)
        print(t)
        self.assertTokensEqual(ast.token(Id.Char_Literals, 'foo bar'), t)

        t = lexer.Read(lex_mode_e.DOLLAR_SQ)
        print(t)
        self.assertTokensEqual(ast.token(Id.Char_OneChar, r'\n'), t)
Example #9
    def testToken(self):
        t = ast.token(Id.Lit_Chars, 'abc')
        print(t)

        # This redundancy is OK I guess.
        t = ast.token(Id.Lit_LBrace, '{')
        print(t)

        t = ast.token(Id.Op_Semi, ';')
        print(t)
Example #10
    def testLookAhead(self):
        # Lines always end with '\n'
        l = LineLexer(LEXER_DEF, '')
        self.assertTokensEqual(ast.token(Id.Eof_Real, ''),
                               l.LookAhead(LexMode.OUTER))

        l = LineLexer(LEXER_DEF, 'foo')
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'foo'),
                               l.Read(LexMode.OUTER))
        self.assertTokensEqual(ast.token(Id.Eof_Real, ''),
                               l.LookAhead(LexMode.OUTER))

        l = LineLexer(LEXER_DEF, 'foo  bar')
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'foo'),
                               l.Read(LexMode.OUTER))
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'bar'),
                               l.LookAhead(LexMode.OUTER))

        # No lookahead; using the cursor!
        l = LineLexer(LEXER_DEF, 'func(')
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'func'),
                               l.Read(LexMode.OUTER))
        self.assertTokensEqual(ast.token(Id.Op_LParen, '('),
                               l.LookAhead(LexMode.OUTER))

        l = LineLexer(LEXER_DEF, 'func  (')
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'func'),
                               l.Read(LexMode.OUTER))
        self.assertTokensEqual(ast.token(Id.Op_LParen, '('),
                               l.LookAhead(LexMode.OUTER))
Example #11
    def testExtGlob(self):
        lexer = _InitLexer('@(foo|bar)')

        t = lexer.Read(lex_mode_e.OUTER)
        self.assertTokensEqual(ast.token(Id.ExtGlob_At, '@('), t)

        t = lexer.Read(lex_mode_e.EXTGLOB)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'foo'), t)

        t = lexer.Read(lex_mode_e.EXTGLOB)
        self.assertTokensEqual(ast.token(Id.Op_Pipe, '|'), t)

        t = lexer.Read(lex_mode_e.EXTGLOB)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'bar'), t)

        t = lexer.Read(lex_mode_e.EXTGLOB)
        self.assertTokensEqual(ast.token(Id.Op_RParen, ')'), t)

        # Individual cases

        lexer = _InitLexer('@(')
        t = lexer.Read(lex_mode_e.EXTGLOB)
        self.assertTokensEqual(ast.token(Id.ExtGlob_At, '@('), t)

        lexer = _InitLexer('*(')
        t = lexer.Read(lex_mode_e.EXTGLOB)
        self.assertTokensEqual(ast.token(Id.ExtGlob_Star, '*('), t)

        lexer = _InitLexer('?(')
        t = lexer.Read(lex_mode_e.EXTGLOB)
        self.assertTokensEqual(ast.token(Id.ExtGlob_QMark, '?('), t)

        lexer = _InitLexer('$')
        t = lexer.Read(lex_mode_e.EXTGLOB)
        self.assertTokensEqual(ast.token(Id.Lit_Other, '$'), t)
Example #12
    def testLookAhead(self):
        # Lines always end with '\n'
        l = LineLexer(parse_lib._MakeMatcher(), '', self.arena)
        self.assertTokensEqual(ast.token(Id.Unknown_Tok, ''),
                               l.LookAhead(lex_mode_e.OUTER))

        l = LineLexer(parse_lib._MakeMatcher(), 'foo', self.arena)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'foo'),
                               l.Read(lex_mode_e.OUTER))
        self.assertTokensEqual(ast.token(Id.Unknown_Tok, ''),
                               l.LookAhead(lex_mode_e.OUTER))

        l = LineLexer(parse_lib._MakeMatcher(), 'foo  bar', self.arena)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'foo'),
                               l.Read(lex_mode_e.OUTER))
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'bar'),
                               l.LookAhead(lex_mode_e.OUTER))

        # No lookahead; using the cursor!
        l = LineLexer(parse_lib._MakeMatcher(), 'func(', self.arena)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'func'),
                               l.Read(lex_mode_e.OUTER))
        self.assertTokensEqual(ast.token(Id.Op_LParen, '('),
                               l.LookAhead(lex_mode_e.OUTER))

        l = LineLexer(parse_lib._MakeMatcher(), 'func  (', self.arena)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'func'),
                               l.Read(lex_mode_e.OUTER))
        self.assertTokensEqual(ast.token(Id.Op_LParen, '('),
                               l.LookAhead(lex_mode_e.OUTER))
Example #13
    def testBashRegexState(self):
        lexer = _InitLexer('(foo|bar)')

        t = lexer.Read(LexMode.BASH_REGEX)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, '('), t)

        t = lexer.Read(LexMode.BASH_REGEX)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'foo'), t)

        t = lexer.Read(LexMode.BASH_REGEX)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, '|'), t)
Example #14
    def testVarOps(self):
        ev = InitEvaluator()  # initializes x=xxx and y=yyy
        unset_sub = ast.BracedVarSub(ast.token(Id.Lit_Chars, 'unset'))
        print(ev.EvalVarSub(unset_sub))

        set_sub = ast.BracedVarSub(ast.token(Id.Lit_Chars, 'x'))
        print(ev.EvalVarSub(set_sub))

        part = ast.LiteralPart(ast.token(Id.Lit_Chars, 'default'))
        arg_word = ast.CompoundWord([part])
        test_op = ast.StringUnary(Id.VTest_ColonHyphen, arg_word)
        unset_sub.suffix_op = test_op
        set_sub.suffix_op = test_op

        print(ev.EvalVarSub(unset_sub))
        print(ev.EvalVarSub(set_sub))
Example #15
  def _Read(self, lex_mode):
    if self.line_lexer.AtEnd():
      line_id, line = self.line_reader.GetLine()

      if line is None:  # no more lines
        t = ast.token(Id.Eof_Real, '', -1)
        # No line number.  I guess we are showing the last line of the file.
        # TODO: Could keep track of previous position for this case?
        return t

      self.line_lexer.Reset(line, line_id)

    t = self.line_lexer.Read(lex_mode)

    # e.g. translate ) or ` into EOF
    if self.translation_stack:
      old_id, new_id = self.translation_stack[-1]  # top
      if t.id == old_id:
        new_s = IdName(new_id)
        #print('==> TRANSLATING %s ==> %s' % (t, new_s))
        self.translation_stack.pop()
        #print(self.translation_stack)
        t.id = new_id

    return t
Example #16
  def _MaybeReadHereDocs(self):
    for h in self.pending_here_docs:
      lines = []
      #log('HERE %r' % h.here_end)
      while True:
        # If op is <<-, strip off all leading tabs (NOT spaces).
        # (in C++, just bump the start?)
        line_id, line = self.line_reader.GetLine()

        #print("LINE %r %r" % (line, h.here_end))
        if not line:  # EOF
          # An unterminated here doc is just a warning in bash.  We make it
          # fatal because we want to be strict, and because it causes problems
          # reporting other errors.
          # Attribute it to the << in <<EOF for now.
          self.AddErrorContext('Unterminated here doc', span_id=h.spids[0])
          return False

        # NOTE: Could do this at runtime to preserve the LST.
        if h.op_id == Id.Redir_DLessDash:
          line = line.lstrip('\t')
        if line.rstrip() == h.here_end:
          break

        lines.append((line_id, line))

      parts = []
      if h.do_expansion:
        # NOTE: We read all lines at once, instead of doing it line-by-line,
        # because of cases like this:
        # cat <<EOF
        # 1 $(echo 2
        # echo 3) 4
        # EOF

        from osh import parse_lib  # Avoid circular import
        w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena)
        word = w_parser.ReadHereDocBody()
        if not word:
          self.AddErrorContext(
              'Error reading here doc body: %s', w_parser.Error())
          return False
        h.body = word
        h.was_filled = True
      else:
        # Each line is a single span.  TODO: Add span_id to token.
        tokens = [
            ast.token(Id.Lit_Chars, line, const.NO_INTEGER)
            for _, line in lines]
        parts = [ast.LiteralPart(t) for t in tokens]
        h.body = ast.CompoundWord(parts)
        h.was_filled = True

    # No .clear() until Python 3.3.
    del self.pending_here_docs[:]

    return True
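The collect-then-parse loop above is the core of here doc handling: gather the body lines first (so multi-line command subs work), then hand them to the word parser in one unit. Below is a rough standalone sketch of just the line-gathering part; read_here_doc and its inputs are invented for illustration, and the real code goes through self.line_reader and tracks line IDs.

def read_here_doc(lines, here_end, strip_tabs=False):
    """Collect lines until the terminator; strip leading tabs for <<-."""
    body = []
    for line in lines:
        if strip_tabs:
            line = line.lstrip('\t')
        if line.rstrip() == here_end:   # terminator line is not part of the body
            return body
        body.append(line)
    # Mirrors the fatal "Unterminated here doc" case above.
    raise RuntimeError('Unterminated here doc %r' % here_end)


src = ['\t1 $(echo 2\n', '\techo 3) 4\n', '\tEOF\n']
print(read_here_doc(src, 'EOF', strip_tabs=True))
# ['1 $(echo 2\n', 'echo 3) 4\n'] -- the two-line body is expanded as one unit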
Example #17
  def _MaybeReadHereDocs(self, node):
    here_docs = _GetHereDocsToFill(node)
    #print('')
    #print('--> FILLING', here_docs)
    #print('')
    for h in here_docs:
      lines = []
      #print(h.here_end)
      while True:
        # If op is <<-, strip off all leading tabs (NOT spaces).
        # (in C++, just bump the start?)
        line_id, line = self.line_reader.GetLine()

        #print("LINE %r %r" % (line, h.here_end))
        if not line:  # EOF
          print('WARNING: unterminated here doc', file=sys.stderr)
          break

        if h.op_id == Id.Redir_DLessDash:
          line = line.lstrip('\t')
        if line.rstrip() == h.here_end:
          break

        lines.append((line_id, line))

      parts = []
      if h.do_expansion:
        # NOTE: We read all lines at once, instead of doing it line-by-line,
        # because of cases like this:
        # cat <<EOF
        # 1 $(echo 2
        # echo 3) 4
        # EOF

        # TODO: Move this import
        from osh import parse_lib
        # TODO: Thread arena.  need self.arena
        w_parser = parse_lib.MakeWordParserForHereDoc(lines)
        word = w_parser.ReadHereDocBody()
        if not word:
          self.AddErrorContext('Error reading here doc body: %s', w_parser.Error())
          return False
        h.arg_word = word
        h.was_filled = True
      else:
        # TODO: Add span_id to token
        # Each line is a single span.
        tokens = [ast.token(Id.Lit_Chars, line) for _, line in lines]
        parts = [ast.LiteralPart(t) for t in tokens]
        h.arg_word = ast.CompoundWord(parts)
        h.was_filled = True

    #print('')
    #print('--> FILLED', here_docs)
    #print('')
    return True
Example #18
  def LookAhead(self, lex_mode):
    """Look ahead for a non-space token, using the given lexical state."""
    pos = self.line_pos
    #print('Look ahead from pos %d, line %r' % (pos,self.line))
    while True:
      if pos == len(self.line):
        t = ast.token(Id.Eof_Real, '')  # no location
        return t

      re_list = self.lexer_def[lex_mode]
      end_index, tok_type, tok_val = FindLongestMatch(
          re_list, self.line, pos)
      # NOTE: Instead of hard-coding this token, we could pass it in.  This one
      # only appears in OUTER state!  LookAhead(lex_mode, past_token_type)
      if tok_type != Id.WS_Space:
        break
      pos = end_index

    return ast.token(tok_type, tok_val)  # no location
Example #19
    def testLookAhead(self):
        # I think this is the usage pattern we care about.  Peek and Next() past
        # the function; then Peek() the next token.  Then Lookahead in that state.
        lexer = _InitLexer('func()')

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'func'), t)

        #self.assertEqual(Id.Op_LParen, lexer.LookAhead())

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Op_LParen, '('), t)

        self.assertTokensEqual(ast.token(Id.Op_RParen, ')'),
                               lexer.LookAhead(LexMode.OUTER))

        lexer = _InitLexer('func ()')

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.Lit_Chars, 'func'), t)

        t = lexer.Read(LexMode.OUTER)
        self.assertTokensEqual(ast.token(Id.WS_Space, ' '), t)

        self.assertTokensEqual(ast.token(Id.Op_LParen, '('),
                               lexer.LookAhead(LexMode.OUTER))
Example #20
    def testReadOuter(self):
        # Lines always end with '\n'
        l = LineLexer(LEXER_DEF, '')
        try:
            l.Read(LexMode.OUTER)
        except AssertionError as e:
            print(e)
        else:
            raise AssertionError('Expected error')

        l = LineLexer(LEXER_DEF, '\n')
        self.assertTokensEqual(ast.token(Id.Op_Newline, '\n'),
                               l.Read(LexMode.OUTER))
Example #21
def _assertReadWordWithArena(test, word_str):
    print('\n---', word_str)
    arena, w_parser = _InitWordParserWithArena(word_str)
    w = w_parser.ReadWord(LexMode.OUTER)
    if w:
        ast.PrettyPrint(w)
    else:
        err = w_parser.Error()
        test.fail("Couldn't parse %r: %s" % (word_str, err))

    # Next word must be \n
    w2 = w_parser.ReadWord(LexMode.OUTER)
    test.assertTrue(
        TokenWordsEqual(ast.TokenWord(ast.token(Id.Op_Newline, '\n')), w2))

    return arena, w
Example #22
def _assertReadWordWithArena(test, word_str):
  print('\n---', word_str)
  arena, w_parser = _InitWordParserWithArena(word_str)
  w = w_parser.ReadWord(lex_mode_e.OUTER)
  if w:
    ast.PrettyPrint(w)
  else:
    err = w_parser.Error()
    test.fail("Couldn't parse %r: %s" % (word_str, err))

  # Next word must be Eof_Real
  w2 = w_parser.ReadWord(lex_mode_e.OUTER)
  test.assertTrue(
      test_lib.TokenWordsEqual(ast.TokenWord(ast.token(Id.Eof_Real, '')), w2),
      w2)

  return arena, w
Example #23
    def _Read(self, lex_mode):
        t = self.line_lexer.Read(lex_mode)
        if t.id == Id.Eol_Tok:  # hit \0
            line_id, line = self.line_reader.GetLine()

            if line is None:  # no more lines
                span_id = self.line_lexer.GetSpanIdForEof()
                t = ast.token(Id.Eof_Real, '', span_id)
                return t

            self.line_lexer.Reset(line, line_id)
            t = self.line_lexer.Read(lex_mode)

        # e.g. translate ) or ` into EOF
        if self.translation_stack:
            old_id, new_id = self.translation_stack[-1]  # top
            if t.id == old_id:
                #print('==> TRANSLATING %s ==> %s' % (t, new_s))
                self.translation_stack.pop()
                t.id = new_id

        return t
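The translation_stack consulted at the end of _Read() can be shown on its own: the parser pushes an (old_id, new_id) pair, and the next token whose id equals old_id is rewritten, one-shot, to new_id (e.g. a ')' acting as EOF for a command sub). In the minimal sketch below, plain strings stand in for osh Id values and Tok is a made-up token class.

class Tok:
    def __init__(self, id_, val):
        self.id = id_
        self.val = val


translation_stack = [('Op_RParen', 'Eof_RParen')]

def translate(t):
    if translation_stack:
        old_id, new_id = translation_stack[-1]   # peek at the top
        if t.id == old_id:
            translation_stack.pop()              # one-shot: consume the entry
            t.id = new_id
    return t

print(translate(Tok('Lit_Chars', 'x')).id)    # Lit_Chars -- unchanged
print(translate(Tok('Op_RParen', ')')).id)    # Eof_RParen -- ')' acts as EOF
print(translate(Tok('Op_RParen', ')')).id)    # Op_RParen -- stack is empty now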
Example #24
  def Read(self, lex_mode):
    if self.AtEnd():
      raise AssertionError('EOF')

    re_list = self.lexer_def[lex_mode]

    end_index, tok_type, tok_val = FindLongestMatch(
        re_list, self.line, self.line_pos)

    # NOTE: tok_val is redundant, but even in osh.asdl we have some separation
    # between data needed for formatting and data needed for execution.  Could
    # revisit this later.

    # TODO: Add this back once arena is threaded everywhere
    #assert self.line_id != -1
    line_span = ast.line_span(self.line_id, self.line_pos, len(tok_val))

    # NOTE: We're putting the arena hook in LineLexer and not Lexer because we
    # want it to be "low level".  The only thing fabricated here is a newline
    # added at the last line, so we don't end with \0.

    if self.arena is not None:
      if self.arena_skip:
        assert self.last_span_id != -1
        span_id = self.last_span_id
        self.arena_skip = False
      else:
        span_id = self.arena.AddLineSpan(line_span)
        self.last_span_id = span_id
    else:
      # Completion parser might not have arena?
      # We should probably get rid of this.
      span_id = -1

    t = ast.token(tok_type, tok_val, span_id)

    self.line_pos = end_index
    return t
Example #25
    def testMultiLine(self):
        w_parser = InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")

        print('--MULTI')
        w = w_parser.ReadWord(LexMode.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls'))]
        self.assertEqual(ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'foo'))]
        self.assertEqual(ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        t = ast.token(Id.Op_Newline, '\n')
        self.assertEqual(ast.TokenWord(t), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls'))]
        self.assertEqual(ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'bar'))]
        self.assertEqual(ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        t = ast.token(Id.Op_Newline, '\n')
        self.assertEqual(ast.TokenWord(t), w)

        w = w_parser.ReadWord(LexMode.OUTER)
        t = ast.token(Id.Eof_Real, '')
        self.assertEqual(ast.TokenWord(t), w)
Example #26
    def testReadOuter(self):
        l = LineLexer(parse_lib._MakeMatcher(), '\n', self.arena)
        self.assertTokensEqual(ast.token(Id.Op_Newline, '\n'),
                               l.Read(lex_mode_e.OUTER))
Example #27
    def testDBracketState(self):
        lexer = _InitLexer('-z foo')
        t = lexer.Read(LexMode.DBRACKET)
        self.assertTokensEqual(ast.token(Id.BoolUnary_z, '-z'), t)
        self.assertEqual(Kind.BoolUnary, LookupKind(t.id))
Example #28
def TildeDetect(word):
    """Detect tilde expansion.

    If it needs to include a TildeSubPart, return a new word.  Otherwise return
    None.

    NOTE: This algorithm would be simpler if
    1. We could assume some regex for user names.
    2. We didn't need to do brace expansion first, like {~foo,~bar}
    OR
    - If Lit_Slash were special (it is in the VAROP states, but not OUTER
    state).  We could introduce another lexer mode after you hit Lit_Tilde?

    So we have to scan all LiteralPart instances until one of them contains a
    '/'.

    http://unix.stackexchange.com/questions/157426/what-is-the-regex-to-validate-linux-users
    "It is usually recommended to only use usernames that begin with a lower
    case letter or an underscore, followed by lower case letters, digits,
    underscores, or dashes. They can end with a dollar sign. In regular
    expression terms: [a-z_][a-z0-9_-]*[$]?

    On Debian, the only constraints are that usernames must neither start with
    a dash ('-') nor contain a colon (':') or a whitespace (space: ' ', end
    of line: '\n', tabulation: '\t', etc.). Note that using a slash ('/') may
    break the default algorithm for the definition of the user's home
    directory."
    """
    if not word.parts:
        return None
    part0 = word.parts[0]
    if _LiteralPartId(part0) != Id.Lit_Tilde:
        return None

    prefix = ''
    found_slash = False
    # search for the next /
    for i in range(1, len(word.parts)):
        # Not a literal part, and we did NOT find a slash.  So there is no
        # TildeSub applied.  This would be something like ~X$var, ~$var,
        # ~$(echo), etc.  The slash is necessary.
        if word.parts[i].tag != word_part_e.LiteralPart:
            return None
        val = word.parts[i].token.val
        p = val.find('/')

        if p == -1:  # no slash yet
            prefix += val

        elif p >= 0:
            # e.g. for ~foo!bar/baz, extract "bar"
            # NOTE: requires downcast to LiteralPart
            pre, post = val[:p], val[p:]
            prefix += pre
            tilde_part = ast.TildeSubPart(prefix)
            # TODO: Need a span_id here.  Or use different algorithm.
            #print('SPLITTING %s p = %d' % (word.parts[i], p), file=sys.stderr)
            remainder_part = ast.LiteralPart(ast.token(Id.Lit_Chars, post))
            found_slash = True
            break

    w = ast.CompoundWord()
    if found_slash:
        w.parts.append(tilde_part)
        w.parts.append(remainder_part)
        j = i + 1
        while j < len(word.parts):
            w.parts.append(word.parts[j])
            j += 1
    else:
        # The whole thing is a tilde sub, e.g. ~foo or ~foo!bar
        w.parts.append(ast.TildeSubPart(prefix))
    return w
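The scan the docstring describes is easier to see on a flat string than on word parts. The simplified sketch below is hypothetical (tilde_split is not part of osh); the real code walks LiteralPart tokens and builds TildeSubPart / LiteralPart nodes instead of slicing a string.

def tilde_split(s):
    """Split '~prefix/rest' into (prefix, rest); return None if no tilde."""
    if not s.startswith('~'):
        return None
    p = s.find('/', 1)
    if p == -1:
        return s[1:], ''              # whole word is the tilde sub, e.g. ~foo
    return s[1:p], s[p:]              # e.g. ~foo!bar/baz -> ('foo!bar', '/baz')


print(tilde_split('~foo'))            # ('foo', '')
print(tilde_split('~foo!bar/baz'))    # ('foo!bar', '/baz')
print(tilde_split('echo'))            # None -- not a tilde word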