Example 1
  def testShellFuncExecution(self):
    ex = cmd_exec_test.InitExecutor()
    func_node = ast.FuncDef()

    c1 = ast.CompoundWord()
    t1 = ast.token(Id.Lit_Chars, 'f1')
    c1.parts.append(ast.LiteralPart(t1))

    c2 = ast.CompoundWord()
    t2 = ast.token(Id.Lit_Chars, 'f2')
    c2.parts.append(ast.LiteralPart(t2))

    a = ast.ArrayLiteralPart()
    a.words = [c1, c2]
    w = ast.CompoundWord()
    w.parts.append(a)

    # Set global COMPREPLY=(f1 f2)
    pair = ast.assign_pair(ast.LhsName('COMPREPLY'), assign_op_e.Equal, w)
    pair.spids.append(0)  # dummy
    pairs = [pair]
    body_node = ast.Assignment(Id.Assign_None, [], pairs)

    func_node.name = 'myfunc'
    func_node.body = body_node

    a = completion.ShellFuncAction(ex, func_node)
    matches = list(a.Matches([], 0, 'f'))
    self.assertEqual(['f1 ', 'f2 '], matches)
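
The test above exercises bash-style programmable completion: a shell function publishes its candidates by assigning the global COMPREPLY array, and ShellFuncAction reads them back as matches. A minimal standalone sketch of that convention (hypothetical names, not the osh API):

    COMPREPLY = []

    def myfunc():
        # A completion function publishes candidates by mutating the
        # global array, like COMPREPLY=(f1 f2) in the test above.
        global COMPREPLY
        COMPREPLY = ['f1', 'f2']

    def shell_func_matches(prefix):
        myfunc()
        # The expected matches carry a trailing space, mirrored here.
        return [c + ' ' for c in COMPREPLY if c.startswith(prefix)]

    assert shell_func_matches('f') == ['f1 ', 'f2 ']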
Example 2
    def testPipeline2(self):
        Banner('ls | cut -d . -f 1 | head')
        p = process.Pipeline()
        p.Add(_ExtProc(['ls']))
        p.Add(_ExtProc(['cut', '-d', '.', '-f', '1']))
        p.Add(_ExtProc(['head']))

        print(p.Run(_WAITER))

        ex = InitExecutor()

        # Simulating a subshell for each command
        w1 = ast.CompoundWord()
        w1.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls')))
        node1 = ast.SimpleCommand()
        node1.words = [w1]

        w2 = ast.CompoundWord()
        w2.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'head')))
        node2 = ast.SimpleCommand()
        node2.words = [w2]

        w3 = ast.CompoundWord()
        w3.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, 'sort')))
        w4 = ast.CompoundWord()
        w4.parts.append(ast.LiteralPart(ast.token(Id.Lit_Chars, '--reverse')))
        node3 = ast.SimpleCommand()
        node3.words = [w3, w4]

        p = process.Pipeline()
        p.Add(Process(process.SubProgramThunk(ex, node1)))
        p.Add(Process(process.SubProgramThunk(ex, node2)))
        p.Add(Process(process.SubProgramThunk(ex, node3)))

        print(p.Run(_WAITER))
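
Pipeline forks one child per stage and wires stdout to stdin along the chain. A rough two-stage equivalent of that plumbing, using subprocess instead of the process.py machinery (a sketch, not the actual implementation):

    import subprocess

    ls = subprocess.Popen(['ls'], stdout=subprocess.PIPE)
    head = subprocess.Popen(['head'], stdin=ls.stdout, stdout=subprocess.PIPE)
    ls.stdout.close()  # let 'ls' receive SIGPIPE if 'head' exits early
    out, _ = head.communicate()
    print([ls.wait(), head.wait()])  # one exit status per stage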
Example 3
    def _ReadExtGlobPart(self):
        """
    Grammar:
      Item         = CompoundWord | EPSILON  # important: @(foo|) is allowed
      LEFT         = '@(' | '*(' | '+(' | '?(' | '!('
      RIGHT        = ')'
      ExtGlob      = LEFT (Item '|')* Item RIGHT  # Item may be empty
      CompoundWord includes ExtGlobPart
    """
        left_token = self.cur_token
        arms = []
        part = ast.ExtGlobPart(left_token, arms)  # return value
        part.spids.append(left_token.span_id)

        self.lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)
        self._Next(lex_mode_e.EXTGLOB)  # advance past LEFT

        read_word = False  # did we just read a word?  To handle @(||).

        while True:
            self._Peek()
            #log('t %r', self.cur_token)

            if self.token_type == Id.Right_ExtGlob:
                if not read_word:
                    arms.append(ast.CompoundWord())
                part.spids.append(self.cur_token.span_id)
                break

            elif self.token_type == Id.Op_Pipe:
                if not read_word:
                    arms.append(ast.CompoundWord())
                read_word = False
                self._Next(lex_mode_e.EXTGLOB)

            # lex mode EXTGLOB should only produce these 4 kinds of tokens
            elif self.token_kind in (Kind.Lit, Kind.Left, Kind.VSub,
                                     Kind.ExtGlob):
                w = self._ReadCompoundWord(lex_mode=lex_mode_e.EXTGLOB)
                arms.append(w)
                read_word = True

            elif self.token_kind == Kind.Eof:
                self.AddErrorContext(
                    'Unexpected EOF reading extended glob that began here',
                    token=left_token)
                return None

            else:
                raise AssertionError('Unexpected token %r' % self.cur_token)

        return part
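
To make the grammar in the docstring concrete, here is a self-contained re-implementation of just the arm-collection loop over a simplified token stream (plain strings instead of lexer tokens; illustrative, not the osh parser):

    def parse_extglob_arms(tokens):
        """Collect the '|'-separated arms between LEFT and ')'."""
        arms = []
        read_word = False
        for t in tokens:
            if t == ')':
                if not read_word:  # handles @() and @(foo|)
                    arms.append('')
                return arms
            elif t == '|':
                if not read_word:  # handles @(||)
                    arms.append('')
                read_word = False
            else:
                arms.append(t)
                read_word = True
        raise ValueError('unexpected EOF in extended glob')

    # @(||) has three empty arms; @(foo|) has 'foo' and an empty arm.
    assert parse_extglob_arms(['|', '|', ')']) == ['', '', '']
    assert parse_extglob_arms(['foo', '|', ')']) == ['foo', '']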
Example 4
  def testVarOps(self):
    ev = InitEvaluator()  # initializes x=xxx and y=yyy
    unset_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'unset'))
    part_vals = []
    ev._EvalWordPart(unset_sub, part_vals)
    print(part_vals)

    set_sub = ast.BracedVarSub(ast.token(Id.VSub_Name, 'x'))
    part_vals = []
    ev._EvalWordPart(set_sub, part_vals)
    print(part_vals)

    # Now add some ops
    part = ast.LiteralPart(ast.token(Id.Lit_Chars, 'default'))
    arg_word = ast.CompoundWord([part])
    test_op = ast.StringUnary(Id.VTest_ColonHyphen, arg_word)
    unset_sub.suffix_op = test_op
    set_sub.suffix_op = test_op

    part_vals = []
    ev._EvalWordPart(unset_sub, part_vals)
    print(part_vals)

    part_vals = []
    ev._EvalWordPart(set_sub, part_vals)
    print(part_vals)
Example 5
    def EvalPrompt(self, val):
        """Perform the two evaluations that bash does.  Used by $PS1 and ${x@P}."""
        if val.tag != value_e.Str:
            return DEFAULT_PS1  # no evaluation necessary

        try:
            tokens = self.tokens_cache[val.s]
        except KeyError:
            tokens = match.PS1_LEXER.Tokens(val.s)
            self.tokens_cache[val.s] = tokens

        # First replacements.  TODO: Should we cache this too?
        ps1_str = self._ReplaceBackslashCodes(tokens)

        # The prompt is often constant, so we can avoid parsing it.
        # NOTE: This is copied from the PS4 logic in Tracer.
        try:
            ps1_word = self.parse_cache[ps1_str]
        except KeyError:
            w_parser = self.parse_ctx.MakeWordParserForPlugin(
                ps1_str, self.arena)
            try:
                ps1_word = w_parser.ReadPS()
            except Exception as e:
                error_str = '<ERROR: cannot parse PS1>'
                t = ast.token(Id.Lit_Chars, error_str, const.NO_INTEGER)
                ps1_word = ast.CompoundWord([ast.LiteralPart(t)])
            self.parse_cache[ps1_str] = ps1_word

        # e.g. "${debian_chroot}\u" -> '\u'
        val2 = self.ex.word_ev.EvalWordToString(ps1_word)
        return val2.s
Example 6
    def EvalPrompt(self, val):
        """Perform the two evaluations that bash does.  Used by $PS1 and ${x@P}."""
        if val.tag != value_e.Str:
            return DEFAULT_PS1  # no evaluation necessary

        # Parse backslash escapes (cached)
        try:
            tokens = self.tokens_cache[val.s]
        except KeyError:
            tokens = list(match.PS1_LEXER.Tokens(val.s))
            self.tokens_cache[val.s] = tokens

        # Replace values.
        ps1_str = self._ReplaceBackslashCodes(tokens)

        # Parse it like a double-quoted word (cached).
        # NOTE: This is copied from the PS4 logic in Tracer.
        try:
            ps1_word = self.parse_cache[ps1_str]
        except KeyError:
            w_parser = self.parse_ctx.MakeWordParserForPlugin(
                ps1_str, self.arena)
            try:
                ps1_word = w_parser.ReadPS()
            except Exception as e:
                error_str = '<ERROR: cannot parse PS1>'
                t = ast.token(Id.Lit_Chars, error_str, const.NO_INTEGER)
                ps1_word = ast.CompoundWord([ast.LiteralPart(t)])
            self.parse_cache[ps1_str] = ps1_word

        # Evaluate, e.g. "${debian_chroot}\u" -> '\u'
        val2 = self.ex.word_ev.EvalWordToString(ps1_word)
        return val2.s
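
The two phases are worth keeping separate in your head: backslash-code replacement is pure string work, while the second phase needs the full word evaluator. A toy sketch of the split with a pluggable second phase (the \u and \h subset and all names here are illustrative, not the osh API):

    import getpass
    import socket

    def eval_prompt(ps1, eval_word):
        # Phase 1: replace backslash codes (only \u and \h shown).
        s = ps1.replace(r'\u', getpass.getuser())
        s = s.replace(r'\h', socket.gethostname())
        # Phase 2: evaluate it like a double-quoted word so ${x},
        # $(echo hi), etc. work; the caller supplies that evaluator.
        return eval_word(s)

    print(eval_prompt(r'\u@\h$ ', lambda s: s))  # e.g. 'andy@lisa$ '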
Example 7
    def ReadPS(self):
        """For $PS1, $PS4, etc.

    This is just like reading a here doc line.  "\n" is allowed, as well as the
    typical substitutions ${x} $(echo hi) $((1 + 2)).
    """
        w = ast.CompoundWord()
        self._ReadLikeDQ(None, w.parts)
        return w
Example 8
  def _MaybeReadHereDocs(self):
    for h in self.pending_here_docs:
      lines = []
      #log('HERE %r' % h.here_end)
      while True:
        # If op is <<-, strip off all leading tabs (NOT spaces).
        # (in C++, just bump the start?)
        line_id, line = self.line_reader.GetLine()

        #print("LINE %r %r" % (line, h.here_end))
        if not line:  # EOF
          # An unterminated here doc is just a warning in bash.  We make it
          # fatal because we want to be strict, and because it causes problems
          # reporting other errors.
          # Attribute it to the << in <<EOF for now.
          self.AddErrorContext('Unterminated here doc', span_id=h.spids[0])
          return False

        # NOTE: Could do this at runtime to preserve the LST.
        if h.op_id == Id.Redir_DLessDash:
          line = line.lstrip('\t')
        if line.rstrip() == h.here_end:
          break

        lines.append((line_id, line))

      parts = []
      if h.do_expansion:
        # NOTE: We read all lines at once, instead of doing it line-by-line,
        # because of cases like this:
        # cat <<EOF
        # 1 $(echo 2
        # echo 3) 4
        # EOF

        from osh import parse_lib  # Avoid circular import
        w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena)
        word = w_parser.ReadHereDocBody()
        if not word:
          self.AddErrorContext(
              'Error reading here doc body: %s', w_parser.Error())
          return False
        h.body = word
        h.was_filled = True
      else:
        # Each line is a single span.  TODO: Add span_id to token.
        tokens = [
            ast.token(Id.Lit_Chars, line, const.NO_INTEGER)
            for _, line in lines]
        parts = [ast.LiteralPart(t) for t in tokens]
        h.body = ast.CompoundWord(parts)
        h.was_filled = True

    # No .clear() until Python 3.3.
    del self.pending_here_docs[:]

    return True
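
Two details in the loop above are easy to miss: <<- strips only leading tabs, never spaces, and the terminator is compared after rstrip(), so a trailing newline on the line doesn't matter. A standalone sketch of just that collection step (hypothetical helper, plain line lists):

    def collect_here_doc(lines, here_end, strip_tabs=False):
        """Collect body lines until the delimiter line."""
        body = []
        for line in lines:
            if strip_tabs:  # the <<- operator strips tabs, NOT spaces
                line = line.lstrip('\t')
            if line.rstrip() == here_end:
                return body
            body.append(line)
        raise ValueError('Unterminated here doc')

    lines = ['\t1 $(echo 2\n', '\techo 3) 4\n', '\tEOF\n']
    assert collect_here_doc(lines, 'EOF', strip_tabs=True) == [
        '1 $(echo 2\n', 'echo 3) 4\n']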
Example 9
def BraceExpandWords(words):
    out = []
    for w in words:
        if w.tag == word_e.BracedWordTree:
            parts_list = _BraceExpand(w.parts)
            out.extend(ast.CompoundWord(p) for p in parts_list)
        else:
            out.append(w)
    return out
Example 10
  def _EvalRedirect(self, n):
    fd = REDIR_DEFAULT_FD[n.op.id] if n.fd == const.NO_INTEGER else n.fd
    if n.tag == redir_e.Redir:
      redir_type = REDIR_ARG_TYPES[n.op.id]  # could be static in the LST?

      if redir_type == redir_arg_type_e.Path:
        # NOTE: no globbing.  You can write to a file called '*.py'.
        val = self.word_ev.EvalWordToString(n.arg_word)
        if val.tag != value_e.Str:  # TODO: This error never fires
          util.error("Redirect filename must be a string, got %s", val)
          return None
        filename = val.s
        if not filename:
          # Whether this is fatal depends on errexit.
          util.error("Redirect filename can't be empty")
          return None

        return runtime.PathRedirect(n.op.id, fd, filename)

      elif redir_type == redir_arg_type_e.Desc:  # e.g. 1>&2
        val = self.word_ev.EvalWordToString(n.arg_word)
        if val.tag != value_e.Str:  # TODO: This error never fires
          util.error("Redirect descriptor should be a string, got %s", val)
          return None
        t = val.s
        if not t:
          util.error("Redirect descriptor can't be empty")
          return None
        try:
          target_fd = int(t)
        except ValueError:
          util.error(
              "Redirect descriptor should look like an integer, got %s", val)
          return None

        return runtime.DescRedirect(n.op.id, fd, target_fd)

      elif redir_type == redir_arg_type_e.Here:  # here word
        val = self.word_ev.EvalWordToString(n.arg_word)
        assert val.tag == value_e.Str, val
        # NOTE: bash and mksh both add \n
        return runtime.HereRedirect(fd, val.s + '\n')
      else:
        raise AssertionError('Unknown redirect op')

    elif n.tag == redir_e.HereDoc:
      # HACK: Wrap it in a word to evaluate.
      w = ast.CompoundWord(n.stdin_parts)
      val = self.word_ev.EvalWordToString(w)
      assert val.tag == value_e.Str, val
      return runtime.HereRedirect(fd, val.s)

    else:
      raise AssertionError('Unknown redirect type')
Example 11
    def testBraceExpand(self):
        w = _assertReadWord(self, 'hi')
        results = braces._BraceExpand(w.parts)
        self.assertEqual(1, len(results))
        for parts in results:
            _PrettyPrint(ast.CompoundWord(parts))
            print('')

        w = _assertReadWord(self, 'B-{a,b}-E')
        tree = braces._BraceDetect(w)
        self.assertEqual(3, len(tree.parts))
        pprint(tree)

        results = braces._BraceExpand(tree.parts)
        self.assertEqual(2, len(results))
        for parts in results:
            _PrettyPrint(ast.CompoundWord(parts))
            print('')

        w = _assertReadWord(self, 'B-{a,={b,c,d}=,e}-E')
        tree = braces._BraceDetect(w)
        self.assertEqual(3, len(tree.parts))
        pprint(tree)

        results = braces._BraceExpand(tree.parts)
        self.assertEqual(5, len(results))
        for parts in results:
            _PrettyPrint(ast.CompoundWord(parts))
            print('')

        w = _assertReadWord(self, 'B-{a,b}-{c,d}-E')
        tree = braces._BraceDetect(w)
        self.assertEqual(5, len(tree.parts))
        pprint(tree)

        results = braces._BraceExpand(tree.parts)
        self.assertEqual(4, len(results))
        for parts in results:
            _PrettyPrint(ast.CompoundWord(parts))
            print('')
Example 12
    def _ReadArithWord(self):
        """Helper function for ReadArithWord."""
        #assert self.token_type != Id.Undefined_Tok
        self._Peek()
        #print('_ReadArithWord', self.cur_token)

        if self.token_kind == Kind.Unknown:
            self.AddErrorContext("Unknown token in arith context: %s",
                                 self.cur_token,
                                 token=self.cur_token)
            return None, False

        elif self.token_kind == Kind.Eof:
            # Just return EOF token
            w = ast.TokenWord(self.cur_token)
            return w, False
            #self.AddErrorContext("Unexpected EOF in arith context: %s",
            #    self.cur_token, token=self.cur_token)
            #return None, False

        elif self.token_kind == Kind.Ignored:
            # Space should be ignored.  TODO: change this to SPACE_SPACE and
            # SPACE_NEWLINE?  or SPACE_TOK.
            self._Next(lex_mode_e.ARITH)
            return None, True  # Tell wrapper to try again

        elif self.token_kind in (Kind.Arith, Kind.Right):
            # Id.Right_ArithSub IS just a normal token, handled by ArithParser
            self._Next(lex_mode_e.ARITH)
            w = ast.TokenWord(self.cur_token)
            return w, False

        elif self.token_kind in (Kind.Lit, Kind.Left):
            w = self._ReadCompoundWord(lex_mode=lex_mode_e.ARITH)
            if not w:
                return None, True
            return w, False

        elif self.token_kind == Kind.VSub:
            part = ast.SimpleVarSub(self.cur_token)
            self._Next(lex_mode_e.ARITH)
            w = ast.CompoundWord([part])
            return w, False

        else:
            self._BadToken("Unexpected token parsing arith sub: %s",
                           self.cur_token)
            return None, False

        raise AssertionError("Shouldn't get here")
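
The (word, try_again) return convention implies a retry loop in the public wrapper: ignored tokens like spaces yield (None, True) and the caller simply asks again. Presumably the wrapper is shaped like this sketch (illustrative, decoupled from the parser class):

    def read_arith_word(read_once):
        # Keep calling until the helper stops asking for a retry.
        while True:
            w, try_again = read_once()
            if not try_again:
                return w

    # e.g. an ignored-space token yields (None, True) and is skipped:
    results = iter([(None, True), ('WORD', False)])
    assert read_arith_word(lambda: next(results)) == 'WORD'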
Example 13
def TildeDetect(word):
    """Detect tilde expansion in a word.

  It might begin with a LiteralPart that needs to be turned into a
  TildeSubPart.  (It depends on whether the second token begins with a
  slash.)

  If so, it returns a new word.  Otherwise it returns None.

  NOTE:
  - The regex for Lit_TildeLike could be expanded.  Right now it's
    conservative, like Lit_Chars without the /.
  - It's possible to write this in a mutating style, since only the first token
    is changed.  But note that we CANNOT know this during lexing.
  """
    # NOTE: BracedWordTree, EmptyWord, etc. can't be tilde expanded
    if word.tag != word_e.CompoundWord:
        return None

    assert word.parts, word

    part0 = word.parts[0]
    if _LiteralPartId(part0) != Id.Lit_TildeLike:
        return None

    if len(word.parts) == 1:  # can't be zero
        tilde_part = ast.TildeSubPart(part0.token)
        return ast.CompoundWord([tilde_part])

    part1 = word.parts[1]
    # NOTE: We could inspect the raw tokens.
    if _LiteralPartId(part1) == Id.Lit_Chars and part1.token.val.startswith(
            '/'):
        tilde_part = ast.TildeSubPart(part0.token)
        return ast.CompoundWord([tilde_part] + word.parts[1:])

    # It could be something like '~foo:bar', which doesn't have a slash.
    return None
Example 14
    def testMultiLine(self):
        w_parser = InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
        print('--MULTI')
        w = w_parser.ReadWord(lex_mode_e.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls'))]
        test_lib.AssertAsdlEqual(self, ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'foo'))]
        test_lib.AssertAsdlEqual(self, ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.OUTER)
        t = ast.token(Id.Op_Newline, '\n')
        test_lib.AssertAsdlEqual(self, ast.TokenWord(t), w)

        w = w_parser.ReadWord(lex_mode_e.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'ls'))]
        test_lib.AssertAsdlEqual(self, ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.OUTER)
        parts = [ast.LiteralPart(ast.token(Id.Lit_Chars, 'bar'))]
        test_lib.AssertAsdlEqual(self, ast.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.OUTER)
        t = ast.token(Id.Op_Newline, '\n')
        test_lib.AssertAsdlEqual(self, ast.TokenWord(t), w)

        w = w_parser.ReadWord(lex_mode_e.OUTER)
        t = ast.token(Id.Eof_Real, '')
        test_lib.AssertAsdlEqual(self, ast.TokenWord(t), w)
Example 15
    def ReadHereDocBody(self):
        """
    Sort of like Read(), except we're in a double quoted context, but not using
    double quotes.

    Returns:
      CompoundWord.  NOTE: We could also just use a DoubleQuotedPart for both
      cases?
    """
        w = ast.CompoundWord()
        dq = self._ReadDoubleQuotedPart(here_doc=True)
        if not dq:
            self.AddErrorContext('Error parsing here doc body')
            return False
        w.parts.append(dq)
        return w
Example 16
def LooksLikeAssignment(w):
    """Tests whether a word looks like FOO=bar.

  Returns:
    (string, CompoundWord) if it looks like FOO=bar
    False                  if it doesn't

  s=1
  s+=1
  s[x]=1
  s[x]+=1

  a=()
  a+=()
  a[x]=()
  a[x]+=()  # Not valid because arrays can't be nested.

  NOTE: a[ and s[ might be parsed separately?
  """
    assert w.tag == word_e.CompoundWord
    if len(w.parts) == 0:
        return False

    part0 = w.parts[0]
    if _LiteralPartId(part0) != Id.Lit_VarLike:
        return False

    s = part0.token.val
    assert s.endswith('=')
    if s[-2] == '+':
        op = assign_op_e.PlusEqual
        name = s[:-2]
    else:
        op = assign_op_e.Equal
        name = s[:-1]

    rhs = ast.CompoundWord()
    if len(w.parts) == 1:
        # This fake EmptyPart is necessary so that EmptyUnquoted elision
        # isn't applied.  EMPTY= is like EMPTY=''.
        # TODO: This part doesn't have spids, so it might break some invariants.
        rhs.parts.append(ast.EmptyPart())
    else:
        for p in w.parts[1:]:
            rhs.parts.append(p)

    return name, op, rhs
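
The name/operator split relies on the Lit_VarLike token always ending in '='. A string-level sketch of just that branch (illustrative only):

    def split_assignment(s):
        """'FOO=' -> ('FOO', '='); 'FOO+=' -> ('FOO', '+=')."""
        assert s.endswith('=')
        if s.endswith('+='):
            return s[:-2], '+='
        return s[:-1], '='

    assert split_assignment('s=') == ('s', '=')
    assert split_assignment('s+=') == ('s', '+=')
    assert split_assignment('a[x]+=') == ('a[x]', '+=')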
Example 17
  def _EvalPS4(self):
    """For set -x."""

    val = self.mem.GetVar('PS4')
    assert val.tag == value_e.Str

    s = val.s
    if s:
      first_char, ps4 = s[0], s[1:]
    else:
      first_char, ps4 = '+', ' '  # default

    try:
      ps4_word = self.parse_cache[ps4]
    except KeyError:
      # We have to parse this at runtime.  PS4 should usually remain constant.
      w_parser = parse_lib.MakeWordParserForPlugin(ps4, self.arena)

      # NOTE: Reading PS4 is just like reading a here doc line.  "\n" is
      # allowed too.  The OUTER mode would stop at spaces, and ReadWord
      # doesn't allow lex_mode_e.DQ.
      ps4_word = w_parser.ReadHereDocBody()

      if not ps4_word:
        error_str = '<ERROR: cannot parse PS4>'
        t = ast.token(Id.Lit_Chars, error_str, const.NO_INTEGER)
        ps4_word = ast.CompoundWord([ast.LiteralPart(t)])
      self.parse_cache[ps4] = ps4_word

    #print(ps4_word)

    # TODO: Repeat first character according process stack depth.  Where is
    # that stored?  In the executor itself?  It should be stored along with
    # the PID.  Need some kind of ShellProcessState or something.
    #
    # We should come up with a better mechanism.  Something like $PROC_INDENT
    # and $OIL_XTRACE_PREFIX.

    # TODO: Handle runtime errors!  For example, you could PS4='$(( 1 / 0 ))'
    # <ERROR: cannot evaluate PS4>
    prefix = self.word_ev.EvalWordToString(ps4_word)

    return first_char, prefix.s
Example 18
  def _EvalPS4(self):
    """For set -x."""

    val = self.mem.GetVar('PS4')
    assert val.tag == value_e.Str

    s = val.s
    if s:
      first_char, ps4 = s[0], s[1:]
    else:
      first_char, ps4 = '+', ' '  # default

    # NOTE: This cache is slightly broken because aliases are mutable!  I think
    # that is more or less harmless though.
    try:
      ps4_word = self.parse_cache[ps4]
    except KeyError:
      # We have to parse this at runtime.  PS4 should usually remain constant.
      w_parser = self.parse_ctx.MakeWordParserForPlugin(ps4, self.arena)

      try:
        ps4_word = w_parser.ReadPS()
      except util.ParseError as e:
        error_str = '<ERROR: cannot parse PS4>'
        t = ast.token(Id.Lit_Chars, error_str, const.NO_INTEGER)
        ps4_word = ast.CompoundWord([ast.LiteralPart(t)])
      self.parse_cache[ps4] = ps4_word

    #print(ps4_word)

    # TODO: Repeat first character according process stack depth.  Where is
    # that stored?  In the executor itself?  It should be stored along with
    # the PID.  Need some kind of ShellProcessState or something.
    #
    # We should come up with a better mechanism.  Something like $PROC_INDENT
    # and $OIL_XTRACE_PREFIX.

    # TODO: Handle runtime errors!  For example, you could PS4='$(( 1 / 0 ))'
    # <ERROR: cannot evaluate PS4>
    prefix = self.word_ev.EvalWordToString(ps4_word)

    return first_char, prefix.s
Example 19
    def _ReadArithWord(self):
        """Helper function for ReadArithWord."""
        self._Peek()

        if self.token_kind == Kind.Unknown:
            p_die('Unexpected token in arithmetic context',
                  token=self.cur_token)

        elif self.token_kind == Kind.Eof:
            # Just return EOF token
            w = ast.TokenWord(self.cur_token)
            return w, False

        elif self.token_kind == Kind.Ignored:
            # Space should be ignored.  TODO: change this to SPACE_SPACE and
            # SPACE_NEWLINE?  or SPACE_TOK.
            self._Next(lex_mode_e.ARITH)
            return None, True  # Tell wrapper to try again

        elif self.token_kind in (Kind.Arith, Kind.Right):
            # Id.Right_ArithSub IS just a normal token, handled by ArithParser
            self._Next(lex_mode_e.ARITH)
            w = ast.TokenWord(self.cur_token)
            return w, False

        elif self.token_kind in (Kind.Lit, Kind.Left):
            w = self._ReadCompoundWord(lex_mode=lex_mode_e.ARITH)
            return w, False

        elif self.token_kind == Kind.VSub:
            part = ast.SimpleVarSub(self.cur_token)
            self._Next(lex_mode_e.ARITH)
            w = ast.CompoundWord([part])
            return w, False

        else:
            assert False, ("Unexpected token parsing arith sub: %s" %
                           self.cur_token)

        raise AssertionError("Shouldn't get here")
Example 20
    def _ReadCompoundWord(self,
                          eof_type=Id.Undefined_Tok,
                          lex_mode=lex_mode_e.OUTER,
                          empty_ok=True):
        """
    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but it
    could be an operator delimiting a compound word.  Can we change lexer modes
    and remove this special case?
    """
        #print('_ReadCompoundWord', lex_mode)
        word = ast.CompoundWord()

        num_parts = 0
        done = False
        while not done:
            allow_done = empty_ok or num_parts != 0
            self._Peek()
            #print('CW',self.cur_token)
            if allow_done and self.token_type == eof_type:
                done = True  # e.g. for ${foo//pat/replace}

            # Keywords like "for" are treated like literals
            elif self.token_kind in (Kind.Lit, Kind.KW, Kind.Assign,
                                     Kind.ControlFlow, Kind.BoolUnary,
                                     Kind.BoolBinary):
                if self.token_type == Id.Lit_EscapedChar:
                    part = ast.EscapedLiteralPart(self.cur_token)
                else:
                    part = ast.LiteralPart(self.cur_token)
                    #part.xspans.append(self.cur_token.span_id)

                word.parts.append(part)

                if self.token_type == Id.Lit_VarLike:
                    #print('@', self.cursor)
                    #print('@', self.cur_token)

                    t = self.lexer.LookAhead(lex_mode_e.OUTER)
                    if t.id == Id.Op_LParen:
                        self.lexer.PushHint(Id.Op_RParen,
                                            Id.Right_ArrayLiteral)
                        part2 = self._ReadArrayLiteralPart()
                        if not part2:
                            self.AddErrorContext(
                                '_ReadArrayLiteralPart failed')
                            return False
                        word.parts.append(part2)

            elif self.token_kind == Kind.VSub:
                part = ast.SimpleVarSub(self.cur_token)
                word.parts.append(part)

            elif self.token_kind == Kind.ExtGlob:
                part = self._ReadExtGlobPart()
                if not part:
                    return None
                word.parts.append(part)

            elif self.token_kind == Kind.Left:
                #print('_ReadLeftParts')
                part = self._ReadLeftParts()
                if not part:
                    return None
                word.parts.append(part)

            # NOT done yet, will advance below
            elif self.token_kind == Kind.Right:
                # Still part of the word; will be done on the next iter.
                if self.token_type == Id.Right_DoubleQuote:
                    pass
                elif self.token_type == Id.Right_CommandSub:
                    pass
                elif self.token_type == Id.Right_Subshell:
                    # LEXER HACK for (case x in x) ;; esac )
                    assert self.next_lex_mode is None  # Rewind before it's used
                    if self.lexer.MaybeUnreadOne():
                        self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
                        self._Next(lex_mode)
                    done = True
                else:
                    done = True

            elif self.token_kind == Kind.Ignored:
                done = True

            else:
                # LEXER HACK for unbalanced case clause.  'case foo in esac' is valid,
                # so to test for ESAC, we can read ) before getting a chance to
                # PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread one
                # token and do it again.

                # We get Id.Op_RParen at top level:      case x in x) ;; esac
                # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
                if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
                    assert self.next_lex_mode is None  # Rewind before it's used
                    if self.lexer.MaybeUnreadOne():
                        if self.token_type == Id.Eof_RParen:
                            # Redo translation
                            self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
                        self._Next(lex_mode)

                done = True  # anything we don't recognize means we're done

            if not done:
                self._Next(lex_mode)
            num_parts += 1
        return word
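
The PushHint calls deserve a note: a bare ')' is ambiguous (array literal close, subshell close, case pattern, command sub), so the parser pushes a translation and the lexer reports a context-specific id the next time it would emit Op_RParen. A minimal sketch of such a hint stack (hypothetical; the real lexer is considerably more involved):

    class HintedLexer(object):
        def __init__(self, tokens):
            self.tokens = list(tokens)  # (id, value) pairs
            self.hints = []             # stack of (old_id, new_id)

        def PushHint(self, old_id, new_id):
            self.hints.append((old_id, new_id))

        def Read(self):
            tok_id, val = self.tokens.pop(0)
            # Translate the token once, then discard the hint.
            if self.hints and self.hints[-1][0] == tok_id:
                _, new_id = self.hints.pop()
                return new_id, val
            return tok_id, val

    lx = HintedLexer([('Lit_Chars', 'foo'), ('Op_RParen', ')')])
    lx.PushHint('Op_RParen', 'Right_ArrayLiteral')
    assert lx.Read() == ('Lit_Chars', 'foo')
    assert lx.Read() == ('Right_ArrayLiteral', ')')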
Example 21
def _BraceDetect(w):
    """
  Args:
    CompoundWord

  Returns:
    CompoundWord or None?

  Another option:

  Grammar:

    # an alternative is a literal, possibly empty, or another brace_expr

    part = <any part except LiteralPart>

    alt = part* | brace_expr

    # a brace_expr is a group of at least 2 braced and comma-separated
    # alternatives, with optional prefix and suffix.
    brace_expr = part* '{' alt ',' alt (',' alt)* '}' part*

  The problem with this grammar: it's not LL(1).
  Is it indirectly left-recursive?
  What's the best way to handle it?  An LR(1) parser?

  Iterative algorithm:

  Parse it with a stack?
    It's a stack that asserts there is at least one , in between {}

  Yeah, just go through and when you see '{', push another list.
  When you get ',', append to the list.
  When you get '}' and at least one ',', append to the list.
  When you get '}' without one, then pop.

  If there is no matching }, then abort with error

  if not balanced, return error too?
  """
    # Errors:
    # }a{    - stack depth dips below 0
    # {a,b}{ - Stack depth doesn't end at 0
    # {a}    - no comma, and also not a numeric range

    cur_parts = []
    stack = []

    found = False

    for i, part in enumerate(w.parts):
        append = True
        if part.tag == word_part_e.LiteralPart:
            id_ = part.token.id
            if id_ == Id.Lit_LBrace:
                # Save prefix parts.  Start new parts list.
                new_frame = _StackFrame(cur_parts)
                stack.append(new_frame)
                cur_parts = []
                append = False
                found = True  # assume found, but can early exit with None later

            elif id_ == Id.Lit_Comma:
                # Append a new alternative.
                #print('*** Appending after COMMA', cur_parts)

                # NOTE: Should we allow this:
                # ,{a,b}
                # or force this:
                # \,{a,b}
                # ?  We're forcing braces right now but not commas.
                if stack:
                    stack[-1].saw_comma = True

                    stack[-1].alt_part.words.append(
                        ast.CompoundWord(cur_parts))
                    cur_parts = []  # clear
                    append = False

            elif id_ == Id.Lit_RBrace:
                # TODO:
                # - Detect lack of , -- abort the whole thing
                # - Detect {1..10} and {1..10..2}
                #   - bash and zsh only -- this is NOT implemented by mksh
                #   - Use a regex on the middle part:
                #     - digit+ '..' digit+  ( '..' digit+ )?
                # - Char ranges are bash only!
                #
                # ast.BracedIntRangePart()
                # ast.CharRangePart()

                if not stack:  # e.g. echo }  -- unbalanced '}'
                    return None
                if not stack[-1].saw_comma:  # {foo} is not a real alternative
                    return None
                stack[-1].alt_part.words.append(ast.CompoundWord(cur_parts))

                frame = stack.pop()
                cur_parts = frame.cur_parts
                cur_parts.append(frame.alt_part)
                append = False

        if append:
            cur_parts.append(part)

    if len(stack) != 0:
        return None

    if found:
        return ast.BracedWordTree(cur_parts)
    else:
        return None
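
_BraceDetect leans on a _StackFrame class that isn't shown here. From its usage above, each frame must carry the parts seen before the '{', an alternatives node with a .words list, and the saw_comma flag. A minimal sketch consistent with that usage (the real class lives in braces.py; the alt-part node is simplified to a plain container):

    class _AltPart(object):
        def __init__(self):
            self.words = []  # one CompoundWord per comma-separated alternative

    class _StackFrame(object):
        def __init__(self, cur_parts):
            self.cur_parts = cur_parts  # parts accumulated before this '{'
            self.alt_part = _AltPart()  # filled in as commas are seen
            self.saw_comma = False      # '{a}' with no comma isn't expanded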
Example 22
def TildeDetect(word):
    """Detect tilde expansion.

  If it needs to include a TildeSubPart, return a new word.  Otherwise return
  None.

  NOTE: This algorithm would be simpler if
  1. We could assume some regex for user names.
  2. We didn't need to do brace expansion first, like {~foo,~bar}
  OR
  - If Lit_Slash were special (it is in the VAROP states, but not OUTER
  state).  We could introduce another lexer mode after you hit Lit_Tilde?

  So we have to scan LiteralPart instances until one of them contains a '/'.

  http://unix.stackexchange.com/questions/157426/what-is-the-regex-to-validate-linux-users
  "It is usually recommended to only use usernames that begin with a lower
  case letter or an underscore, followed by lower case letters, digits,
  underscores, or dashes. They can end with a dollar sign. In regular
  expression terms: [a-z_][a-z0-9_-]*[$]?

  On Debian, the only constraints are that usernames must neither start with
  a dash ('-') nor contain a colon (':') or a whitespace (space: ' ', end
  of line: '\n', tabulation: '\t', etc.). Note that using a slash ('/') may
  break the default algorithm for the definition of the user's home
  directory.
  """
    if not word.parts:
        return None
    part0 = word.parts[0]
    if _LiteralPartId(part0) != Id.Lit_Tilde:
        return None

    prefix = ''
    found_slash = False
    # search for the next /
    for i in range(1, len(word.parts)):
        # Not a literal part, and we did NOT find a slash.  So there is no
        # TildeSub applied.  This would be something like ~X$var, ~$var,
        # ~$(echo), etc..  The slash is necessary.
        if word.parts[i].tag != word_part_e.LiteralPart:
            return None
        val = word.parts[i].token.val
        p = val.find('/')

        if p == -1:  # no slash yet
            prefix += val

        elif p >= 0:
            # e.g. for ~foo!bar/baz, extract "bar"
            # NOTE: requires downcast to LiteralPart
            pre, post = val[:p], val[p:]
            prefix += pre
            tilde_part = ast.TildeSubPart(prefix)
            # NOTE: no span_id here.  It would be nicer to use a different algorithm
            # that didn't require this.
            t = ast.token(Id.Lit_Chars, post, const.NO_INTEGER)
            remainder_part = ast.LiteralPart(t)
            found_slash = True
            break

    w = ast.CompoundWord()
    if found_slash:
        w.parts.append(tilde_part)
        w.parts.append(remainder_part)
        j = i + 1
        while j < len(word.parts):
            w.parts.append(word.parts[j])
            j += 1
    else:
        # The whole thing is a tilde sub, e.g. ~foo or ~foo!bar
        w.parts.append(ast.TildeSubPart(prefix))
    return w
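
A flattened version of the same scan, operating on literal strings instead of word parts, shows the prefix accumulation and the split at the first slash (illustrative; real parts can be non-literal, in which case the detection aborts):

    def tilde_detect(parts):
        """['~', 'foo!bar/baz'] -> ('foo!bar', '/baz'), or None."""
        if not parts or parts[0] != '~':
            return None
        prefix = ''
        for i, val in enumerate(parts[1:], start=1):
            p = val.find('/')
            if p == -1:
                prefix += val  # no slash yet; keep scanning
            else:
                # Everything before the first slash names the user.
                return prefix + val[:p], val[p:] + ''.join(parts[i + 1:])
        return prefix, ''  # whole word is the tilde sub, e.g. ~foo

    assert tilde_detect(['~', 'foo!bar/baz']) == ('foo!bar', '/baz')
    assert tilde_detect(['~', 'foo']) == ('foo', '')
    assert tilde_detect(['x']) is None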