Code Example #1
  def _ReadExtGlob(self):
    # type: () -> word_part__ExtGlob
    """
    Grammar:
      Item         = word.Compound | EPSILON  # important: @(foo|) is allowed
      LEFT         = '@(' | '*(' | '+(' | '?(' | '!('
      RIGHT        = ')'
      ExtGlob      = LEFT (Item '|')* Item RIGHT  # Item may be empty
      Compound includes ExtGlob
    """
    left_token = self.cur_token
    arms = []  # type: List[word_t]
    spids = []  # type: List[int]
    spids.append(left_token.span_id)

    self.lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)
    self._Next(lex_mode_e.ExtGlob)  # advance past LEFT

    read_word = False  # did we just read a word?  To handle @(||).

    while True:
      self._Peek()

      if self.token_type == Id.Right_ExtGlob:
        if not read_word:
          arms.append(word.Compound())
        spids.append(self.cur_token.span_id)
        break

      elif self.token_type == Id.Op_Pipe:
        if not read_word:
          arms.append(word.Compound())
        read_word = False
        self._Next(lex_mode_e.ExtGlob)

      # lex mode EXTGLOB should only produce these 4 kinds of tokens
      elif self.token_kind in (Kind.Lit, Kind.Left, Kind.VSub, Kind.ExtGlob):
        w = self._ReadCompoundWord(lex_mode=lex_mode_e.ExtGlob)
        arms.append(w)
        read_word = True

      elif self.token_kind == Kind.Eof:
        p_die('Unexpected EOF reading extended glob that began here',
              token=left_token)

      else:
        raise AssertionError('Unexpected token %r' % self.cur_token)

    part = word_part.ExtGlob(left_token, arms)
    part.spids.extend(spids)
    return part
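The subtle part above is the read_word flag: it lets empty arms through, so
@(||) yields three empty alternatives and @(foo|) yields a trailing empty one.
Below is a minimal standalone sketch of the same (Item '|')* Item RIGHT loop
over plain token tuples; the 'WORD'/'PIPE'/'RIGHT' tags and the parse_ext_glob
helper are hypothetical stand-ins, not Oil's lexer API.

def parse_ext_glob(tokens):
    """Collect the arms of an extended glob, allowing empty arms."""
    arms = []
    read_word = False  # did we just read a word?  Handles @(||).
    for tok in tokens:
        if tok[0] == 'RIGHT':
            if not read_word:
                arms.append('')  # trailing empty arm, e.g. @(foo|)
            return arms
        elif tok[0] == 'PIPE':
            if not read_word:
                arms.append('')  # empty arm between two delimiters
            read_word = False
        else:  # ('WORD', s)
            arms.append(tok[1])
            read_word = True
    raise ValueError('unexpected EOF in extended glob')

assert parse_ext_glob([('PIPE',), ('PIPE',), ('RIGHT',)]) == ['', '', '']
assert parse_ext_glob([('WORD', 'foo'), ('PIPE',), ('RIGHT',)]) == ['foo', '']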
Code Example #2
File: cmd_exec_test.py  Project: waldyrious/oil
  def testVarOps(self):
    ev = InitEvaluator()  # initializes x=xxx and y=yyy
    unset_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'unset'))
    part_vals = []
    ev._EvalWordPart(unset_sub, part_vals)
    print(part_vals)

    set_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'x'))
    part_vals = []
    ev._EvalWordPart(set_sub, part_vals)
    print(part_vals)

    # Now add some ops
    part = word_part.Literal(token(Id.Lit_Chars, 'default'))
    arg_word = word.Compound([part])
    test_op = suffix_op.Unary(Id.VTest_ColonHyphen, arg_word)
    unset_sub.suffix_op = test_op
    set_sub.suffix_op = test_op

    part_vals = []
    ev._EvalWordPart(unset_sub, part_vals)
    print(part_vals)

    part_vals = []
    ev._EvalWordPart(set_sub, part_vals)
    print(part_vals)
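For context, VTest_ColonHyphen is the ${x:-default} operator: the default
applies when the variable is unset or empty, which is exactly what the two
pairs of _EvalWordPart calls above exercise.  A rough pure-Python rendering of
that semantics (the colon_hyphen helper is illustrative, not Oil code):

def colon_hyphen(env, name, default):
    """Sketch of ${name:-default}: use default when unset OR empty."""
    val = env.get(name)
    return default if val is None or val == '' else val

env = {'x': 'xxx', 'y': 'yyy'}  # what InitEvaluator() sets up above
assert colon_hyphen(env, 'unset', 'default') == 'default'
assert colon_hyphen(env, 'x', 'default') == 'xxx'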
Code Example #3
File: braces.py  Project: waldyrious/oil
def BraceExpandWords(words):
    # type: (List[word__Compound]) -> List[word__Compound]
    out = []  # type: List[word__Compound]
    for w in words:
        if isinstance(w, word__BracedTree):
            parts_list = _BraceExpand(w.parts)
            out.extend(word.Compound(p) for p in parts_list)
        else:
            out.append(w)
    return out
Code Example #4
  def ReadForPlugin(self):
    # type: () -> word__Compound
    """For $PS1, $PS4, etc.

    This is just like reading a here doc line.  "\n" is allowed, as well as the
    typical substitutions ${x} $(echo hi) $((1 + 2)).
    """
    w = word.Compound()
    self._ReadLikeDQ(None, w.parts)
    return w
Code Example #5
File: braces_test.py  Project: waldyrious/oil
    def testBraceExpand(self):
        w = _assertReadWord(self, 'hi')
        results = braces._BraceExpand(w.parts)
        self.assertEqual(1, len(results))
        for parts in results:
            _PrettyPrint(word.Compound(parts))
            print('')

        w = _assertReadWord(self, 'B-{a,b}-E')
        tree = braces._BraceDetect(w)
        self.assertEqual(3, len(tree.parts))
        _PrettyPrint(tree)

        results = braces._BraceExpand(tree.parts)
        self.assertEqual(2, len(results))
        for parts in results:
            _PrettyPrint(word.Compound(parts))
            print('')

        w = _assertReadWord(self, 'B-{a,={b,c,d}=,e}-E')
        tree = braces._BraceDetect(w)
        self.assertEqual(3, len(tree.parts))
        _PrettyPrint(tree)

        results = braces._BraceExpand(tree.parts)
        self.assertEqual(5, len(results))
        for parts in results:
            _PrettyPrint(word.Compound(parts))
            print('')

        w = _assertReadWord(self, 'B-{a,b}-{c,d}-E')
        tree = braces._BraceDetect(w)
        self.assertEqual(5, len(tree.parts))
        _PrettyPrint(tree)

        results = braces._BraceExpand(tree.parts)
        self.assertEqual(4, len(results))
        for parts in results:
            _PrettyPrint(word.Compound(parts))
            print('')
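The expected counts follow from how alternatives compose: {a,b} contributes 2,
the nested {a,={b,c,d}=,e} contributes 1 + 3 + 1 = 5, and adjacent groups
multiply, so {a,b}-{c,d} gives 2 * 2 = 4.  Here is a toy string-level expander
that reproduces those counts; it is only a sketch, since Oil's real
_BraceExpand operates on word parts, not strings.

def toy_brace_expand(s):
    """Expand the first outermost {...} group, then recurse."""
    depth, start = 0, -1
    for i, ch in enumerate(s):
        if ch == '{':
            if depth == 0:
                start = i
            depth += 1
        elif ch == '}' and depth > 0:
            depth -= 1
            if depth == 0:
                # Split the group body on top-level commas only.
                alts, cur, d = [], '', 0
                for c in s[start + 1:i]:
                    if c == '{':
                        d += 1
                    elif c == '}':
                        d -= 1
                    if c == ',' and d == 0:
                        alts.append(cur)
                        cur = ''
                    else:
                        cur += c
                alts.append(cur)
                if len(alts) > 1:  # {foo} alone is not a real alternative
                    out = []
                    for alt in alts:
                        out.extend(toy_brace_expand(s[:start] + alt + s[i + 1:]))
                    return out
    return [s]

assert len(toy_brace_expand('hi')) == 1
assert len(toy_brace_expand('B-{a,b}-E')) == 2
assert len(toy_brace_expand('B-{a,={b,c,d}=,e}-E')) == 5
assert len(toy_brace_expand('B-{a,b}-{c,d}-E')) == 4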
Code Example #6
File: word_.py  Project: roryokane/oil
def TildeDetect(w):
    # type: (word_t) -> Optional[word_t]
    """Detect tilde expansion in a word.

  It might begin with  Literal that needs to be turned into a TildeSub.
  (It depends on whether the second token begins with slash).

  If so, it return a new word.  Otherwise return None.

  NOTE:
  - The regex for Lit_TildeLike could be expanded.  Right now it's
    conservative, like Lit_Chars without the /.
  - It's possible to write this in a mutating style, since only the first token
    is changed.  But note that we CANNOT know this during lexing.
  """
    # NOTE: BracedTree, Empty, etc. can't be tilde expanded
    if not isinstance(w, word__Compound):
        return None

    assert w.parts, w

    part0 = w.parts[0]
    if _LiteralId(part0) != Id.Lit_TildeLike:
        return None
    assert isinstance(part0, word_part__Literal)  # for MyPy

    if len(w.parts) == 1:  # can't be zero
        tilde_part = word_part.TildeSub(part0.token)
        return word.Compound([tilde_part])

    part1 = w.parts[1]
    # NOTE: We could inspect the raw tokens.
    if _LiteralId(part1) == Id.Lit_Chars:
        assert isinstance(part1, word_part__Literal)  # for MyPy
        if part1.token.val.startswith('/'):
            tilde_part_ = word_part.TildeSub(part0.token)  # type: word_part_t
            return word.Compound([tilde_part_] + w.parts[1:])

    # It could be something like '~foo:bar', which doesn't have a slash.
    return None
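Concretely: ~ alone or ~foo/bar gets a TildeSub, while ~foo:bar does not,
because what follows the tilde-like run doesn't start with a slash.  A rough
string-level rendering of the rule (the tilde_detect helper and its character
class are assumptions, not Oil's token-based version):

import re

def tilde_detect(s):
    """Return (tilde_prefix, rest) if s starts a tilde expansion, else None."""
    m = re.match(r'~[a-zA-Z0-9_.\-]*', s)  # conservative: no slash, no colon
    if not m:
        return None
    rest = s[m.end():]
    if rest == '' or rest.startswith('/'):
        return m.group(0), rest
    return None  # e.g. '~foo:bar' -- the second part doesn't begin with /

assert tilde_detect('~foo/bar') == ('~foo', '/bar')
assert tilde_detect('~') == ('~', '')
assert tilde_detect('~foo:bar') is None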
Code Example #7
File: word_.py  Project: roryokane/oil
def DetectAssocPair(w):
    # type: (word__Compound) -> Optional[Tuple[word__Compound, word__Compound]]
    """
  Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

  The key and the value are both strings.  So we just pick out word_part.
  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k] syntax is only used
  for associative array literals, as opposed to indexed array literals.
  """
    parts = w.parts
    if _LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = _LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = word.Compound(parts[1:i])  # $x$y
            value = word.Compound(parts[i + 1:])  # $a$b
            return key, value

    return None
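The scan only looks for the ]= token (Lit_ArrayLhsClose) and splits the part
list around it.  On flat strings the same split looks like this (illustrative
only; the real function walks word parts):

def detect_assoc_pair(s):
    """Sketch: split '[key]=value' into (key, value), else None."""
    if not s.startswith('['):
        return None
    i = s.find(']=')
    if i == -1:
        return None
    return s[1:i], s[i + 2:]

assert detect_assoc_pair('[k]=v') == ('k', 'v')
assert detect_assoc_pair('[$x$y]=$a$b') == ('$x$y', '$a$b')
assert detect_assoc_pair('a[k]=v') is None  # indexed-array form, not assoc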
Code Example #8
File: word_parse_test.py  Project: tekknolagi/oil
  def testMultiLine(self):
    w_parser = test_lib.InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
    print('--MULTI')
    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    parts = [word_part.Literal(token(Id.Lit_Chars, 'ls'))]
    test_lib.AssertAsdlEqual(self, word.Compound(parts), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    parts = [word_part.Literal(token(Id.Lit_Chars, 'foo'))]
    test_lib.AssertAsdlEqual(self, word.Compound(parts), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    t = token(Id.Op_Newline, '\n')
    test_lib.AssertAsdlEqual(self, word.Token(t), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    parts = [word_part.Literal(token(Id.Lit_Chars, 'ls'))]
    test_lib.AssertAsdlEqual(self, word.Compound(parts), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    parts = [word_part.Literal(token(Id.Lit_Chars, 'bar'))]
    test_lib.AssertAsdlEqual(self, word.Compound(parts), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    t = token(Id.Op_Newline, '\n')
    test_lib.AssertAsdlEqual(self, word.Token(t), w)

    w = w_parser.ReadWord(lex_mode_e.ShCommand)
    t = token(Id.Eof_Real, '')
    test_lib.AssertAsdlEqual(self, word.Token(t), w)
Code Example #9
File: word_.py  Project: roryokane/oil
def ErrorWord(fmt, err):
    # type: (str, _ErrorWithLocation) -> word__Compound
    error_str = fmt % err.UserErrorString()
    t = token(Id.Lit_Chars, error_str, const.NO_INTEGER)
    return word.Compound([word_part.Literal(t)])
Code Example #10
File: braces.py  Project: waldyrious/oil
def _BraceDetect(w):
    # type: (word__Compound) -> Optional[word__BracedTree]
    """Return a new word if the input word looks like a brace expansion.

  e.g. {a,b} or {1..10..2} (TODO)
  Do we want to accept {01..02} ?  zsh does make some attempt to do this too.

  NOTE: This is an iterative algorithm that uses a stack.  The grammar-based
  approach didn't seem natural.

  It's not LL(1) because of 'part*'.  And not LL(k) even?  Maybe it be handled
  with an LR parser?  In any case the imperative algorithm with 'early return'
  for a couple cases is fairly simple.

  Grammar:
    # an alternative is a literal, possibly empty, or another brace_expr

    part = <any part except Literal>
    alt = part* | brace_expr

    # a brace_expr is group of at least 2 braced and comma-separated
    # alternatives, with optional prefix and suffix.
    brace_expr = part* '{' alt ',' alt (',' alt)* '}' part*
  """
    # Errors:
    # }a{    - stack depth dips below 0
    # {a,b}{ - Stack depth doesn't end at 0
    # {a}    - no comma, and also not a numeric range

    cur_parts = []  # type: List[word_part_t]
    stack = []  # type: List[_StackFrame]

    found = False

    for i, part in enumerate(w.parts):
        append = True
        if isinstance(part, word_part__Literal):
            id_ = part.token.id
            if id_ == Id.Lit_LBrace:
                # Save prefix parts.  Start new parts list.
                new_frame = _StackFrame(cur_parts)
                stack.append(new_frame)
                cur_parts = []
                append = False
                found = True  # assume found, but can early exit with None later

            elif id_ == Id.Lit_Comma:  # Append a new alternative.
                # NOTE: Should we allow this:
                # ,{a,b}
                # or force this:
                # \,{a,b}
                # ?  We're forcing braces right now but not commas.
                if stack:
                    stack[-1].saw_comma = True
                    stack[-1].alt_part.words.append(word.Compound(cur_parts))
                    cur_parts = []  # clear
                    append = False

            elif id_ == Id.Lit_RBrace:
                if not stack:  # e.g. echo {a,b}{  -- unbalanced {
                    return None  # do not expand ANYTHING because of invalid syntax

                # Detect {1..10} and {1..10..2}

                #log('stack[-1]: %s', stack[-1])
                #log('cur_parts: %s', cur_parts)

                range_part = None
                # only allow {1..3}, not {a,1..3}
                if not stack[-1].saw_comma and len(cur_parts) == 1:
                    # It must be ONE part.  For example, -1..-100..-2 is initially
                    # lexed as a single Lit_Chars token.
                    part = cur_parts[0]
                    if (isinstance(part, word_part__Literal)
                            and part.token.id == Id.Lit_Chars):
                        range_part = _RangePartDetect(part.token)
                        if range_part:
                            frame = stack.pop()
                            cur_parts = frame.cur_parts
                            cur_parts.append(range_part)
                            append = False

                # It doesn't look like a range -- process it as the last element in
                # {a,b,c}

                if not range_part:
                    if not stack[-1].saw_comma:  # {foo} is not a real alternative
                        return None  # early return

                    stack[-1].alt_part.words.append(word.Compound(cur_parts))

                    frame = stack.pop()
                    cur_parts = frame.cur_parts
                    cur_parts.append(frame.alt_part)
                    append = False

        if append:
            cur_parts.append(part)

    if len(stack) != 0:
        return None

    if found:
        return word.BracedTree(cur_parts)
    else:
        return None
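The three error cases in the comment above reduce to a balance check plus the
comma requirement.  A string-level sketch of just that rejection logic (the
real function works on word parts and also handles {1..10} ranges):

def looks_like_brace_expansion(s):
    """True only for balanced braces where every group saw a comma."""
    depth = 0
    saw_comma = []  # one flag per open group
    found = False
    for ch in s:
        if ch == '{':
            depth += 1
            saw_comma.append(False)
            found = True
        elif ch == '}':
            if depth == 0:  # e.g. '}a{' -- stack depth dips below 0
                return False
            depth -= 1
            if not saw_comma.pop():
                return False  # '{a}' is not a real alternative
        elif ch == ',' and depth > 0:
            saw_comma[-1] = True
    if depth != 0:  # e.g. '{a,b}{' -- stack depth doesn't end at 0
        return False
    return found

assert looks_like_brace_expansion('{a,b}')
assert not looks_like_brace_expansion('}a{')
assert not looks_like_brace_expansion('{a,b}{')
assert not looks_like_brace_expansion('{a}')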
Code Example #11
File: expr_to_ast.py  Project: waldyrious/oil
    def Expr(self, pnode):
        # type: (PNode) -> expr_t
        """Transform expressions (as opposed to statements)."""
        typ = pnode.typ
        tok = pnode.tok
        children = pnode.children

        if ISNONTERMINAL(typ):
            c = '-' if not children else len(children)
            #log('non-terminal %s %s', nt_name, c)

            if typ == grammar_nt.oil_expr:  # for if/while
                # oil_expr: '(' testlist ')'
                return self.Expr(children[1])

            if typ == grammar_nt.return_expr:  # for if/while
                # return_expr: testlist end_stmt
                return self.Expr(children[0])

            if typ == grammar_nt.lvalue_list:
                return self._AssocBinary(children)

            if typ == grammar_nt.atom:
                return self.atom(children)

            if typ == grammar_nt.eval_input:
                # eval_input: testlist NEWLINE* ENDMARKER
                return self.Expr(children[0])

            if typ == grammar_nt.testlist:
                # testlist: test (',' test)* [',']
                return self._AssocBinary(children)

            elif typ == grammar_nt.arith_expr:
                # arith_expr: term (('+'|'-') term)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.term:
                # term: factor (('*'|'/'|'div'|'mod') factor)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.expr:
                # expr: xor_expr ('|' xor_expr)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.shift_expr:
                # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.comparison:
                # comparison: expr (comp_op expr)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.factor:
                # factor: ('+'|'-'|'~') factor | power
                # the power would have already been reduced
                assert len(children) == 2, children
                op, e = children
                assert isinstance(op.tok, token)
                return expr.Unary(op.tok, self.Expr(e))

            elif typ == grammar_nt.atom_expr:
                # atom_expr: ['await'] atom trailer*

                # NOTE: This would be shorter in a recursive style.
                base = self.Expr(children[0])
                n = len(children)
                for i in xrange(1, n):
                    pnode = children[i]
                    tok = pnode.tok
                    base = self.trailer(base, pnode)

                return base

            elif typ == grammar_nt.power:
                # power: atom_expr ['^' factor]

                # This doesn't repeat, so it doesn't matter if it's left or right
                # associative.
                return self._AssocBinary(children)

            elif typ == grammar_nt.array_literal:
                left_tok = children[0].tok

                # Approximation for now.
                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Lit_Chars
                ]
                items = [expr.Const(t) for t in tokens]  # type: List[expr_t]
                return expr.ArrayLiteral(left_tok, items)

            elif typ == grammar_nt.sh_array_literal:
                left_tok = children[0].tok

                # HACK: When typ is Id.Expr_WordsDummy, the 'tok' field ('opaque')
                # actually has a list of words!
                typ1 = children[1].typ
                assert typ1 == Id.Expr_WordsDummy.enum_id, typ1
                array_words = cast('List[word_t]', children[1].tok)

                return expr.ShellArrayLiteral(left_tok, array_words)

            elif typ == grammar_nt.regex_literal:
                left_tok = children[0].tok

                # Approximation for now.
                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Expr_Name
                ]
                parts = [regex.Var(t) for t in tokens]  # type: List[regex_t]

                return expr.RegexLiteral(left_tok, regex.Concat(parts))

            elif typ == grammar_nt.command_sub:
                left_tok = children[0].tok

                # Approximation for now.
                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Lit_Chars
                ]
                words = [
                    word.Compound([word_part.Literal(t)]) for t in tokens
                ]  # type: List[word_t]
                return expr.CommandSub(left_tok, command.Simple(words))

            elif typ == grammar_nt.sh_command_sub:
                left_tok = children[0].tok

                # HACK: When typ is Id.Expr_CommandDummy, the 'tok' field ('opaque')
                # actually has a word_part.CommandSub!
                typ1 = children[1].typ
                assert typ1 == Id.Expr_CommandDummy.enum_id, typ1
                cs_part = cast(word_part__CommandSub, children[1].tok)

                # Awkward: the schemas are different
                expr_part = expr.CommandSub(cs_part.left_token,
                                            cs_part.command_list)
                expr_part.spids.extend(cs_part.spids)
                return expr_part

            elif typ == grammar_nt.var_sub:
                left_tok = children[0].tok

                return expr.VarSub(left_tok, self.Expr(children[1]))

            elif typ == grammar_nt.dq_string:
                left_tok = children[0].tok

                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Lit_Chars
                ]
                parts2 = [word_part.Literal(t)
                          for t in tokens]  # type: List[word_part_t]
                return expr.DoubleQuoted(left_tok, parts2)

            else:
                nt_name = self.number2symbol[typ]
                raise AssertionError("PNode type %d (%s) wasn't handled" %
                                     (typ, nt_name))

        else:  # Terminals should have a token
            #log('terminal %s', tok)

            if tok.id == Id.Expr_Name:
                return expr.Var(tok)
            elif tok.id == Id.Expr_Digits:
                return expr.Const(tok)

            else:
                raise AssertionError(tok.id)
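Many branches above delegate to self._AssocBinary(children), which folds a
flat 'operand (op operand)*' child sequence into nested binary nodes.  A
minimal sketch of that fold, shown here as left-associative with hypothetical
('Binary', op, left, right) tuples rather than the real expr types:

def assoc_binary(children):
    """Fold [x, op, y, op, z, ...] into nested Binary nodes."""
    assert len(children) % 2 == 1, children
    node = children[0]
    for i in range(1, len(children), 2):
        node = ('Binary', children[i], node, children[i + 1])
    return node

# 1 + 2 + 3  =>  (1 + 2) + 3
assert assoc_binary([1, '+', 2, '+', 3]) == \
    ('Binary', '+', ('Binary', '+', 1, 2), 3)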
Code Example #12
  def _ReadCompoundWord(self, eof_type=Id.Undefined_Tok,
                        lex_mode=lex_mode_e.ShCommand, empty_ok=True):
    # type: (Id_t, lex_mode_t, bool) -> word__Compound
    """
    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but it
    could be an operator delimiting a compound word.  Can we change lexer modes
    and remove this special case?
    """
    w = word.Compound()
    num_parts = 0
    brace_count = 0
    done = False
    while not done:
      self._Peek()

      allow_done = empty_ok or num_parts != 0
      if allow_done and self.token_type == eof_type:
        done = True  # e.g. for ${foo//pat/replace}

      # Keywords like "for" are treated like literals
      elif self.token_kind in (
          Kind.Lit, Kind.History, Kind.KW, Kind.ControlFlow,
          Kind.BoolUnary, Kind.BoolBinary):
        if self.token_type == Id.Lit_EscapedChar:
          part = word_part.EscapedLiteral(self.cur_token)  # type: word_part_t
        else:
          part = word_part.Literal(self.cur_token)

        if self.token_type == Id.Lit_VarLike and num_parts == 0:  # foo=
          w.parts.append(part)
          # Unfortunately it's awkward to pull the check for a=(1 2) up to
          # _ReadWord.
          t = self.lexer.LookAhead(lex_mode_e.ShCommand)
          if t.id == Id.Op_LParen:
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)
            part2 = self._ReadArrayLiteral()
            w.parts.append(part2)

            # Array literal must be the last part of the word.
            self._Next(lex_mode)
            self._Peek()
            # EOF, whitespace, newline, Right_Subshell
            if self.token_kind not in self.KINDS_THAT_END_WORDS:
              p_die('Unexpected token after array literal',
                    token=self.cur_token)
            done = True

        elif (self.parse_opts.at and self.token_type == Id.Lit_Splice and
              num_parts == 0):

          splice_token = self.cur_token

          t = self.lexer.LookAhead(lex_mode_e.ShCommand)
          if t.id == Id.Op_LParen:  # @arrayfunc(x)
            arglist = arg_list()
            self._ParseCallArguments(arglist)
            part = word_part.FuncCall(splice_token, arglist)
          else:
            part = word_part.Splice(splice_token)

          w.parts.append(part)

          # @words or @arrayfunc() must be the last part of the word
          self._Next(lex_mode)
          self._Peek()
          # EOF, whitespace, newline, Right_Subshell
          if self.token_kind not in self.KINDS_THAT_END_WORDS:
            p_die('Unexpected token after array splice',
                  token=self.cur_token)
          done = True

        else:
          # Syntax error for { and }
          if self.token_type == Id.Lit_LBrace:
            brace_count += 1
          elif self.token_type == Id.Lit_RBrace:
            brace_count -= 1

          # not a literal with lookahead; append it
          w.parts.append(part)

      elif self.token_kind == Kind.VSub:
        vsub_token = self.cur_token

        part = simple_var_sub(vsub_token)
        if self.token_type == Id.VSub_DollarName:
          # Look ahead for $strfunc(x)
          #   $f(x) or --name=$f(x) is allowed
          #   but "--name=$f(x)" not allowed?  This would BREAK EXISTING CODE.
          #   It would need a parse option.

          t = self.lexer.LookAhead(lex_mode_e.ShCommand)
          if t.id == Id.Op_LParen:
            arglist = arg_list()
            self._ParseCallArguments(arglist)
            part = word_part.FuncCall(vsub_token, arglist)

            # Unlike @arrayfunc(x), it makes sense to allow $f(1)$f(2)
            # var a = f(1); var b = f(2); echo $a$b
            # It's consistent with other uses of $.

        w.parts.append(part)

      elif self.token_kind == Kind.ExtGlob:
        part = self._ReadExtGlob()
        w.parts.append(part)

      elif self.token_kind == Kind.Left:
        part = self._ReadLeftParts()
        w.parts.append(part)

      # NOT done yet, will advance below
      elif self.token_kind == Kind.Right:
        # Still part of the word; will be done on the next iter.
        if self.token_type == Id.Right_DoubleQuote:
          pass
        # Never happens, no PushHint for this case.
        #elif self.token_type == Id.Right_DollarParen:
        #  pass
        elif self.token_type == Id.Right_Subshell:
          # LEXER HACK for (case x in x) ;; esac )
          assert self.next_lex_mode is None  # Rewind before it's used
          if self.lexer.MaybeUnreadOne():
            self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
            self._Next(lex_mode)
          done = True
        else:
          done = True

      elif self.token_kind == Kind.Ignored:
        done = True

      else:
        # LEXER HACK for unbalanced case clause.  'case foo in esac' is valid,
        # so to test for ESAC, we can read ) before getting a chance to
        # PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread one
        # token and do it again.

        # We get Id.Op_RParen at top level:      case x in x) ;; esac
        # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
        if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
          assert self.next_lex_mode is None  # Rewind before it's used
          if self.lexer.MaybeUnreadOne():
            if self.token_type == Id.Eof_RParen:
              # Redo translation
              self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
            self._Next(lex_mode)

        done = True  # anything we don't recognize means we're done

      if not done:
        self._Next(lex_mode)
        num_parts += 1

    if self.parse_opts.brace and num_parts > 1 and brace_count != 0:
      # accept { and }, but not foo{
      p_die(
          'Word has unbalanced { }.  Maybe add a space or quote it like \{',
          word=w)

    return w
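The closing brace_count check rejects words like foo{ only when the word has
multiple parts, so a lone { or } still parses as an ordinary word.  As a
standalone rule it looks roughly like this (illustrative; the real check also
respects the parse_opts.brace option):

def check_brace_balance(parts):
    """Sketch: a multi-part word with unbalanced { } is a syntax error."""
    brace_count = sum(1 if p == '{' else -1 if p == '}' else 0 for p in parts)
    if len(parts) > 1 and brace_count != 0:
        raise SyntaxError('Word has unbalanced { }.  '
                          'Maybe add a space or quote it like \\{')

check_brace_balance(['{'])               # a lone { is fine (brace group)
check_brace_balance(['foo', '{', '}'])   # balanced, OK
try:
    check_brace_balance(['foo', '{'])    # foo{ is rejected
except SyntaxError:
    pass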