Example #1
    def _ReadExtGlobPart(self):
        # type: () -> word_part__ExtGlobPart
        """
    Grammar:
      Item         = CompoundWord | EPSILON  # important: @(foo|) is allowed
      LEFT         = '@(' | '*(' | '+(' | '?(' | '!('
      RIGHT        = ')'
      ExtGlob      = LEFT (Item '|')* Item RIGHT  # ITEM may be empty
      CompoundWord includes ExtGlobPart
    """
        left_token = self.cur_token
        arms = []  # type: List[word_t]
        spids = []
        spids.append(left_token.span_id)

        self.lexer.PushHint(Id.Op_RParen, Id.Right_ExtGlob)
        self._Next(lex_mode_e.ExtGlob)  # advance past LEFT

        read_word = False  # did we just read a word?  To handle @(||).

        while True:
            self._Peek()

            if self.token_type == Id.Right_ExtGlob:
                if not read_word:
                    arms.append(osh_word.CompoundWord())
                spids.append(self.cur_token.span_id)
                break

            elif self.token_type == Id.Op_Pipe:
                if not read_word:
                    arms.append(osh_word.CompoundWord())
                read_word = False
                self._Next(lex_mode_e.ExtGlob)

            # lex mode EXTGLOB should only produce these 4 kinds of tokens
            elif self.token_kind in (Kind.Lit, Kind.Left, Kind.VSub,
                                     Kind.ExtGlob):
                w = self._ReadCompoundWord(lex_mode=lex_mode_e.ExtGlob)
                arms.append(w)
                read_word = True

            elif self.token_kind == Kind.Eof:
                p_die('Unexpected EOF reading extended glob that began here',
                      token=left_token)

            else:
                raise AssertionError('Unexpected token %r' % self.cur_token)

        part = word_part.ExtGlobPart(left_token, arms)
        part.spids.extend(spids)
        return part
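A string-level sketch of what that grammar accepts; split_extglob_arms is a hypothetical helper (not Oil's parser) and assumes a flat extglob with no nesting:

def split_extglob_arms(s):
    # LEFT is one of @( *( +( ?( !( and RIGHT is ')'.
    assert s[:2] in ('@(', '*(', '+(', '?(', '!(') and s.endswith(')')
    return s[2:-1].split('|')  # Item may be empty, so empty arms survive

print(split_extglob_arms('@(foo|bar)'))  # ['foo', 'bar']
print(split_extglob_arms('@(foo|)'))     # ['foo', ''] -- empty arm is allowed
print(split_extglob_arms('@(||)'))       # ['', '', ''] -- the @(||) case above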
Example #2
File: prompt.py Project: jyn514/oil
    def EvalPrompt(self, val):
        """Perform the two evaluations that bash does.  Used by $PS1 and ${x@P}."""
        if val.tag != value_e.Str:
            return self.default_prompt  # no evaluation necessary

        # Parse backslash escapes (cached)
        try:
            tokens = self.tokens_cache[val.s]
        except KeyError:
            tokens = list(match.PS1_LEXER.Tokens(val.s))
            self.tokens_cache[val.s] = tokens

        # Replace values.
        ps1_str = self._ReplaceBackslashCodes(tokens)

        # Parse it like a double-quoted word (cached).
        # NOTE: This is copied from the PS4 logic in Tracer.
        try:
            ps1_word = self.parse_cache[ps1_str]
        except KeyError:
            w_parser = self.parse_ctx.MakeWordParserForPlugin(
                ps1_str, self.arena)
            try:
                ps1_word = w_parser.ReadForPlugin()
            except Exception as e:
                error_str = '<ERROR: cannot parse PS1>'
                t = token(Id.Lit_Chars, error_str, const.NO_INTEGER)
                ps1_word = word.CompoundWord([word_part.LiteralPart(t)])
            self.parse_cache[ps1_str] = ps1_word

        # Evaluate, e.g. "${debian_chroot}\u" -> '\u'
        # TODO: Handle runtime errors like unset variables, etc.
        val2 = self.ex.word_ev.EvalWordToString(ps1_word)
        return val2.s
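Both lookups above use the same try/except KeyError memoization idiom; a minimal standalone sketch, where cached and the placeholder compute function are hypothetical, not Oil's API:

def cached(cache, key, compute):
    # Return the cached value, computing and storing it on the first miss.
    try:
        return cache[key]
    except KeyError:
        val = compute(key)
        cache[key] = val
        return val

parse_cache = {}
compute = lambda s: s.upper()  # stands in for the real parse step
assert cached(parse_cache, r'\u@\h', compute) is cached(parse_cache, r'\u@\h', compute)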
Example #3
  def testVarOps(self):
    ev = InitEvaluator()  # initializes x=xxx and y=yyy
    unset_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'unset'))
    part_vals = []
    ev._EvalWordPart(unset_sub, part_vals)
    print(part_vals)

    set_sub = word_part.BracedVarSub(token(Id.VSub_Name, 'x'))
    part_vals = []
    ev._EvalWordPart(set_sub, part_vals)
    print(part_vals)

    # Now add some ops
    part = word_part.LiteralPart(token(Id.Lit_Chars, 'default'))
    arg_word = osh_word.CompoundWord([part])
    test_op = suffix_op.StringUnary(Id.VTest_ColonHyphen, arg_word)
    unset_sub.suffix_op = test_op
    set_sub.suffix_op = test_op

    part_vals = []
    ev._EvalWordPart(unset_sub, part_vals)
    print(part_vals)

    part_vals = []
    ev._EvalWordPart(set_sub, part_vals)
    print(part_vals)
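In shell terms, the test exercises ${unset:-default} vs. ${x:-default}; a sketch of the Id.VTest_ColonHyphen semantics with a hypothetical helper over a plain dict:

def colon_hyphen(env, name, default):
    # ':-' uses the default if the variable is unset OR empty.
    val = env.get(name, '')
    return val if val else default

env = {'x': 'xxx', 'y': 'yyy'}  # what InitEvaluator sets up above
assert colon_hyphen(env, 'unset', 'default') == 'default'
assert colon_hyphen(env, 'x', 'default') == 'xxx'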
Example #4
def BraceExpandWords(words):
    # type: (List[word__CompoundWord]) -> List[word__CompoundWord]
    out = []  # type: List[word__CompoundWord]
    for w in words:
        if isinstance(w, word__BracedWordTree):
            parts_list = _BraceExpand(w.parts)
            out.extend(word.CompoundWord(p) for p in parts_list)
        else:
            out.append(w)
    return out
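A string-level sketch of the same dispatch, assuming a single non-nested {a,b} group; the substring test is a crude stand-in for the isinstance(w, word__BracedWordTree) check:

def brace_expand_words_str(words):
    out = []
    for w in words:
        if '{' in w and ',' in w and '}' in w:  # crude stand-in for the isinstance test
            prefix, rest = w.split('{', 1)
            body, suffix = rest.split('}', 1)
            out.extend(prefix + alt + suffix for alt in body.split(','))
        else:
            out.append(w)  # unchanged words pass through
    return out

print(brace_expand_words_str(['echo', 'B-{a,b}-E']))
# ['echo', 'B-a-E', 'B-b-E']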
Example #5
    def ReadForPlugin(self):
        # type: () -> word__CompoundWord
        """For $PS1, $PS4, etc.

    This is just like reading a here doc line.  "\n" is allowed, as well as the
    typical substitutions ${x} $(echo hi) $((1 + 2)).
    """
        w = osh_word.CompoundWord()
        self._ReadLikeDQ(None, w.parts)
        return w
Example #6
  def _EvalRedirect(self, n):
    fd = REDIR_DEFAULT_FD[n.op.id] if n.fd == const.NO_INTEGER else n.fd
    if n.tag == redir_e.Redir:
      redir_type = REDIR_ARG_TYPES[n.op.id]  # could be static in the LST?

      if redir_type == redir_arg_type_e.Path:
        # NOTE: no globbing.  You can write to a file called '*.py'.
        val = self.word_ev.EvalWordToString(n.arg_word)
        if val.tag != value_e.Str:  # TODO: This error never fires
          util.error("Redirect filename must be a string, got %s", val)
          return None
        filename = val.s
        if not filename:
          # Whether this is fatal depends on errexit.
          util.error("Redirect filename can't be empty")
          return None

        return redirect.PathRedirect(n.op.id, fd, filename)

      elif redir_type == redir_arg_type_e.Desc:  # e.g. 1>&2
        val = self.word_ev.EvalWordToString(n.arg_word)
        if val.tag != value_e.Str:  # TODO: This error never fires
          util.error("Redirect descriptor should be a string, got %s", val)
          return None
        t = val.s
        if not t:
          util.error("Redirect descriptor can't be empty")
          return None
        try:
          target_fd = int(t)
        except ValueError:
          util.error(
              "Redirect descriptor should look like an integer, got %s", val)
          return None

        return redirect.DescRedirect(n.op.id, fd, target_fd)

      elif redir_type == redir_arg_type_e.Here:  # here word
        val = self.word_ev.EvalWordToString(n.arg_word)
        assert val.tag == value_e.Str, val
        # NOTE: bash and mksh both add \n
        return redirect.HereRedirect(fd, val.s + '\n')
      else:
        raise AssertionError('Unknown redirect op')

    elif n.tag == redir_e.HereDoc:
      # HACK: Wrap it in a word to evaluate.
      w = osh_word.CompoundWord(n.stdin_parts)
      val = self.word_ev.EvalWordToString(w)
      assert val.tag == value_e.Str, val
      return redirect.HereRedirect(fd, val.s)

    else:
      raise AssertionError('Unknown redirect type')
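The Desc branch's validation, pulled out as a standalone sketch (eval_desc_word is a hypothetical name); it mirrors the empty check and the int()/ValueError check above:

def eval_desc_word(t):
    # Returns (target_fd, error); exactly one of the two is None.
    if not t:
        return None, "Redirect descriptor can't be empty"
    try:
        return int(t), None
    except ValueError:
        return None, 'Redirect descriptor should look like an integer, got %r' % t

print(eval_desc_word('2'))    # (2, None)  -- e.g. from 1>&2
print(eval_desc_word('two'))  # (None, "... got 'two'")
print(eval_desc_word(''))     # (None, "... can't be empty")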
Example #7
def TildeDetect(w):
    # type: (word_t) -> Optional[word_t]
    """Detect tilde expansion in a word.

  It might begin with a LiteralPart that needs to be turned into a TildeSubPart.
  (It depends on whether the second token begins with a slash.)

  If so, it returns a new word.  Otherwise it returns None.

  NOTE:
  - The regex for Lit_TildeLike could be expanded.  Right now it's
    conservative, like Lit_Chars without the /.
  - It's possible to write this in a mutating style, since only the first token
    is changed.  But note that we CANNOT know this during lexing.
  """
    # NOTE: BracedWordTree, EmptyWord, etc. can't be tilde expanded
    if not isinstance(w, word__CompoundWord):
        return None

    assert w.parts, w

    part0 = w.parts[0]
    if _LiteralPartId(part0) != Id.Lit_TildeLike:
        return None
    assert isinstance(part0, word_part__LiteralPart)  # for MyPy

    if len(w.parts) == 1:  # can't be zero
        tilde_part = word_part.TildeSubPart(part0.token)
        return word.CompoundWord([tilde_part])

    part1 = w.parts[1]
    # NOTE: We could inspect the raw tokens.
    if _LiteralPartId(part1) == Id.Lit_Chars:
        assert isinstance(part1, word_part__LiteralPart)  # for MyPy
        if part1.token.val.startswith('/'):
            tilde_part_ = word_part.TildeSubPart(
                part0.token)  # type: word_part_t
            return word.CompoundWord([tilde_part_] + w.parts[1:])

    # It could be something like '~foo:bar', which doesn't have a slash.
    return None
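The same decision at the string level, as a simplification of the part-level logic above (tilde_detect_str is hypothetical and ignores the Lit_TildeLike lexing details):

def tilde_detect_str(s):
    if not s.startswith('~'):
        return None
    head, slash, rest = s.partition('/')
    if ':' in head:  # e.g. '~foo:bar' -- no slash, so no tilde sub
        return None
    return head, slash + rest  # (part to tilde-expand, remainder)

print(tilde_detect_str('~/src'))     # ('~', '/src')
print(tilde_detect_str('~bob/src'))  # ('~bob', '/src')
print(tilde_detect_str('~foo:bar'))  # None
print(tilde_detect_str('x~y'))       # None -- must be at the start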
Example #8
    def testBraceExpand(self):
        w = _assertReadWord(self, 'hi')
        results = braces._BraceExpand(w.parts)
        self.assertEqual(1, len(results))
        for parts in results:
            _PrettyPrint(osh_word.CompoundWord(parts))
            print('')

        w = _assertReadWord(self, 'B-{a,b}-E')
        tree = braces._BraceDetect(w)
        self.assertEqual(3, len(tree.parts))
        _PrettyPrint(tree)

        results = braces._BraceExpand(tree.parts)
        self.assertEqual(2, len(results))
        for parts in results:
            _PrettyPrint(osh_word.CompoundWord(parts))
            print('')

        w = _assertReadWord(self, 'B-{a,={b,c,d}=,e}-E')
        tree = braces._BraceDetect(w)
        self.assertEqual(3, len(tree.parts))
        _PrettyPrint(tree)

        results = braces._BraceExpand(tree.parts)
        self.assertEqual(5, len(results))
        for parts in results:
            _PrettyPrint(osh_word.CompoundWord(parts))
            print('')

        w = _assertReadWord(self, 'B-{a,b}-{c,d}-E')
        tree = braces._BraceDetect(w)
        self.assertEqual(5, len(tree.parts))
        _PrettyPrint(tree)

        results = braces._BraceExpand(tree.parts)
        self.assertEqual(4, len(results))
        for parts in results:
            _PrettyPrint(osh_word.CompoundWord(parts))
            print('')
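The last case, 'B-{a,b}-{c,d}-E', is a cross product; a string-level sketch (with a hypothetical helper) of why the test expects 4 results:

import itertools

def expand_two_groups(prefix, alts1, mid, alts2, suffix):
    # Hypothetical flattening of: part* '{' alts '}' part* '{' alts '}' part*
    return [prefix + a + mid + b + suffix
            for a, b in itertools.product(alts1, alts2)]

print(expand_two_groups('B-', ['a', 'b'], '-', ['c', 'd'], '-E'))
# ['B-a-c-E', 'B-a-d-E', 'B-b-c-E', 'B-b-d-E'] -- the 4 results asserted above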
Example #9
def DetectAssocPair(w):
  # type: (word__CompoundWord) -> Optional[Tuple[word__CompoundWord, word__CompoundWord]]
  """
  Like DetectAssignment, but for A=(['k']=v ['k2']=v)

  The key and the value are both strings.  So we just pick out word_part.
  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k] syntax is only used
  for associative array literals, as opposed to indexed array literals.
  """
  parts = w.parts
  if _LiteralPartId(parts[0]) != Id.Lit_LBracket:
    return None

  n = len(parts)
  for i in xrange(n):
    id_ = _LiteralPartId(parts[i])
    if id_ == Id.Lit_ArrayLhsClose: # ]=
      # e.g. if we have [$x$y]=$a$b
      key = word.CompoundWord(parts[1:i])  # $x$y
      value = word.CompoundWord(parts[i+1:])  # $a$b
      return key, value

  return None
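A string-level sketch of the same scan (detect_assoc_pair_str is hypothetical): find ']=' after a leading '[', then split into key and value:

def detect_assoc_pair_str(s):
    if not s.startswith('['):  # the Lit_LBracket check
        return None
    i = s.find(']=')           # the Lit_ArrayLhsClose scan
    if i == -1:
        return None
    return s[1:i], s[i + 2:]   # key between '[' and ']=', value after

print(detect_assoc_pair_str('[k]=v'))        # ('k', 'v')
print(detect_assoc_pair_str('[$x$y]=$a$b'))  # ('$x$y', '$a$b')
print(detect_assoc_pair_str('a[k]=v'))       # None -- that's a[k]=v, not [k]=v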
Example #10
    def testMultiLine(self):
        w_parser = _InitWordParser("""\
ls foo

# Multiple newlines and comments should be ignored

ls bar
""")
        print('--MULTI')
        w = w_parser.ReadWord(lex_mode_e.Outer)
        parts = [word_part.LiteralPart(token(Id.Lit_Chars, 'ls'))]
        test_lib.AssertAsdlEqual(self, osh_word.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        parts = [word_part.LiteralPart(token(Id.Lit_Chars, 'foo'))]
        test_lib.AssertAsdlEqual(self, osh_word.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        t = token(Id.Op_Newline, '\n')
        test_lib.AssertAsdlEqual(self, osh_word.TokenWord(t), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        parts = [word_part.LiteralPart(token(Id.Lit_Chars, 'ls'))]
        test_lib.AssertAsdlEqual(self, osh_word.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        parts = [word_part.LiteralPart(token(Id.Lit_Chars, 'bar'))]
        test_lib.AssertAsdlEqual(self, osh_word.CompoundWord(parts), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        t = token(Id.Op_Newline, '\n')
        test_lib.AssertAsdlEqual(self, osh_word.TokenWord(t), w)

        w = w_parser.ReadWord(lex_mode_e.Outer)
        t = token(Id.Eof_Real, '')
        test_lib.AssertAsdlEqual(self, osh_word.TokenWord(t), w)
Example #11
  def _EvalPS4(self):
    """For set -x."""

    val = self.mem.GetVar('PS4')
    assert val.tag == value_e.Str

    s = val.s
    if s:
      first_char, ps4 = s[0], s[1:]
    else:
      first_char, ps4 = '+', ' '  # default

    # NOTE: This cache is slightly broken because aliases are mutable!  I think
    # that is more or less harmless though.
    try:
      ps4_word = self.parse_cache[ps4]
    except KeyError:
      # We have to parse this at runtime.  PS4 should usually remain constant.
      w_parser = self.parse_ctx.MakeWordParserForPlugin(ps4, self.arena)

      try:
        ps4_word = w_parser.ReadForPlugin()
      except util.ParseError as e:
        error_str = '<ERROR: cannot parse PS4>'
        t = token(Id.Lit_Chars, error_str, const.NO_INTEGER)
        ps4_word = osh_word.CompoundWord([word_part.LiteralPart(t)])
      self.parse_cache[ps4] = ps4_word

    #print(ps4_word)

    # TODO: Repeat the first character according to process stack depth.  Where is
    # that stored?  In the executor itself?  It should be stored along with
    # the PID.  Need some kind of ShellProcessState or something.
    #
    # We should come up with a better mechanism.  Something like $PROC_INDENT
    # and $OIL_XTRACE_PREFIX.

    # TODO: Handle runtime errors!  For example, you could PS4='$(( 1 / 0 ))'
    # <ERROR: cannot evaluate PS4>
    prefix = self.word_ev.EvalWordToString(ps4_word)

    return first_char, prefix.s
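The split at the top of the function, standalone: the first character of $PS4 is the repeat character and the rest is the template, with '+ ' as the fallback (split_ps4 is a hypothetical name):

def split_ps4(s):
    return (s[0], s[1:]) if s else ('+', ' ')

assert split_ps4('+ ') == ('+', ' ')                  # bash's default PS4
assert split_ps4('') == ('+', ' ')                    # empty -> same default
assert split_ps4('>$LINENO: ') == ('>', '$LINENO: ')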
Example #12
    def _ReadArithWord(self):
        # type: () -> Tuple[word_t, bool]
        """Helper function for ReadArithWord."""
        self._Peek()

        if self.token_kind == Kind.Unknown:
            p_die('Unexpected token in arithmetic context',
                  token=self.cur_token)

        elif self.token_kind == Kind.Eof:
            # Just return EOF token
            w = osh_word.TokenWord(self.cur_token)  # type: word_t
            return w, False

        elif self.token_kind == Kind.Ignored:
            # Space should be ignored.  TODO: change this to SPACE_SPACE and
            # SPACE_NEWLINE?  or SPACE_TOK.
            self._Next(lex_mode_e.Arith)
            return None, True  # Tell wrapper to try again

        elif self.token_kind in (Kind.Arith, Kind.Right):
            # Id.Right_ArithSub IS just a normal token, handled by ArithParser
            self._Next(lex_mode_e.Arith)
            w = osh_word.TokenWord(self.cur_token)
            return w, False

        elif self.token_kind in (Kind.Lit, Kind.Left):
            w = self._ReadCompoundWord(lex_mode=lex_mode_e.Arith)
            return w, False

        elif self.token_kind == Kind.VSub:
            part = word_part.SimpleVarSub(self.cur_token)
            self._Next(lex_mode_e.Arith)
            w = osh_word.CompoundWord([part])
            return w, False

        else:
            assert False, ("Unexpected token parsing arith sub: %s" %
                           self.cur_token)

        raise AssertionError("Shouldn't get here")
Example #13
    def _ReadCompoundWord(self,
                          eof_type=Id.Undefined_Tok,
                          lex_mode=lex_mode_e.Outer,
                          empty_ok=True):
        # type: (Id_t, lex_mode_t, bool) -> word__CompoundWord
        """
    Precondition: Looking at the first token of the first word part
    Postcondition: Looking at the token after, e.g. space or operator

    NOTE: eof_type is necessary because / is a literal, i.e. Lit_Slash, but it
    could be an operator delimiting a compound word.  Can we change lexer modes
    and remove this special case?
    """
        word = osh_word.CompoundWord()
        num_parts = 0
        done = False
        while not done:
            self._Peek()

            allow_done = empty_ok or num_parts != 0
            if allow_done and self.token_type == eof_type:
                done = True  # e.g. for ${foo//pat/replace}

            # Keywords like "for" are treated like literals
            elif self.token_kind in (Kind.Lit, Kind.History, Kind.KW,
                                     Kind.Assign, Kind.ControlFlow,
                                     Kind.BoolUnary, Kind.BoolBinary):
                if self.token_type == Id.Lit_EscapedChar:
                    part = word_part.EscapedLiteralPart(
                        self.cur_token)  # type: word_part_t
                else:
                    part = word_part.LiteralPart(self.cur_token)

                word.parts.append(part)

                if self.token_type == Id.Lit_VarLike:  # foo=
                    t = self.lexer.LookAhead(lex_mode_e.Outer)
                    if t.id == Id.Op_LParen:
                        self.lexer.PushHint(Id.Op_RParen,
                                            Id.Right_ArrayLiteral)
                        part2 = self._ReadArrayLiteralPart()
                        word.parts.append(part2)

            elif self.token_kind == Kind.VSub:
                part = word_part.SimpleVarSub(self.cur_token)
                word.parts.append(part)

            elif self.token_kind == Kind.ExtGlob:
                part = self._ReadExtGlobPart()
                word.parts.append(part)

            elif self.token_kind == Kind.Left:
                part = self._ReadLeftParts()
                word.parts.append(part)

            # NOT done yet, will advance below
            elif self.token_kind == Kind.Right:
                # Still part of the word; will be done on the next iter.
                if self.token_type == Id.Right_DoubleQuote:
                    pass
                elif self.token_type == Id.Right_CommandSub:
                    pass
                elif self.token_type == Id.Right_Subshell:
                    # LEXER HACK for (case x in x) ;; esac )
                    assert self.next_lex_mode is None  # Rewind before it's used
                    if self.lexer.MaybeUnreadOne():
                        self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
                        self._Next(lex_mode)
                    done = True
                else:
                    done = True

            elif self.token_kind == Kind.Ignored:
                done = True

            else:
                # LEXER HACK for unbalanced case clause.  'case foo in esac' is valid,
                # so to test for ESAC, we can read ) before getting a chance to
                # PushHint(Id.Op_RParen, Id.Right_CasePat).  So here we unread one
                # token and do it again.

                # We get Id.Op_RParen at top level:      case x in x) ;; esac
                # We get Id.Eof_RParen inside ComSub:  $(case x in x) ;; esac )
                if self.token_type in (Id.Op_RParen, Id.Eof_RParen):
                    assert self.next_lex_mode is None  # Rewind before it's used
                    if self.lexer.MaybeUnreadOne():
                        if self.token_type == Id.Eof_RParen:
                            # Redo translation
                            self.lexer.PushHint(Id.Op_RParen, Id.Eof_RParen)
                        self._Next(lex_mode)

                done = True  # anything we don't recognize means we're done

            if not done:
                self._Next(lex_mode)
            num_parts += 1
        return word
Example #14
def _BraceDetect(w):
    # type: (word__CompoundWord) -> Optional[word__BracedWordTree]
    """Return a new word if the input word looks like a brace expansion.

  e.g. {a,b} or {1..10..2} (TODO)
  Do we want to accept {01..02} ?  zsh does make some attempt to do this too.

  NOTE: This is an iterative algorithm that uses a stack.  The grammar-based
  approach didn't seem natural.

  It's not LL(1) because of 'part*'.  And not LL(k) even?  Maybe it could be
  handled with an LR parser?  In any case the imperative algorithm with 'early
  return' for a couple of cases is fairly simple.

  Grammar:
    # an alternative is a literal, possibly empty, or another brace_expr

    part = <any part except LiteralPart>
    alt = part* | brace_expr

    # a brace_expr is group of at least 2 braced and comma-separated
    # alternatives, with optional prefix and suffix.
    brace_expr = part* '{' alt ',' alt (',' alt)* '}' part*
  """
    # Errors:
    # }a{    - stack depth dips below 0
    # {a,b}{ - Stack depth doesn't end at 0
    # {a}    - no comma, and also not a numeric range

    cur_parts = []  # type: List[word_part_t]
    stack = []  # type: List[_StackFrame]

    found = False

    for i, part in enumerate(w.parts):
        append = True
        if isinstance(part, word_part__LiteralPart):
            id_ = part.token.id
            if id_ == Id.Lit_LBrace:
                # Save prefix parts.  Start new parts list.
                new_frame = _StackFrame(cur_parts)
                stack.append(new_frame)
                cur_parts = []
                append = False
                found = True  # assume found, but can early exit with None later

            elif id_ == Id.Lit_Comma:  # Append a new alternative.
                # NOTE: Should we allow this:
                # ,{a,b}
                # or force this:
                # \,{a,b}
                # ?  We're forcing braces right now but not commas.
                if stack:
                    stack[-1].saw_comma = True

                    stack[-1].alt_part.words.append(
                        word.CompoundWord(cur_parts))
                    cur_parts = []  # clear
                    append = False

            elif id_ == Id.Lit_RBrace:
                if not stack:  # e.g. echo }  -- unbalanced }
                    return None  # do not expand ANYTHING because of invalid syntax

                # Detect {1..10} and {1..10..2}

                #log('stack[-1]: %s', stack[-1])
                #log('cur_parts: %s', cur_parts)

                range_part = None
                # only allow {1..3}, not {a,1..3}
                if not stack[-1].saw_comma and len(cur_parts) == 1:
                    # It must be ONE part.  For example, -1..-100..-2 is initially
                    # lexed as a single Lit_Chars token.
                    part = cur_parts[0]
                    if (isinstance(part, word_part__LiteralPart)
                            and part.token.id == Id.Lit_Chars):
                        range_part = _RangePartDetect(part.token)
                        if range_part:
                            frame = stack.pop()
                            cur_parts = frame.cur_parts
                            cur_parts.append(range_part)
                            append = False

                # It doesn't look like a range -- process it as the last element in
                # {a,b,c}

                if not range_part:
                    if not stack[-1].saw_comma:  # {foo} is not a real alternative
                        return None  # early return

                    stack[-1].alt_part.words.append(
                        word.CompoundWord(cur_parts))

                    frame = stack.pop()
                    cur_parts = frame.cur_parts
                    cur_parts.append(frame.alt_part)
                    append = False

        if append:
            cur_parts.append(part)

    if len(stack) != 0:
        return None

    if found:
        return word.BracedWordTree(cur_parts)
    else:
        return None
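The stack discipline, sketched at the character level (looks_like_brace_expansion is hypothetical and omits the {1..10} range case): '{' pushes a frame, ',' marks it, and '}' pops only a marked frame, mirroring the three error comments above:

def looks_like_brace_expansion(s):
    stack = []  # one bool per open '{': did this group see a ',' yet?
    found = False
    for ch in s:
        if ch == '{':
            stack.append(False)
            found = True
        elif ch == ',' and stack:
            stack[-1] = True
        elif ch == '}':
            if not stack or not stack[-1]:  # '}a{' or '{a}'
                return False
            stack.pop()
    return found and not stack  # '{a,b}{' leaves a frame open

assert looks_like_brace_expansion('B-{a,b}-E')
assert not looks_like_brace_expansion('{a}')     # no comma
assert not looks_like_brace_expansion('{a,b}{')  # depth doesn't end at 0
assert not looks_like_brace_expansion('}a{')     # depth dips below 0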
Example #15
def _BraceDetect(w):
    # type: (word__CompoundWord) -> Optional[word__BracedWordTree]
    """
  Args:
    CompoundWord

  Returns:
    CompoundWord or None?

  Another option:

  Grammar:

    # an alternative is a literal, possibly empty, or another brace_expr

    part = <any part except LiteralPart>

    alt = part* | brace_expr

    # a brace_expr is group of at least 2 braced and comma-separated
    # alternatives, with optional prefix and suffix.
    brace_expr = part* '{' alt ',' alt (',' alt)* '}' part*

  Problem with this grammar: it's not LL(1).
  Is it indirect left-recursive?
  What's the best way to handle it?  LR(1) parser?

  Iterative algorithm:

  Parse it with a stack?
    It's a stack that asserts there is at least one , in between {}

  Just go through: when you see '{', push another list.
  When you get ',', append to the list.
  When you get '}' and at least one ',', append to the list and pop.
  When you get '}' without a ',', abort.

  If there is no matching '}', then abort with an error.
  If it's not balanced, return an error too.
  """
    # Errors:
    # }a{    - stack depth dips below 0
    # {a,b}{ - Stack depth doesn't end at 0
    # {a}    - no comma, and also not a numeric range

    cur_parts = []  # type: List[word_part_t]
    stack = []  # type: List[_StackFrame]

    found = False

    for i, part in enumerate(w.parts):
        append = True
        if isinstance(part, word_part__LiteralPart):
            id_ = part.token.id
            if id_ == Id.Lit_LBrace:
                # Save prefix parts.  Start new parts list.
                new_frame = _StackFrame(cur_parts)
                stack.append(new_frame)
                cur_parts = []
                append = False
                found = True  # assume found, but can early exit with None later

            elif id_ == Id.Lit_Comma:
                # Append a new alternative.
                #print('*** Appending after COMMA', cur_parts)

                # NOTE: Should we allow this:
                # ,{a,b}
                # or force this:
                # \,{a,b}
                # ?  We're forcing braces right now but not commas.
                if stack:
                    stack[-1].saw_comma = True

                    stack[-1].alt_part.words.append(
                        word.CompoundWord(cur_parts))
                    cur_parts = []  # clear
                    append = False

            elif id_ == Id.Lit_RBrace:
                # TODO:
                # - Detect lack of , -- abort the whole thing
                # - Detect {1..10} and {1..10..2}
                #   - bash and zsh only -- this is NOT implemented by mksh
                #   - Use a regex on the middle part:
                #     - digit+ '..' digit+  ( '..' digit+ )?
                # - Char ranges are bash only!
                #
                # word_part.BracedIntRangePart()
                # word_part.CharRangePart()

                if not stack:  # e.g. echo }  -- unbalanced }
                    return None
                if not stack[-1].saw_comma:  # {foo} is not a real alternative
                    return None
                stack[-1].alt_part.words.append(word.CompoundWord(cur_parts))

                frame = stack.pop()
                cur_parts = frame.cur_parts
                cur_parts.append(frame.alt_part)
                append = False

        if append:
            cur_parts.append(part)

    if len(stack) != 0:
        return None

    if found:
        return word.BracedWordTree(cur_parts)
    else:
        return None
Example #16
File: word.py Project: mrshu/oil
def ErrorWord(fmt, err):
  # type: (str, _ErrorWithLocation) -> word__CompoundWord
  error_str = fmt % err.UserErrorString()
  t = token(Id.Lit_Chars, error_str, const.NO_INTEGER)
  return word.CompoundWord([word_part.LiteralPart(t)])
Example #17
    def Expr(self, pnode):
        # type: (PNode) -> expr_t
        """Walk the homogeneous parse tree and create a typed AST."""
        typ = pnode.typ
        tok = pnode.tok
        children = pnode.children

        #if typ in self.number2symbol:  # non-terminal
        if ISNONTERMINAL(typ):
            c = '-' if not children else len(children)
            #log('non-terminal %s %s', nt_name, c)

            if typ == grammar_nt.lvalue_list:
                return self._AssocBinary(children)

            if typ == grammar_nt.atom:
                if children[0].tok.id == Id.Op_LParen:
                    return self.Expr(children[1])
                else:
                    raise NotImplementedError

            if typ == grammar_nt.eval_input:
                # eval_input: testlist NEWLINE* ENDMARKER
                return self.Expr(children[0])

            if typ == grammar_nt.testlist:
                # testlist: test (',' test)* [',']
                return self._AssocBinary(children)

            elif typ == grammar_nt.arith_expr:
                # arith_expr: term (('+'|'-') term)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.term:
                # term: factor (('*'|'/'|'div'|'mod') factor)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.expr:
                # expr: xor_expr ('|' xor_expr)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.shift_expr:
                # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.comparison:
                # comparison: expr (comp_op expr)*
                return self._AssocBinary(children)

            elif typ == grammar_nt.factor:
                # factor: ('+'|'-'|'~') factor | power
                # the power would have already been reduced
                assert len(children) == 2, children
                op, e = children
                assert isinstance(op.tok, syntax_asdl.token)
                return expr.Unary(op.tok, self.Expr(e))

            elif typ == grammar_nt.atom_expr:
                # atom_expr: ['await'] atom trailer*

                # NOTE: This would be shorter in a recursive style.
                base = self.Expr(children[0])
                n = len(children)
                for i in xrange(1, n):
                    pnode = children[i]
                    tok = pnode.tok
                    base = self._Trailer(base, pnode)

                return base

            elif typ == grammar_nt.power:
                # power: atom_expr ['^' factor]

                # This doesn't repeat, so it doesn't matter if it's left or right
                # associative.
                return self._AssocBinary(children)

            elif typ == grammar_nt.array_literal:
                left_tok = children[0].tok

                # Approximation for now.
                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Lit_Chars
                ]
                array_words = [
                    word.CompoundWord([word_part.LiteralPart(t)])
                    for t in tokens
                ]  # type: List[word_t]
                return expr.ArrayLiteral(left_tok, array_words)

            elif typ == grammar_nt.regex_literal:
                left_tok = children[0].tok

                # Approximation for now.
                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Expr_Name
                ]
                parts = [regex.Var(t) for t in tokens]  # type: List[regex_t]

                return expr.RegexLiteral(left_tok, regex.Concat(parts))

            elif typ == grammar_nt.command_sub:
                left_tok = children[0].tok

                # Approximation for now.
                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Lit_Chars
                ]
                words = [
                    word.CompoundWord([word_part.LiteralPart(t)])
                    for t in tokens
                ]  # type: List[word_t]
                return expr.CommandSub(left_tok, command.SimpleCommand(words))

            elif typ == grammar_nt.expr_sub:
                left_tok = children[0].tok

                return expr.ExprSub(left_tok, self.Expr(children[1]))

            elif typ == grammar_nt.var_sub:
                left_tok = children[0].tok

                return expr.VarSub(left_tok, self.Expr(children[1]))

            elif typ == grammar_nt.dq_string:
                left_tok = children[0].tok

                tokens = [
                    pnode.tok for pnode in children[1:-1]
                    if pnode.tok.id == Id.Lit_Chars
                ]
                parts2 = [oil_word_part.Literal(t)
                          for t in tokens]  # type: List[oil_word_part_t]
                return expr.DoubleQuoted(left_tok, parts2)

            else:
                nt_name = self.number2symbol[typ]
                raise AssertionError("PNode type %d (%s) wasn't handled" %
                                     (typ, nt_name))

        else:  # Terminals should have a token
            #log('terminal %s', tok)

            if tok.id == Id.Expr_Name:
                return expr.Var(tok)
            elif tok.id == Id.Expr_Digits:
                return expr.Const(tok)

            else:
                raise AssertionError(tok.id)
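_AssocBinary isn't shown in this example; for an [operand, op, operand, op, ...] child list, a left-associative fold is the usual shape. A sketch under that assumption (not Oil's code), with a tuple standing in for the expr node:

def assoc_binary(children):
    # children alternates operands and operators: [e0, op1, e1, op2, e2, ...]
    node = children[0]
    for i in range(1, len(children), 2):
        op, right = children[i], children[i + 1]
        node = ('binary', op, node, right)  # stands in for an expr.Binary node
    return node

print(assoc_binary(['1', '+', '2', '+', '3']))
# ('binary', '+', ('binary', '+', '1', '2'), '3')  -- i.e. (1 + 2) + 3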