Example 1
def _ThreeArgs(w_parser):
    # type: (_StringWordEmitter) -> bool_expr_t
    """Returns an expression tree to be evaluated."""
    w0 = w_parser.Read()
    w1 = w_parser.Read()
    w2 = w_parser.Read()

    # NOTE: Order is important here.

    binary_id = match.BracketBinary(w1.s)
    if binary_id != Id.Undefined_Tok:
        return bool_expr.Binary(binary_id, w0, w2)

    if w1.s == '-a':
        return bool_expr.LogicalAnd(bool_expr.WordTest(w0),
                                    bool_expr.WordTest(w2))

    if w1.s == '-o':
        return bool_expr.LogicalOr(bool_expr.WordTest(w0),
                                   bool_expr.WordTest(w2))

    if w0.s == '!':
        w_parser.Rewind(2)
        child = _TwoArgs(w_parser)
        return bool_expr.LogicalNot(child)

    if w0.s == '(' and w2.s == ')':
        return bool_expr.WordTest(w1)

    p_die('Expected binary operator, got %r (3 args)', w1.s, word=w1)
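
The dispatch order above matters: a binary operator in the middle word wins over '-a'/'-o', which win over '!' and parentheses, so [ ! = x ] parses as a comparison of '!' and 'x', not a negation. A minimal standalone sketch of the same ordering, using plain strings and tuples instead of Oil's word and bool_expr types (the operator tables are abridged):

UNARY = {'-d', '-e', '-f', '-L', '-n', '-z'}  # abridged
BINARY = {'=', '!=', '-eq', '-ne', '-lt', '-le', '-gt', '-ge'}  # abridged

def two_args(w0, w1):
    if w0 == '!':
        return ('not', ('word-test', w1))
    if w0 in UNARY:
        return ('unary', w0, w1)
    raise ValueError('Expected unary operator, got %r' % w0)

def three_args(w0, w1, w2):
    # Order is important: check the middle word first.
    if w1 in BINARY:
        return ('binary', w1, w0, w2)
    if w1 == '-a':
        return ('and', ('word-test', w0), ('word-test', w2))
    if w1 == '-o':
        return ('or', ('word-test', w0), ('word-test', w2))
    if w0 == '!':
        return ('not', two_args(w1, w2))  # like Rewind(2) + _TwoArgs above
    if w0 == '(' and w2 == ')':
        return ('word-test', w1)
    raise ValueError('Expected binary operator, got %r (3 args)' % w1)

print(three_args('!', '-f', 'path'))  # ('not', ('unary', '-f', 'path'))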
Example 2
def _ParseStep(self):
    # type: () -> int
    self._Next()  # past Dots
    step = int(self._Eat(Id.Range_Int))
    if step == 0:
        p_die("Step can't be 0", span_id=self.span_id)
    return step
Example 3
def LeftIndex(p, w, left, unused_bp):
    # type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
    """Array indexing, in both LValue and RValue context.

  LValue: f[0] = 1  f[x+1] = 2
  RValue: a = f[0]  b = f[x+1]

  On RHS, you can have:
  1. a = f[0]
  2. a = f(x, y)[0]
  3. a = f[0][0]  # in theory, if we want character indexing?
     NOTE: a = f[0].charAt() is probably better

  On LHS, you can only have:
  1. a[0] = 1

  Nothing else is valid:
  2. function calls return COPIES.  They need a name, at least in osh.
  3. strings don't have mutable characters.
  """
    if not tdop.IsIndexable(left, p.parse_opts.parse_dynamic_arith()):
        p_die("The [ operator doesn't apply to this expression", word=w)
    index = p.ParseUntil(0)  # ] has bp = -1
    p.Eat(Id.Arith_RBracket)

    return arith_expr.Binary(word_.ArithId(w), left, index)
Example 4
  def _NameInRegex(self, negated_tok, tok):
    # type: (Token, Token) -> re_t

    if negated_tok:  # For error messages
      negated_speck = speck(negated_tok.id, negated_tok.span_id)
    else:
      negated_speck = None

    val = tok.val
    if val == 'dot':
      if negated_tok:
        p_die("Can't negate this symbol", token=tok)
      return tok

    if val in POSIX_CLASSES:
      return posix_class(negated_speck, val)

    perl = PERL_CLASSES.get(val)
    if perl is not None:
      return perl_class(negated_speck, perl)

    if val[0].isupper():  # e.g. HexDigit
      return re.Splice(tok)

    p_die("%r isn't a character class", val, token=tok)
Example 5
def _PlaceList(self, p_node):
    # type: (PNode) -> List[place_expr_t]
    """
place_list: expr (',' expr)*
"""
    assert p_node.typ == grammar_nt.place_list
    places = []  # type: List[place_expr_t]
    n = len(p_node.children)
    for i in xrange(0, n, 2):  # was children[::2]
        p = p_node.children[i]
        e = self.Expr(p)
        UP_e = e
        tag = e.tag_()
        if tag == expr_e.Var:  # COMPATIBILITY hack
            e = cast(expr__Var, UP_e)
            places.append(place_expr.Var(e.name))
        elif tag in (place_expr_e.Var, place_expr_e.Subscript,
                     place_expr_e.Attribute):
            places.append(cast(place_expr_t, UP_e))
        else:
            # This blame mechanism seems to work.  Otherwise we don't have a method
            # to blame an arbitrary expr_t.
            p_die("Can't assign to this expression",
                  token=p.tok if p.tok else None)
    return places
Example 6
  def _NameInClass(self, negated_tok, tok):
    # type: (Token, Token) -> class_literal_term_t
    """
    Like the above, but 'dot' doesn't mean anything.  And `d` is a literal 'd',
    not `digit`.
    """
    if negated_tok:  # For error messages
      negated_speck = speck(negated_tok.id, negated_tok.span_id)
    else:
      negated_speck = None

    val = tok.val

    # A bare, unquoted character literal.  In the grammar, this is expressed as
    # range_char without an ending.

    # d is NOT 'digit', it's a literal 'd'!
    if len(val) == 1:
      # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
      assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)

      if negated_tok:  # [~d] is not allowed, only [~digit]
        p_die("Can't negate this symbol", token=tok)
      return class_literal_term.CharLiteral(tok)

    # digit, word, but not d, w, etc.
    if val in POSIX_CLASSES:
      return posix_class(negated_speck, val)

    perl = PERL_CLASSES.get(val)
    if perl is not None:
      return perl_class(negated_speck, perl)
    p_die("%r isn't a character class", val, token=tok)
Example 7
  def ParseUntil(self, rbp):
    # type: (int) -> arith_expr_t
    """
    Parse to the right, eating tokens until we encounter a token with binding
    power LESS THAN OR EQUAL TO rbp.
    """
    # TODO: use Kind.Eof
    if self.op_id in (Id.Eof_Real, Id.Eof_RParen, Id.Eof_Backtick):
      p_die('Unexpected end of input', word=self.cur_word)

    t = self.cur_word
    null_info = self.spec.LookupNud(self.op_id)

    self.Next()  # skip over the token, e.g. ! ~ + -
    node = null_info.nud(self, t, null_info.bp)

    while True:
      t = self.cur_word
      left_info = self.spec.LookupLed(self.op_id)

      # Examples:
      # If we see 1*2+  , rbp = 27 and lbp = 25, so stop.
      # If we see 1+2+  , rbp = 25 and lbp = 25, so stop.
      # If we see 1**2**, rbp = 26 and lbp = 27, so keep going.
      if rbp >= left_info.lbp:
        break
      self.Next()  # skip over the token, e.g. / *

      node = left_info.led(self, t, node, left_info.rbp)

    return node
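
The comments above are the whole precedence-climbing idea: keep looping while the operator to the right binds more tightly than rbp. A self-contained sketch with a toy tokenizer and spec table (not Oil's tdop module; '**' is made right-associative by giving it rbp = lbp - 1):

import re

TOKEN = re.compile(r'\d+|\*\*|[-+*/()]')

class Pratt(object):
    # lbp: how strongly an infix operator binds its left operand.
    # rbp: the power passed when parsing its right operand;
    # rbp == lbp gives left associativity, rbp == lbp - 1 gives right.
    LBP = {'+': 25, '-': 25, '*': 27, '/': 27, '**': 29}
    RBP = {'+': 25, '-': 25, '*': 27, '/': 27, '**': 28}

    def __init__(self, s):
        self.toks = TOKEN.findall(s) + ['<eof>']
        self.i = 0

    def cur(self):
        return self.toks[self.i]

    def advance(self):
        self.i += 1

    def parse_until(self, rbp):
        t = self.cur()
        self.advance()
        if t == '(':
            node = self.parse_until(0)
            assert self.cur() == ')', 'expected )'
            self.advance()
        else:
            node = int(t)
        # Keep going while the next operator binds MORE tightly than rbp.
        while rbp < self.LBP.get(self.cur(), 0):
            op = self.cur()
            self.advance()
            node = (op, node, self.parse_until(self.RBP[op]))
        return node

print(Pratt('1+2*3').parse_until(0))    # ('+', 1, ('*', 2, 3))
print(Pratt('2**3**2').parse_until(0))  # ('**', 2, ('**', 3, 2))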
Example 8
def _TwoArgs(w_parser):
    # type: (_StringWordEmitter) -> bool_expr_t
    """Returns an expression tree to be evaluated."""
    w0 = w_parser.Read()
    w1 = w_parser.Read()

    s0 = w0.s
    if s0 == '!':
        return bool_expr.LogicalNot(bool_expr.WordTest(w1))

    unary_id = Id.Undefined_Tok

    # Oil's preferred long flags
    if w0.s.startswith('--'):
        if s0 == '--dir':
            unary_id = Id.BoolUnary_d
        elif s0 == '--exists':
            unary_id = Id.BoolUnary_e
        elif s0 == '--file':
            unary_id = Id.BoolUnary_f
        elif s0 == '--symlink':
            unary_id = Id.BoolUnary_L

    if unary_id == Id.Undefined_Tok:
        unary_id = match.BracketUnary(w0.s)

    if unary_id == Id.Undefined_Tok:
        p_die('Expected unary operator, got %r (2 args)', w0.s, word=w0)

    return bool_expr.Unary(unary_id, w1)
Example 9
def Eat(self, token_type):
  # type: (Id_t) -> None
  """Assert that the current token matches token_type, then advance."""
  if not self.AtToken(token_type):
    p_die('Parser expected %s, got %s',
          ui.PrettyId(token_type), ui.PrettyId(self.op_id),
          word=self.cur_word)
  self.Next()
Example 10
  def ParseForBuiltin(self):
    # type: () -> bool_expr_t
    """For test builtin."""
    self._Next()

    node = self.ParseExpr()
    if self.bool_id != Id.Eof_Real:
      p_die('Unexpected trailing word %s', word_.Pretty(self.cur_word),
          word=self.cur_word)

    return node
Example 11
    def Parse(self):
        # type: () -> bool_expr_t
        self._Next()

        node = self.ParseExpr()
        if self.op_id != Id.Lit_DRightBracket:
            #p_die("Expected ]], got %r", self.cur_word, word=self.cur_word)
            # NOTE: This might be better as unexpected token, since ]] doesn't always
            # make sense.
            p_die('Expected ]]', word=self.cur_word)
        return node
Example 12
    def _ParseFormatStr(self):
        # type: () -> printf_part_t
        self._Next(lex_mode_e.PrintfPercent)  # move past %

        part = printf_part.Percent()
        while self.token_type in (Id.Format_Flag, Id.Format_Zero):
            # space and + could be implemented
            flag = self.cur_token.val
            if flag in '# +':
                p_die("osh printf doesn't support the %r flag",
                      flag,
                      token=self.cur_token)

            part.flags.append(self.cur_token)
            self._Next(lex_mode_e.PrintfPercent)

        if self.token_type in (Id.Format_Num, Id.Format_Star):
            part.width = self.cur_token
            self._Next(lex_mode_e.PrintfPercent)

        if self.token_type == Id.Format_Dot:
            part.precision = self.cur_token
            self._Next(lex_mode_e.PrintfPercent)  # past dot
            if self.token_type in (Id.Format_Num, Id.Format_Star,
                                   Id.Format_Zero):
                part.precision = self.cur_token
                self._Next(lex_mode_e.PrintfPercent)

        if self.token_type in (Id.Format_Type, Id.Format_Time):
            part.type = self.cur_token

            # ADDITIONAL VALIDATION outside the "grammar".
            if part.type.val in 'eEfFgG':
                p_die("osh printf doesn't support floating point",
                      token=part.type)
            # These two could be implemented.  %c needs utf-8 decoding.
            if part.type.val == 'c':
                p_die("osh printf doesn't support single characters (bytes)",
                      token=part.type)

        else:
            if self.cur_token.val:
                msg = 'Invalid printf format character'
            else:  # for printf '%'
                msg = 'Expected a printf format character'
            p_die(msg, token=self.cur_token)

        # Do this check AFTER the floating point checks
        if part.precision and part.type.val[-1] not in 'fsT':
            p_die("printf precision can't be specified with type %r" %
                  part.type.val,
                  token=part.precision)

        return part
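
What this hand-written scanner accepts per % directive can be summarized as a regex plus the same extra validation. A rough standalone equivalent (a sketch over whole strings, not osh's lexer-driven, token-by-token version):

import re

# %  flags  width  .precision  type
DIRECTIVE = re.compile(r'%([#0\- +]*)(\d+|\*)?(?:\.(\d+|\*)?)?([a-zA-Z%])')

def check_directive(s):
    m = DIRECTIVE.match(s)
    if not m:
        raise ValueError('Invalid printf format directive %r' % s)
    flags, width, precision, typ = m.groups()
    for flag in flags:
        if flag in '# +':
            raise ValueError("doesn't support the %r flag" % flag)
    if typ in 'eEfFgG':
        raise ValueError("doesn't support floating point")
    if typ == 'c':
        raise ValueError("doesn't support single characters (bytes)")
    # Do this check AFTER the floating point checks, as above.
    if precision is not None and typ not in 'fsT':
        raise ValueError("precision can't be specified with type %r" % typ)
    return flags, width, precision, typ

print(check_directive('%-10.3s'))  # ('-', '10', '3', 's')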
Example 13
def _TwoArgs(w_parser):
    # type: (_StringWordEmitter) -> bool_expr_t
    """Returns an expression tree to be evaluated."""
    w0 = w_parser.Read()
    w1 = w_parser.Read()
    if w0.s == '!':
        return bool_expr.LogicalNot(bool_expr.WordTest(w1))
    unary_id = match.BracketUnary(w0.s)
    if unary_id == -1:
        # TODO:
        # - separate lookup by unary
        p_die('Expected unary operator, got %r (2 args)', w0.s, word=w0)
    return bool_expr.Unary(unary_id, w1)
Example 14
    def _ParseFormatStr(self):
        # type: () -> printf_part_t
        """ fmt production """
        self._Next(lex_mode_e.PrintfPercent)  # move past %

        part = printf_part.Percent()
        while self.token_type in (Id.Format_Flag, Id.Format_Zero):
            # space and + could be implemented
            flag = self.cur_token.val
            if flag in '# +':
                p_die("osh printf doesn't support the %r flag",
                      flag,
                      token=self.cur_token)

            part.flags.append(self.cur_token)
            self._Next(lex_mode_e.PrintfPercent)

        if self.token_type in (Id.Format_Num, Id.Format_Star):
            part.width = self.cur_token
            self._Next(lex_mode_e.PrintfPercent)

        if self.token_type == Id.Format_Dot:
            part.precision = self.cur_token
            self._Next(lex_mode_e.PrintfPercent)  # past dot
            if self.token_type in (Id.Format_Num, Id.Format_Star,
                                   Id.Format_Zero):
                part.precision = self.cur_token
                self._Next(lex_mode_e.PrintfPercent)

        if self.token_type in (Id.Format_Type, Id.Format_Time):
            part.type = self.cur_token

            # ADDITIONAL VALIDATION outside the "grammar".
            if part.type.val in 'eEfFgG':
                p_die("osh printf doesn't support floating point",
                      token=part.type)
            # These two could be implemented.  %c needs utf-8 decoding.
            if part.type.val == 'c':
                p_die("osh printf doesn't support single characters (bytes)",
                      token=part.type)

        elif self.token_type == Id.Unknown_Tok:
            p_die('Invalid printf format character', token=self.cur_token)

        else:
            p_die('Expected a printf format character', token=self.cur_token)

        return part
Example 15
def _TwoArgs(w_parser):
    # type: (_StringWordEmitter) -> bool_expr_t
    """Returns an expression tree to be evaluated."""
    w0 = w_parser.Read()
    # TODO: Implement --dir, --file, --exists here
    # --symlink, maybe --executable

    w1 = w_parser.Read()
    if w0.s == '!':
        return bool_expr.LogicalNot(bool_expr.WordTest(w1))
    unary_id = match.BracketUnary(w0.s)
    if unary_id == Id.Undefined_Tok:
        # TODO:
        # - separate lookup by unary
        p_die('Expected unary operator, got %r (2 args)', w0.s, word=w0)
    return bool_expr.Unary(unary_id, w1)
Example 16
  def _Tuple(self, children):
    # type: (List[PNode]) -> expr_t

    n = len(children)

    # (x) -- not a tuple
    if n == 1:
      return self.Expr(children[0])

    # 'x,' and '(x,)' aren't allowed
    if n == 2:
      p_die('Write singleton tuples with tup(), not a trailing comma',
            token=children[1].tok)

    elts = []  # type: List[expr_t]
    for i in xrange(0, n, 2):  # skip commas
      p_node = children[i]
      elts.append(self.Expr(p_node))

    return expr.Tuple(elts, expr_context_e.Store)  # unused expr_context_e
Example 17
def _Classify(gr, tok):
  # type: (Grammar, Token) -> int

  # We have to match up what ParserGenerator.make_grammar() did when
  # calling make_label() and make_first().  See classify() in
  # opy/pgen2/driver.py.

  # 'x' and 'for' are both tokenized as Expr_Name.  This handles the 'for'
  # case.
  if tok.id == Id.Expr_Name:
    if tok.val in gr.keywords:
      return gr.keywords[tok.val]

  # This handles 'x'.
  typ = tok.id
  if typ in gr.tokens:
    return gr.tokens[typ]

  type_str = '' if tok.id == Id.Unknown_Tok else (' (%s)' % ui.PrettyId(tok.id))
  p_die('Unexpected token in expression mode%s', type_str, token=tok)
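
The keyword check has to come before the generic token-type check because keywords and ordinary names share one token type. A tiny sketch of that classification order (hypothetical tables and labels, not pgen2's actual numbering):

def classify(keywords, tokens, tok_type, tok_val):
    # 'for' and 'x' both lex as NAME; only the keyword table tells them apart.
    if tok_type == 'NAME' and tok_val in keywords:
        return keywords[tok_val]
    if tok_type in tokens:
        return tokens[tok_type]
    raise ValueError('Unexpected token %r in expression mode' % tok_val)

KEYWORDS = {'for': 100, 'if': 101}
TOKENS = {'NAME': 1, 'NUMBER': 2}
print(classify(KEYWORDS, TOKENS, 'NAME', 'for'))  # 100 (keyword label)
print(classify(KEYWORDS, TOKENS, 'NAME', 'x'))    # 1 (generic NAME label)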
Example 18
def CheckLhsExpr(node, dynamic_arith, blame_word):
  # type: (arith_expr_t, bool, word_t) -> None
  """Determine if a node is a valid L-value by whitelisting tags.

  Valid:
    x = y
    a[1] = y
  Invalid:
    a[0][0] = y
  """
  UP_node = node
  if node.tag_() == arith_expr_e.Binary:
    node = cast(arith_expr__Binary, UP_node)
    if node.op_id == Id.Arith_LBracket and _VarRefOrWord(node.left, dynamic_arith):
      return
    # But a[0][0] = 1 is NOT valid.

  if _VarRefOrWord(node, dynamic_arith):
    return

  p_die("Left-hand side of this assignment is invalid", word=blame_word)
Example 19
    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command__VarDecl, Token]
        """ var mylist = [1, 2, 3] """

        # TODO: We do need re-entrancy for var x = @[ (1+2) ] and such
        if self.parsing_expr:
            p_die("ShAssignment expression can't be nested like this",
                  token=kw_token)

        self.parsing_expr = True
        try:
            pnode, last_token = self.e_parser.Parse(lexer,
                                                    grammar_nt.oil_var_decl)
        finally:
            self.parsing_expr = False

        if 0:
            self.p_printer.Print(pnode)

        ast_node = self.tr.MakeVarDecl(pnode)
        ast_node.keyword = kw_token  # VarDecl didn't fill this in
        return ast_node, last_token
Example 20
  def Parse(self, lexer, start_symbol):
    # type: (Lexer, int) -> Tuple[PNode, Token]

    # Reuse the parser
    self.push_parser.setup(start_symbol)
    try:
      last_token = _PushOilTokens(self.parse_ctx, self.gr, self.push_parser,
                                  lexer)
    except parse.ParseError as e:
      #log('ERROR %s', e)
      # TODO:
      # - Describe what lexer mode we're in (Invalid syntax in regex)
      #   - Maybe say where the mode started
      # - Id.Unknown_Tok could say "This character is invalid"

      # ParseError has a "too much input" case but I haven't been able to
      # tickle it.  Maybe it's because of the Eof tokens?

      p_die('Syntax error in expression (near %s)', ui.PrettyId(e.tok.id),
            token=e.tok)
      #raise error.Parse('Syntax error in expression', token=e.tok)

    return self.push_parser.rootnode, last_token
Example 21
def Parse(lexer):
    # type: (Lexer) -> List[Token]
    """Given a QSN literal in a string, return the corresponding byte string.

  Grammar:
      qsn = SingleQuote Kind.Char* SingleQuote Whitespace? Eof_Real
  """
    tok = lexer.Read(lex_mode_e.QSN)
    # Caller ensures this.  It's really a left single quote.
    assert tok.id == Id.Right_SingleQuote

    result = []  # type: List[Token]
    while True:
        tok = lexer.Read(lex_mode_e.QSN)
        #log('tok = %s', tok)

        if tok.id == Id.Unknown_Tok:  # extra error
            p_die('Unexpected token in QSN string', token=tok)

        kind = consts.GetKind(tok.id)
        if kind != Kind.Char:
            break

        result.append(tok)

    if tok.id != Id.Right_SingleQuote:
        p_die('Expected closing single quote in QSN string', token=tok)

    # HACK: read in shell's SQ_C mode to get whitespace, which is disallowed
    # INSIDE QSN.  This gets Eof_Real too.
    tok = lexer.Read(lex_mode_e.SQ_C)

    # Doesn't work because we want to allow literal newlines / tabs
    if tok.id == Id.Char_Literals:
        if not _IsWhitespace(tok.val):
            p_die("Unexpected data after closing quote", token=tok)
        tok = lexer.Read(lex_mode_e.QSN)

    if tok.id != Id.Eof_Real:
        p_die('Unexpected token after QSN string', token=tok)

    return result
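
As a coarse check, the grammar in the docstring fits in a single regex. This sketch validates only the overall shape of a QSN string, not the token-level escape handling done above:

import re

# qsn = SingleQuote Kind.Char* SingleQuote Whitespace? Eof_Real
QSN_SHAPE = re.compile(r"'(?:[^'\\]|\\.)*'[ \t\r\n]*\Z")

def is_qsn_shaped(s):
    return QSN_SHAPE.match(s) is not None

print(is_qsn_shaped(r"'a\n\x00'"))   # True
print(is_qsn_shaped("'no closing"))  # False
print(is_qsn_shaped("'extra' junk")) # False: data after closing quote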
Example 22
    def _RangeChar(self, p_node):
        # type: (PNode) -> str
        """Evaluate a range endpoints.
    - the 'a' in 'a'-'z'
    - the \x00 in \x00-\x01
    etc.

    TODO: This function doesn't respect the LST invariant.
    """
        assert p_node.typ == grammar_nt.range_char, p_node
        children = p_node.children
        typ = children[0].typ
        if ISNONTERMINAL(typ):
            # 'a' in 'a'-'b'
            if typ == grammar_nt.sq_string:
                sq_part = cast(single_quoted, children[0].children[1].tok)
                tokens = sq_part.tokens
                if len(tokens) > 1:  # Can happen with multiline single-quoted strings
                    p_die(RANGE_POINT_TOO_LONG, part=sq_part)
                if len(tokens[0].val) > 1:
                    p_die(RANGE_POINT_TOO_LONG, part=sq_part)
                s = tokens[0].val[0]
                return s

            if typ == grammar_nt.char_literal:
                raise AssertionError('TODO')
                # TODO: This brings in a lot of dependencies, and it causes type
                # checking errors.  We want to respect the LST invariant anyway.

                #from osh import word_compile
                #tok = children[0].children[0].tok
                #s = word_compile.EvalCStringToken(tok.id, tok.val)
                #return s

            raise NotImplementedError()
        else:
            # Expr_Name or Expr_DecInt
            tok = p_node.tok
            if tok.id in (Id.Expr_Name, Id.Expr_DecInt):
                # For the a in a-z, 0 in 0-9
                if len(tok.val) != 1:
                    p_die(RANGE_POINT_TOO_LONG, token=tok)
                return tok.val[0]

            raise NotImplementedError()
Example 23
def _GetLine(self):
  # type: () -> Optional[str]
  p_die("Here docs aren't allowed in expressions", token=self.blame_token)
Example 24
  def _ReAtom(self, p_atom):
    # type: (PNode) -> re_t
    """
    re_atom: (
        char_literal
    """
    assert p_atom.typ == grammar_nt.re_atom, p_atom.typ

    children = p_atom.children
    typ = children[0].typ

    if ISNONTERMINAL(typ):
      p_child = p_atom.children[0]
      if typ == grammar_nt.class_literal:
        return re.ClassLiteral(False, self._ClassLiteral(p_child))

      if typ == grammar_nt.braced_var_sub:
        return cast(braced_var_sub, p_child.children[1].tok)

      if typ == grammar_nt.dq_string:
        return cast(double_quoted, p_child.children[1].tok)

      if typ == grammar_nt.sq_string:
        return cast(single_quoted, p_child.children[1].tok)

      if typ == grammar_nt.simple_var_sub:
        return simple_var_sub(children[0].tok)

      if typ == grammar_nt.char_literal:
        return children[0].tok

      raise NotImplementedError(typ)

    else:
      tok = children[0].tok

      # Special punctuation
      if tok.id in (Id.Expr_Dot, Id.Arith_Caret, Id.Expr_Dollar):
        return speck(tok.id, tok.span_id)

      # TODO: d digit can turn into PosixClass and PerlClass right here!
      # It's parsing.
      if tok.id == Id.Expr_Name:
        return self._NameInRegex(None, tok)

      if tok.id == Id.Expr_Symbol:
        # Validate symbols here, like we validate PerlClass, etc.
        if tok.val in ('%start', '%end', 'dot'):
          return tok
        p_die("Unexpected token %r in regex", tok.val, token=tok)

      if tok.id == Id.Expr_At:
        # | '@' Expr_Name
        return re.Splice(children[1].tok)

      if tok.id == Id.Arith_Tilde:
        # | '~' [Expr_Name | class_literal]
        typ = children[1].typ
        if ISNONTERMINAL(typ):
          return re.ClassLiteral(True, self._ClassLiteral(children[1]))
        else:
          return self._NameInRegex(tok, children[1].tok)

      if tok.id == Id.Op_LParen:
        # | '(' regex ')'

        # Note: in ERE (d+) is the same as <d+>.  That is, Group becomes
        # Capture.
        return re.Group(self._Regex(children[1]))

      if tok.id == Id.Arith_Less:
        # | '<' regex [':' name_type] '>'

        regex = self._Regex(children[1])

        n = len(children)
        if n == 5:
          # TODO: Add type expression
          # YES
          #   < d+ '.' d+ : ratio Float >
          #   < d+ : month Int >
          # INVALID
          #   < d+ : month List[int] >
          name_tok = children[3].children[0].tok
        else:
          name_tok = None

        return re.Capture(regex, name_tok)

      if tok.id == Id.Arith_Colon:
        # | ':' '(' regex ')'
        raise NotImplementedError(Id_str(tok.id))

      raise NotImplementedError(Id_str(tok.id))
Example 25
    def ParseFactor(self):
        # type: () -> bool_expr_t
        """
    Factor  : WORD
            | UNARY_OP WORD
            | WORD BINARY_OP WORD
            | '(' Expr ')'
    """
        if self.b_kind == Kind.BoolUnary:
            # Just save the type and not the token itself?
            op = self.op_id
            self._Next()
            w = self.cur_word
            # e.g. [[ -f < ]].  But [[ -f '<' ]] is OK

            tag = w.tag_()
            if tag != word_e.Compound and tag != word_e.String:
                p_die('Invalid argument to unary operator', word=w)
            self._Next()
            node = bool_expr.Unary(op, w)  # type: bool_expr_t
            return node

        if self.b_kind == Kind.Word:
            # Peek ahead another token.
            t2 = self._LookAhead()
            t2_op_id = word_.BoolId(t2)
            t2_b_kind = consts.GetKind(t2_op_id)

            #log('t2 %s / t2_op_id %s / t2_b_kind %s', t2, t2_op_id, t2_b_kind)
            # Op for < and >, -a and -o pun
            if t2_b_kind == Kind.BoolBinary or t2_op_id in (Id.Op_Less,
                                                            Id.Op_Great):
                left = self.cur_word

                self._Next()
                op = self.op_id

                # TODO: Need to change to lex_mode_e.BashRegex.
                # _Next(lex_mode) then?
                is_regex = t2_op_id == Id.BoolBinary_EqualTilde
                if is_regex:
                    self._Next(lex_mode=lex_mode_e.BashRegex)
                else:
                    self._Next()

                right = self.cur_word
                if is_regex:
                    # NOTE: StaticEval for checking regex syntax isn't enough.  We would
                    # need to pass do_ere so that the quoted parts get escaped.
                    #ok, s, unused_quoted = word_.StaticEval(right)
                    pass

                self._Next()
                return bool_expr.Binary(op, left, right)
            else:
                # [[ foo ]]
                w = self.cur_word
                self._Next()
                return bool_expr.WordTest(w)

        if self.op_id == Id.Op_LParen:
            self._Next()
            node = self.ParseExpr()
            if self.op_id != Id.Op_RParen:
                p_die('Expected ), got %s',
                      word_.Pretty(self.cur_word),
                      word=self.cur_word)
            self._Next()
            return node

        # It's not WORD, UNARY_OP, or '('
        p_die('Unexpected token in boolean expression', word=self.cur_word)
Example 26
    def Parse(self):
        # type: () -> word_part__BracedRange
        self._Next()
        if self.token_type == Id.Range_Int:
            part = self._ParseRange(self.token_type)

            # Check step validity and fill in a default
            start = int(part.start)
            end = int(part.end)
            if start < end:
                if part.step == NO_STEP:
                    part.step = 1
                if part.step <= 0:  # 0 step is not allowed
                    p_die('Invalid step %d for ascending integer range',
                          part.step,
                          span_id=self.span_id)
            elif start > end:
                if part.step == NO_STEP:
                    part.step = -1
                if part.step >= 0:  # 0 step is not allowed
                    p_die('Invalid step %d for descending integer range',
                          part.step,
                          span_id=self.span_id)
            else:
                # {1..1}  singleton range is dumb but I suppose consistent
                part.step = 1

        elif self.token_type == Id.Range_Char:
            part = self._ParseRange(self.token_type)

            # Compare integers because mycpp doesn't support < on strings!
            start_num = ord(part.start[0])
            end_num = ord(part.end[0])

            # Check step validity and fill in a default
            if start_num < end_num:
                if part.step == NO_STEP:
                    part.step = 1
                if part.step <= 0:  # 0 step is not allowed
                    p_die('Invalid step %d for ascending character range',
                          part.step,
                          span_id=self.span_id)
            elif start_num > end_num:
                if part.step == NO_STEP:
                    part.step = -1
                if part.step >= 0:  # 0 step is not allowed
                    p_die('Invalid step %d for descending character range',
                          part.step,
                          span_id=self.span_id)
            else:
                # {a..a}  singleton range is dumb but I suppose consistent
                part.step = 1

            # Check matching cases
            upper1 = part.start.isupper()
            upper2 = part.end.isupper()
            if upper1 != upper2:
                p_die('Mismatched cases in character range',
                      span_id=self.span_id)

        else:
            raise _NotARange('')

        # prevent unexpected trailing tokens
        self._Eat(Id.Eol_Tok)
        return part
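
The step rules above are: default to 1 ascending and -1 descending, and reject a zero step or one pointing away from the endpoint. They are easy to see in a standalone expansion function; NO_STEP here is a hypothetical sentinel standing in for the parser's:

NO_STEP = 0  # hypothetical sentinel, standing in for the parser's NO_STEP

def expand_int_range(start, end, step=NO_STEP):
    # Check step validity and fill in a default, as in Parse() above.
    if start < end:
        if step == NO_STEP:
            step = 1
        if step <= 0:
            raise ValueError('Invalid step %d for ascending integer range' % step)
    elif start > end:
        if step == NO_STEP:
            step = -1
        if step >= 0:
            raise ValueError('Invalid step %d for descending integer range' % step)
    else:
        step = 1  # {1..1} is a singleton range

    out = []
    i = start
    while (i <= end) if step > 0 else (i >= end):
        out.append(i)
        i += step
    return out

print(expand_int_range(1, 10, 3))  # [1, 4, 7, 10]
print(expand_int_range(5, 1))      # [5, 4, 3, 2, 1]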
Example 27
def _PushOilTokens(parse_ctx, gr, p, lex):
    # type: (ParseContext, Grammar, parse.Parser, Lexer) -> Token
    """Push tokens onto pgen2's parser.

  Returns the last token so it can be reused/seen by the CommandParser.
  """
    #log('keywords = %s', gr.keywords)
    #log('tokens = %s', gr.tokens)

    last_token = None  # type: Optional[Token]
    prev_was_newline = False

    balance = 0  # to ignore newlines

    while True:
        if last_token:  # e.g. left over from WordParser
            tok = last_token
            #log('last_token = %s', last_token)
            last_token = None
        else:
            tok = lex.Read(lex_mode_e.Expr)
            #log('tok = %s', tok)

        # Comments and whitespace.  Newlines aren't ignored.
        if consts.GetKind(tok.id) == Kind.Ignored:
            continue

        # For multiline lists, maps, etc.
        if tok.id == Id.Op_Newline:
            if balance > 0:
                #log('*** SKIPPING NEWLINE')
                continue
            # Eliminate duplicate newline tokens.  It makes the grammar simpler, and
            # it's consistent with CPython's lexer and our own WordParser.
            if prev_was_newline:
                continue
            prev_was_newline = True
        else:
            prev_was_newline = False

        balance += _OTHER_BALANCE.get(tok.id, 0)
        #log('BALANCE after seeing %s = %d', tok.id, balance)

        #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
        #  tok.id = KEYWORDS[tok.val]
        #  log('Replaced with %s', tok.id)

        assert tok.id < 256, Id_str(tok.id)

        ilabel = _Classify(gr, tok)
        #log('tok = %s, ilabel = %d', tok, ilabel)

        if p.addtoken(tok.id, tok, ilabel):
            return tok

        #
        # Mutually recursive calls into the command/word parsers.
        #

        if mylib.PYTHON:
            if tok.id == Id.Left_PercentParen:  # %(
                left_tok = tok
                lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

                # Blame the opening token
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)
                words = []
                close_tok = None  # type: Optional[Token]
                while True:
                    w = w_parser.ReadWord(lex_mode_e.ShCommand)
                    if 0:
                        log('w = %s', w)

                    if w.tag_() == word_e.Token:
                        tok = cast(Token, w)
                        if tok.id == Id.Right_ShArrayLiteral:
                            close_tok = tok
                            break
                        elif tok.id == Id.Op_Newline:  # internal newlines allowed
                            continue
                        else:
                            # Token
                            p_die('Unexpected token in array literal: %r',
                                  tok.val,
                                  word=w)

                    assert isinstance(w, compound_word)  # for MyPy
                    words.append(w)

                words2 = braces.BraceDetectAll(words)
                words3 = word_.TildeDetectAll(words2)

                typ = Id.Expr_CastedDummy

                lit_part = sh_array_literal(left_tok, words3)
                opaque = cast(Token, lit_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                # Now push the closing )
                ilabel = _Classify(gr, close_tok)
                done = p.addtoken(tok.id, close_tok, ilabel)
                assert not done  # can't end the expression

                continue

            # $(  @(  &(
            if tok.id in (Id.Left_DollarParen, Id.Left_AtParen,
                          Id.Left_AmpParen):

                left_token = tok

                lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                c_parser = parse_ctx.MakeParserForCommandSub(
                    line_reader, lex, Id.Eof_RParen)
                node = c_parser.ParseCommandSub()
                # A little gross: Copied from osh/word_parse.py
                right_token = c_parser.w_parser.cur_token

                cs_part = command_sub(left_token, node)
                cs_part.spids.append(left_token.span_id)
                cs_part.spids.append(right_token.span_id)

                typ = Id.Expr_CastedDummy
                opaque = cast(Token, cs_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                # Now push the closing )
                ilabel = _Classify(gr, right_token)
                done = p.addtoken(right_token.id, right_token, ilabel)
                assert not done  # can't end the expression

                continue

            if tok.id == Id.Left_DoubleQuote:
                left_token = tok
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)

                parts = []  # type: List[word_part_t]
                last_token = w_parser.ReadDoubleQuoted(left_token, parts)
                expr_dq_part = double_quoted(left_token, parts)

                typ = Id.Expr_CastedDummy
                opaque = cast(Token, expr_dq_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                continue

            if tok.id == Id.Left_DollarBrace:
                left_token = tok
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)

                part, last_token = w_parser.ReadBracedVarSub(left_token)

                # It's casted word_part__BracedVarSub -> dummy -> expr__BracedVarSub!
                typ = Id.Expr_CastedDummy
                opaque = cast(Token, part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression

                continue

            # '' and r'' and c''
            if tok.id in (Id.Left_SingleQuote, Id.Left_RSingleQuote,
                          Id.Left_CSingleQuote):
                if tok.id == Id.Left_CSingleQuote:
                    sq_mode = lex_mode_e.SQ_C
                else:
                    sq_mode = lex_mode_e.SQ_Raw

                left_token = tok
                line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
                w_parser = parse_ctx.MakeWordParser(lex, line_reader)

                tokens = []  # type: List[Token]
                last_token = w_parser.ReadSingleQuoted(sq_mode, left_token,
                                                       tokens, True)

                sq_part = single_quoted(left_token, tokens)

                typ = Id.Expr_CastedDummy
                opaque = cast(Token, sq_part)  # HACK for expr_to_ast
                done = p.addtoken(typ, opaque, gr.tokens[typ])
                assert not done  # can't end the expression
                continue

    else:
        # We never broke out -- EOF is too soon (how can this happen???)
        raise parse.ParseError("incomplete input", tok.id, tok)
Example 28
def LeftError(p, t, left, rbp):
  # type: (TdopParser, word_t, arith_expr_t, int) -> arith_expr_t
  # Hm is this not called because of binding power?
  p_die("Token can't be used in infix position", word=t)
  return None  # never reached
Example 29
    def DoRedirect(self, node, local_symbols):
        #print(node, file=sys.stderr)
        op_spid = node.op.span_id
        op_id = node.op.id
        self.cursor.PrintUntil(op_spid)

        # TODO:
        # - Do < and <& the same way.
        # - How to handle here docs and here strings?
        # - >> becomes >+ or >-, or maybe >>>

        #if node.tag == redir_e.Redir:
        if False:
            if node.fd == runtime.NO_SPID:
                if op_id == Id.Redir_Great:
                    self.f.write('>')  # Allow us to replace the operator
                    self.cursor.SkipUntil(op_spid + 1)
                elif op_id == Id.Redir_GreatAnd:
                    self.f.write('> !')  # Replace >& 2 with > !2
                    spid = word_.LeftMostSpanForWord(node.arg_word)
                    self.cursor.SkipUntil(spid)
                    #self.DoWordInCommand(node.arg_word)

            else:
                # NOTE: Spacing like !2>err.txt vs !2 > err.txt can be done in the
                # formatter.
                self.f.write('!%d ' % node.fd)
                if op_id == Id.Redir_Great:
                    self.f.write('>')
                    self.cursor.SkipUntil(op_spid + 1)
                elif op_id == Id.Redir_GreatAnd:
                    self.f.write('> !')  # Replace 1>& 2 with !1 > !2
                    spid = word_.LeftMostSpanForWord(node.arg_word)
                    self.cursor.SkipUntil(spid)

            self.DoWordInCommand(node.arg_word, local_symbols)

        #elif node.tag == redir_e.HereDoc:
        elif False:
            ok, delimiter, delim_quoted = word_.StaticEval(node.here_begin)
            if not ok:
                p_die('Invalid here doc delimiter', word=node.here_begin)

            # Turn everything into <<.  We just change the quotes
            self.f.write('<<')

            #here_begin_spid2 = word_.RightMostSpanForWord(node.here_begin)
            if delim_quoted:
                self.f.write(" '''")
            else:
                self.f.write(' """')

            delim_end_spid = word_.RightMostSpanForWord(node.here_begin)
            self.cursor.SkipUntil(delim_end_spid + 1)

            #self.cursor.SkipUntil(here_begin_spid + 1)

            # Now print the lines.  TODO: Have a flag to indent these to the level of
            # the owning command, e.g.
            #   cat <<EOF
            # EOF
            # Or since most here docs are the top level, you could just have a hack
            # for a fixed indent?  TODO: Look at real use cases.
            for part in node.stdin_parts:
                self.DoWordPart(part, local_symbols)

            self.cursor.SkipUntil(node.here_end_span_id + 1)
            if delim_quoted:
                self.f.write("'''\n")
            else:
                self.f.write('"""\n')

            # Need
            #self.cursor.SkipUntil(here_end_spid2)

        else:
            raise AssertionError(node.__class__.__name__)

        # <<< 'here word'
        # << 'here word'
        #
        # 2> out.txt
        # !2 > out.txt

        # cat 1<< EOF
        # hello $name
        # EOF
        # cat !1 << """
        # hello $name
        # """
        #
        # cat << 'EOF'
        # no expansion
        # EOF
        #   cat <<- 'EOF'
        #   no expansion and indented
        #
        # cat << '''
        # no expansion
        # '''
        #   cat << '''
        #   no expansion and indented
        #   '''

        # Warn about multiple here docs on a line.
        # As an obscure feature, allow
        # cat << \'ONE' << \"TWO"
        # 123
        # ONE
        # 234
        # TWO
        # The _ is an indicator that it's not a string to be piped in.
        pass
Example 30
def NullError(p, t, bp):
  # type: (TdopParser, word_t, int) -> arith_expr_t
  # TODO: I need position information
  p_die("Token can't be used in prefix position", word=t)
  return None  # never reached
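
LeftError and NullError are the defaults a TDOP spec falls back to: tokens with no prefix role get NullError as their nud, and tokens with no infix role get LeftError as their led, with lbp 0 so ParseUntil's loop never advances into them. A sketch of how such defaults might be wired (a hypothetical ParserSpec, not Oil's tdop module):

class LeafInfo(object):
    def __init__(self, nud, bp):
        self.nud, self.bp = nud, bp

class LeftInfo(object):
    def __init__(self, led, lbp, rbp):
        self.led, self.lbp, self.rbp = led, lbp, rbp

def null_error(p, t, bp):
    raise SyntaxError("Token %r can't be used in prefix position" % t)

def left_error(p, t, left, rbp):
    raise SyntaxError("Token %r can't be used in infix position" % t)

class ParserSpec(object):
    def __init__(self):
        self.nud_lookup = {}
        self.led_lookup = {}

    def LookupNud(self, tok):
        # Tokens never registered as prefix operators fail via null_error.
        return self.nud_lookup.get(tok, LeafInfo(null_error, 0))

    def LookupLed(self, tok):
        # lbp 0 means ParseUntil's "rbp >= lbp" test stops before calling this.
        return self.led_lookup.get(tok, LeftInfo(left_error, 0, 0))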