예제 #1
0
파일: expr_to_ast.py 프로젝트: grahamc/oil
  def _CompareChain(self, children):
    # type: (List[PNode]) -> expr_t
    """Build one expr.Compare node from a chained comparison.

    Grammar:
      comparison: expr (comp_op expr)*

    `children` alternates operand, operator, operand, ...  The first operand
    becomes the left side; every following (operator, operand) pair is
    appended to two parallel lists.

    An operator node with two children is one of the two-word operators
    ('not in' or 'is not'); it is folded into a single synthetic Id.  In every
    case the resulting speck carries the span of the operator's first token.
    """
    operators = []  # type: List[speck]
    operands = []  # type: List[expr_t]
    first_operand = self.Expr(children[0])

    num_children = len(children)
    pos = 1
    while pos < num_children:
      op_parts = children[pos].children
      first_tok = op_parts[0].tok

      if len(op_parts) != 2:
        # Single-token operator: is, <, ==, etc.
        op_id = first_tok.id
      elif first_tok.id == Id.Expr_Not:  # not in
        op_id = Id.Node_NotIn
      elif first_tok.id == Id.Expr_Is:  # is not
        op_id = Id.Node_IsNot
      else:
        raise AssertionError()

      # Blame the first token of the operator.
      operators.append(speck(op_id, first_tok.span_id))

      # The operand always directly follows its operator.
      operands.append(self.Expr(children[pos + 1]))
      pos += 2
    return expr.Compare(first_operand, operators, operands)
예제 #2
0
파일: expr_to_ast.py 프로젝트: grahamc/oil
  def _NameInClass(self, negated_tok, tok):
    # type: (Token, Token) -> class_literal_term_t
    """Resolve a bare name that appears inside a character class literal.

    Here 'dot' has no special meaning, and a one-character name such as `d` is
    the literal character 'd', not an abbreviation for `digit`.

    Args:
      negated_tok: The negation token in front of the name, or a falsy value
        when the name isn't negated.  Only used to build a speck for error
        messages.
      tok: The name token.

    Returns:
      A CharLiteral for a one-character name, otherwise a posix_class or
      perl_class term.  Any other name is reported through p_die().
    """
    negated_speck = None
    if negated_tok:  # For error messages
      negated_speck = speck(negated_tok.id, negated_tok.span_id)

    name = tok.val

    # A one-character name is a bare, unquoted character literal.  In the
    # grammar, this is expressed as range_char without an ending.
    if len(name) == 1:
      # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
      assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)

      # [~d] is not allowed, only [~digit]
      if negated_tok:
        p_die("Can't negate this symbol", token=tok)
      return class_literal_term.CharLiteral(tok)

    # Long names only: digit, word, ... but not d, w, etc.
    if name in POSIX_CLASSES:
      return posix_class(negated_speck, name)

    perl_name = PERL_CLASSES.get(name)
    if perl_name is not None:
      return perl_class(negated_speck, perl_name)
    p_die("%r isn't a character class", name, token=tok)
예제 #3
0
파일: expr_to_ast.py 프로젝트: grahamc/oil
  def _NameInRegex(self, negated_tok, tok):
    # type: (Token, Token) -> re_t
    """Resolve a bare name that appears at the regex level.

    Lookup order:
      1. 'dot' -- the token itself is returned; it can't be negated.
      2. A name in POSIX_CLASSES -- becomes a posix_class.
      3. A name in PERL_CLASSES -- becomes a perl_class.
      4. A name starting with an uppercase letter (e.g. HexDigit) -- becomes
         a re.Splice.
    Any other name is reported through p_die().

    Args:
      negated_tok: The negation token in front of the name, or a falsy value
        when the name isn't negated.  Only used to build a speck for error
        messages.
      tok: The name token.
    """
    negated_speck = None
    if negated_tok:  # For error messages
      negated_speck = speck(negated_tok.id, negated_tok.span_id)

    name = tok.val

    if name == 'dot':
      if negated_tok:
        p_die("Can't negate this symbol", token=tok)
      return tok

    if name in POSIX_CLASSES:
      return posix_class(negated_speck, name)

    perl_name = PERL_CLASSES.get(name)
    if perl_name is not None:
      return perl_class(negated_speck, perl_name)

    # Capitalized names refer to other patterns, e.g. HexDigit
    first_char = name[0]
    if first_char.isupper():
      return re.Splice(tok)

    p_die("%r isn't a character class", name, token=tok)
예제 #4
0
  def _NameInClass(self, negated_tok, tok):
    # type: (token, token) -> class_literal_term_t
    """Resolve a bare name that appears inside a character class literal.

    'dot' doesn't mean anything here; only names found in self.POSIX_CLASSES
    or self.PERL_CLASSES are accepted.

    Args:
      negated_tok: The negation token in front of the name, or a falsy value
        when the name isn't negated.  Only used to build a speck for error
        messages.
      tok: The name token.

    Returns:
      A posix_class or perl_class term.  Any other name is reported through
      p_die().
    """
    negated_speck = None
    if negated_tok:  # For error messages
      negated_speck = speck(negated_tok.id, negated_tok.span_id)

    name = tok.val

    if name in self.POSIX_CLASSES:
      return posix_class(negated_speck, name)

    perl_name = self.PERL_CLASSES.get(name)
    if perl_name:
      return perl_class(negated_speck, perl_name)
    p_die("%r isn't a character class", name, token=tok)
예제 #5
0
  def _NameInRegex(self, negated_tok, tok):
    # type: (token, token) -> re_t
    """Resolve a bare name that appears at the regex level.

    Lookup order:
      1. 'dot' -- the token itself is returned; it can't be negated.
      2. A name in self.POSIX_CLASSES -- becomes a posix_class.
      3. A name in self.PERL_CLASSES -- becomes a perl_class.
    Any other name is reported through p_die().

    Args:
      negated_tok: The negation token in front of the name, or a falsy value
        when the name isn't negated.  Only used to build a speck for error
        messages.
      tok: The name token.
    """
    negated_speck = None
    if negated_tok:  # For error messages
      negated_speck = speck(negated_tok.id, negated_tok.span_id)

    name = tok.val

    if name == 'dot':
      if negated_tok:
        p_die("Can't negate this symbol", token=tok)
      return tok

    if name in self.POSIX_CLASSES:
      return posix_class(negated_speck, name)

    perl_name = self.PERL_CLASSES.get(name)
    if perl_name:
      return perl_class(negated_speck, perl_name)

    p_die("%r isn't a character class", name, token=tok)
예제 #6
0
파일: expr_to_ast.py 프로젝트: grahamc/oil
  def _ReAtom(self, p_atom):
    # type: (PNode) -> re_t
    """Transform a re_atom parse node into a regex AST node.

    The first child decides the form of the atom.

    When it is a non-terminal:
      class_literal            -> re.ClassLiteral (not negated)
      braced_var_sub,
      dq_string, sq_string     -> the object stored in the `tok` slot of that
                                  node's second child, cast to the right type
      simple_var_sub           -> simple_var_sub
      char_literal             -> the token itself

    When it is a token:
      . ^ $                    -> speck
      Expr_Name                -> see _NameInRegex()
      Expr_Symbol              -> the token, if it's %start, %end or dot
      '@' Expr_Name            -> re.Splice
      '~' [Expr_Name | class_literal]
                               -> negated name or negated re.ClassLiteral
      '(' regex ')'            -> re.Group
      '<' regex [':' name_type] '>'
                               -> re.Capture, with an optional name token
      ':' '(' regex ')'        -> not implemented

    Raises:
      NotImplementedError: For any form not listed above.
    """
    assert p_atom.typ == grammar_nt.re_atom, p_atom.typ

    kids = p_atom.children
    first = kids[0]
    first_typ = first.typ

    if ISNONTERMINAL(first_typ):
      if first_typ == grammar_nt.class_literal:
        return re.ClassLiteral(False, self._ClassLiteral(first))

      if first_typ == grammar_nt.braced_var_sub:
        return cast(braced_var_sub, first.children[1].tok)

      if first_typ == grammar_nt.dq_string:
        return cast(double_quoted, first.children[1].tok)

      if first_typ == grammar_nt.sq_string:
        return cast(single_quoted, first.children[1].tok)

      if first_typ == grammar_nt.simple_var_sub:
        return simple_var_sub(first.tok)

      if first_typ == grammar_nt.char_literal:
        return first.tok

      raise NotImplementedError(first_typ)

    # Everything below handles an atom that starts with a plain token.
    tok = first.tok

    # Special punctuation
    if tok.id in (Id.Expr_Dot, Id.Arith_Caret, Id.Expr_Dollar):
      return speck(tok.id, tok.span_id)

    # TODO: d digit can turn into PosixClass and PerlClass right here!
    # It's parsing.
    if tok.id == Id.Expr_Name:
      return self._NameInRegex(None, tok)

    if tok.id == Id.Expr_Symbol:
      # Validate symbols here, like we validate PerlClass, etc.
      if tok.val in ('%start', '%end', 'dot'):
        return tok
      p_die("Unexpected token %r in regex", tok.val, token=tok)

    if tok.id == Id.Expr_At:
      # | '@' Expr_Name
      return re.Splice(kids[1].tok)

    if tok.id == Id.Arith_Tilde:
      # | '~' [Expr_Name | class_literal]
      negated = kids[1]
      if ISNONTERMINAL(negated.typ):
        return re.ClassLiteral(True, self._ClassLiteral(negated))
      return self._NameInRegex(tok, negated.tok)

    if tok.id == Id.Op_LParen:
      # | '(' regex ')'

      # Note: in ERE (d+) is the same as <d+>.  That is, Group becomes
      # Capture.
      return re.Group(self._Regex(kids[1]))

    if tok.id == Id.Arith_Less:
      # | '<' regex [':' name_type] '>'
      captured = self._Regex(kids[1])

      # Five children means the optional ':' name_type part is present.
      #
      # TODO: Add type expression
      # YES
      #   < d+ '.' d+ : ratio Float >
      #   < d+ : month Int >
      # INVALID
      #   < d+ : month List[int] >
      name_tok = None
      if len(kids) == 5:
        name_tok = kids[3].children[0].tok

      return re.Capture(captured, name_tok)

    if tok.id == Id.Arith_Colon:
      # | ':' '(' regex ')'
      raise NotImplementedError(Id_str(tok.id))

    raise NotImplementedError(Id_str(tok.id))
예제 #7
0
  def _ReAtom(self, p_atom):
    # type: (PNode) -> re_t
    """Transform a re_atom parse node into a regex AST node.

    The first child decides the form of the atom.

    When it is a non-terminal:
      class_literal            -> re.ClassLiteral (not negated)
      braced_var_sub,
      dq_string, sq_string     -> the object stored in the `tok` slot of that
                                  node's second child, cast to the right type
      simple_var_sub           -> simple_var_sub
      char_literal             -> the token itself

    When it is a token:
      . ^ $                    -> speck
      Expr_Name                -> see _NameInRegex()
      Expr_Symbol              -> the token, if it's %start, %end or dot
      '@' Expr_Name            -> re.Splice
      '~' [Expr_Name | class_literal]
                               -> negated name or negated re.ClassLiteral
      '(' regex ')'            -> re.Group
      ':' '(' regex ')'        -> not implemented

    Args:
      p_atom: Parse node whose typ must be grammar_nt.re_atom.

    Raises:
      NotImplementedError: For any form not listed above.
    """
    assert p_atom.typ == grammar_nt.re_atom, p_atom.typ

    children = p_atom.children
    typ = children[0].typ

    if ISNONTERMINAL(typ):
      p_child = p_atom.children[0]
      if typ == grammar_nt.class_literal:
        return re.ClassLiteral(False, self._ClassLiteral(p_child))

      # For the next three forms the already-built AST node is read from the
      # `tok` slot of the second child, hence the cast.
      if typ == grammar_nt.braced_var_sub:
        return cast(braced_var_sub, p_child.children[1].tok)

      if typ == grammar_nt.dq_string:
        return cast(double_quoted, p_child.children[1].tok)

      if typ == grammar_nt.sq_string:
        return cast(single_quoted, p_child.children[1].tok)

      if typ == grammar_nt.simple_var_sub:
        return simple_var_sub(children[0].tok)

      if typ == grammar_nt.char_literal:
        return children[0].tok

      raise NotImplementedError(typ)

    else:
      tok = children[0].tok

      # Special punctuation
      if tok.id in (Id.Expr_Dot, Id.Arith_Caret, Id.Expr_Dollar):
        return speck(tok.id, tok.span_id)

      # TODO: d digit can turn into PosixClass and PerlClass right here!
      # It's parsing.
      if tok.id == Id.Expr_Name:
        return self._NameInRegex(None, tok)

      if tok.id == Id.Expr_Symbol:
        # Validate symbols here, like we validate PerlClass, etc.
        if tok.val in ('%start', '%end', 'dot'):
          return tok
        p_die("Unexpected token %r in regex", tok.val, token=tok)

      if tok.id == Id.Expr_At:
        # | '@' Expr_Name
        return re.Splice(children[1].tok)

      if tok.id == Id.Arith_Tilde:
        # | '~' [Expr_Name | class_literal]
        typ = children[1].typ
        if ISNONTERMINAL(typ):
          # Negated class literal.  (An unused local that only read
          # children[1].children was removed here.)
          return re.ClassLiteral(True, self._ClassLiteral(children[1]))
        else:
          # Negated name; the '~' token is passed along for error messages.
          return self._NameInRegex(tok, children[1].tok)

      if tok.id == Id.Op_LParen:
        # | '(' regex ['as' name_type] ')'

        # TODO: Add variable
        return re.Group(self._Regex(children[1]))

      if tok.id == Id.Arith_Colon:
        # | ':' '(' regex ')'
        raise NotImplementedError(tok.id)

      raise NotImplementedError(tok.id)
예제 #8
0
  def _ReadBracedVarSub(self, left_token, d_quoted):
    # type: (Token, bool) -> braced_var_sub
    """Parse the inside of ${}, i.e. the ${} expression language.

    NAME        = [a-zA-Z_][a-zA-Z0-9_]*
    NUMBER      = [0-9]+                    # ${10}, ${11}, ...

    Subscript   = '[' ('@' | '*' | ArithExpr) ']'
    VarSymbol   = '!' | '@' | '#' | ...
    VarOf       = NAME Subscript?
                | NUMBER      # no subscript allowed, none of these are arrays
                              # ${@[1]} doesn't work, even though slicing does
                | VarSymbol

    TEST_OP     = '-' | ':-' | '=' | ':=' | '+' | ':+' | '?' | ':?'
    STRIP_OP    = '#' | '##' | '%' | '%%'
    CASE_OP     = ',' | ',,' | '^' | '^^'

    UnaryOp     = TEST_OP | STRIP_OP | CASE_OP | ...
    Match       = ('/' | '#' | '%') WORD       # match all / prefix / suffix
    VarExpr     = VarOf
                | VarOf UnaryOp WORD
                | VarOf ':' ArithExpr (':' ArithExpr )?
                | VarOf '/' Match '/' WORD

    LengthExpr  = '#' VarOf  # can't apply operators after length

    RefOrKeys   = '!' VarExpr  # CAN apply operators after a named ref
                               # ${!ref[0]} vs ${!keys[@]} resolved later

    PrefixQuery = '!' NAME ('*' | '@')  # list variable names with a prefix

    VarSub      = LengthExpr
                | RefOrKeys
                | PrefixQuery
                | VarExpr

    NOTES:
    - Arithmetic expressions are used twice, inside subscripts ${a[x+1]} and
      slicing ${a:x+1:y+2}
    - ${#} and ${!} need LL(2) lookahead (considering how my tokenizer works)
    - @ and * are technically arithmetic expressions in this implementation
    - We don't account for bash 4.4: ${param@operator} -- Q E P A a.  Note that
      it's also vectorized.

    Strictness over bash:
    echo ${a[0][0]} doesn't do anything useful, so we disallow it from the
    grammar
    ! and # prefixes can't be composed, even though named refs can be composed
    with other operators
    '#' means 4 different things: length prefix, VarSymbol, UnaryOp to strip a
    prefix, and it can also be a literal part of WORD.

    From the parser's point of view, the prefix # can't be combined with
    UnaryOp/slicing/matching, and the ! can.  However

    ${a[@]:1:2} is not allowed
    ${#a[@]:1:2} is allowed, but gives the wrong answer

    Args:
      left_token: Token whose span_id is recorded as the first entry appended
        to the result's spids.  Presumably the opening token of the
        substitution -- confirm against the caller.
      d_quoted: Selects the lexer mode handed to _ParseVarExpr() for operator
        arguments: VSub_ArgDQ when true, VSub_ArgUnquoted otherwise.

    Returns:
      The parsed part, with prefix_op set when '#' or '!' was used as a
      prefix, and with two span IDs appended to spids: left_token's, then that
      of the token that is current when parsing finishes.
    """
    # Lexer mode for the argument words of operators; only passed through to
    # _ParseVarExpr().
    if d_quoted:
      arg_lex_mode = lex_mode_e.VSub_ArgDQ
    else:
      arg_lex_mode = lex_mode_e.VSub_ArgUnquoted

    # Read the first token inside the braces.
    self._Next(lex_mode_e.VSub_1)
    self._Peek()

    ty = self.token_type

    if ty == Id.VSub_Pound:
      # Disambiguate: '#' is either the length prefix or the variable itself.
      # The token after it decides.
      next_id = self.lexer.LookAhead(lex_mode_e.VSub_1)
      if next_id not in (Id.Unknown_Tok, Id.Right_DollarBrace):
        # e.g. a name, '#' is the prefix
        self._Next(lex_mode_e.VSub_1)
        part = self._ParseVarOf()

        # LengthExpr takes no operators, so the closing brace must follow.
        self._Peek()
        if self.token_type != Id.Right_DollarBrace:
          p_die('Expected } after length expression', token=self.cur_token)

        # NOTE(review): the span comes from self.cur_token as it is at this
        # point, which looks like the closing brace rather than the '#' token
        # -- confirm this is intended.
        part.prefix_op = speck(ty, self.cur_token.span_id)

      else:  # not a prefix, '#' is the variable
        part = self._ParseVarExpr(arg_lex_mode)

    elif ty == Id.VSub_Bang:
      # Same disambiguation as for '#': prefix or variable.
      next_id = self.lexer.LookAhead(lex_mode_e.VSub_1)
      if next_id not in (Id.Unknown_Tok, Id.Right_DollarBrace):
        # e.g. a name, '!' is the prefix
        # ${!a} -- this is a ref
        # ${!3} -- this is ref
        # ${!a[1]} -- this is a ref
        # ${!a[@]} -- this is a keys
        # No lookahead -- do it in a second step, or at runtime
        self._Next(lex_mode_e.VSub_1)
        part = self._ParseVarExpr(arg_lex_mode, allow_query=True)

        # NOTE(review): as above, the span is taken from whatever token is
        # current after _ParseVarExpr(), not from the '!' token -- confirm.
        part.prefix_op = speck(ty, self.cur_token.span_id)

      else:  # not a prefix, '!' is the variable
        part = self._ParseVarExpr(arg_lex_mode)

    # VS_NAME, VS_NUMBER, symbol that isn't # or !
    elif self.token_kind == Kind.VSub:
      part = self._ParseVarExpr(arg_lex_mode)

    else:
      # e.g. ${^}
      # `part` is never assigned on this path, so the code below relies on
      # p_die() not returning -- presumably it always raises.
      p_die('Unexpected token in ${}', token=self.cur_token)

    part.spids.append(left_token.span_id)

    # Does this work?
    # (The current token is assumed to be the closing brace here -- TODO
    # confirm for every branch above.)
    right_spid = self.cur_token.span_id
    part.spids.append(right_spid)

    return part