Esempio n. 1
0
  def _NameInRegex(self, negated_tok, tok):
    # type: (Token, Token) -> re_t

    if negated_tok:  # For error messages
      negated_speck = speck(negated_tok.id, negated_tok.span_id)
    else:
      negated_speck = None

    val = tok.val
    if val == 'dot':
      if negated_tok:
        p_die("Can't negate this symbol", token=tok)
      return tok

    if val in POSIX_CLASSES:
      return posix_class(negated_speck, val)

    perl = PERL_CLASSES.get(val)
    if perl is not None:
      return perl_class(negated_speck, perl)

    if val[0].isupper():  # e.g. HexDigit
      return re.Splice(tok)

    p_die("%r isn't a character class", val, token=tok)
Esempio n. 2
0
  def _ReAtom(self, p_atom):
    # type: (PNode) -> re_t
    """
    re_atom: (
        char_literal
    """
    assert p_atom.typ == grammar_nt.re_atom, p_atom.typ

    children = p_atom.children
    typ = children[0].typ

    if ISNONTERMINAL(typ):
      p_child = p_atom.children[0]
      if typ == grammar_nt.class_literal:
        return re.ClassLiteral(False, self._ClassLiteral(p_child))

      if typ == grammar_nt.braced_var_sub:
        return cast(braced_var_sub, p_child.children[1].tok)

      if typ == grammar_nt.dq_string:
        return cast(double_quoted, p_child.children[1].tok)

      if typ == grammar_nt.sq_string:
        return cast(single_quoted, p_child.children[1].tok)

      if typ == grammar_nt.simple_var_sub:
        return simple_var_sub(children[0].tok)

      if typ == grammar_nt.char_literal:
        return children[0].tok

      raise NotImplementedError(typ)

    else:
      tok = children[0].tok

      # Special punctuation
      if tok.id in (Id.Expr_Dot, Id.Arith_Caret, Id.Expr_Dollar):
        return speck(tok.id, tok.span_id)

      # TODO: d digit can turn into PosixClass and PerlClass right here!
      # It's parsing.
      if tok.id == Id.Expr_Name:
        return self._NameInRegex(None, tok)

      if tok.id == Id.Expr_Symbol:
        # Validate symbols here, like we validate PerlClass, etc.
        if tok.val in ('%start', '%end', 'dot'):
          return tok
        p_die("Unexpected token %r in regex", tok.val, token=tok)

      if tok.id == Id.Expr_At:
        # | '@' Expr_Name
        return re.Splice(children[1].tok)

      if tok.id == Id.Arith_Tilde:
        # | '~' [Expr_Name | class_literal]
        typ = children[1].typ
        if ISNONTERMINAL(typ):
          return re.ClassLiteral(True, self._ClassLiteral(children[1]))
        else:
          return self._NameInRegex(tok, children[1].tok)

      if tok.id == Id.Op_LParen:
        # | '(' regex ')'

        # Note: in ERE (d+) is the same as <d+>.  That is, Group becomes
        # Capture.
        return re.Group(self._Regex(children[1]))

      if tok.id == Id.Arith_Less:
        # | '<' regex [':' name_type] '>'

        regex = self._Regex(children[1])

        n = len(children)
        if n == 5:
          # TODO: Add type expression
          # YES
          #   < d+ '.' d+ : ratio Float >
          #   < d+ : month Int >
          # INVALID
          #   < d+ : month List[int] >
          name_tok = children[3].children[0].tok
        else:
          name_tok = None

        return re.Capture(regex, name_tok)

      if tok.id == Id.Arith_Colon:
        # | ':' '(' regex ')'
        raise NotImplementedError(Id_str(tok.id))

      raise NotImplementedError(Id_str(tok.id))
Esempio n. 3
0
  def _ReAtom(self, p_atom):
    # type: (PNode) -> re_t
    """
    re_atom: (
        char_literal
    """
    assert p_atom.typ == grammar_nt.re_atom, p_atom.typ

    children = p_atom.children
    typ = children[0].typ

    if ISNONTERMINAL(typ):
      p_child = p_atom.children[0]
      if typ == grammar_nt.class_literal:
        return re.ClassLiteral(False, self._ClassLiteral(p_child))

      if typ == grammar_nt.braced_var_sub:
        return cast(braced_var_sub, p_child.children[1].tok)

      if typ == grammar_nt.dq_string:
        return cast(double_quoted, p_child.children[1].tok)

      if typ == grammar_nt.sq_string:
        return cast(single_quoted, p_child.children[1].tok)

      if typ == grammar_nt.simple_var_sub:
        return simple_var_sub(children[0].tok)

      if typ == grammar_nt.char_literal:
        return children[0].tok

      raise NotImplementedError(typ)

    else:
      tok = children[0].tok

      # Special punctuation
      if tok.id in (Id.Expr_Dot, Id.Arith_Caret, Id.Expr_Dollar):
        return speck(tok.id, tok.span_id)

      # TODO: d digit can turn into PosixClass and PerlClass right here!
      # It's parsing.
      if tok.id == Id.Expr_Name:
        return self._NameInRegex(None, tok)

      if tok.id == Id.Expr_Symbol:
        # Validate symbols here, like we validate PerlClass, etc.
        if tok.val in ('%start', '%end', 'dot'):
          return tok
        p_die("Unexpected token %r in regex", tok.val, token=tok)

      if tok.id == Id.Expr_At:
        # | '@' Expr_Name
        return re.Splice(children[1].tok)

      if tok.id == Id.Arith_Tilde:
        # | '~' [Expr_Name | class_literal]
        typ = children[1].typ
        if ISNONTERMINAL(typ):
          ch = children[1].children
          return re.ClassLiteral(True, self._ClassLiteral(children[1]))
        else:
          return self._NameInRegex(tok, children[1].tok)

      if tok.id == Id.Op_LParen:
        # | '(' regex ['as' name_type] ')'

        # TODO: Add variable
        return re.Group(self._Regex(children[1]))

      if tok.id == Id.Arith_Colon:
        # | ':' '(' regex ')'
        raise NotImplementedError(tok.id)

      raise NotImplementedError(tok.id)