Ejemplo n.º 1
0
 def get_literal(character, is_case_insensitive):
     """
     Build the literal expression that matches a single character.
     When is_case_insensitive is true and suppress_case_insensitive (a name
     read from the enclosing scope; it is not defined in this snippet) is
     false, the literal also covers the character's other case forms.
     @param character: the single character to match
     @param is_case_insensitive: whether case variants should be matched too
     @return: a Regex.Literal object covering the character and, where
         applicable, its single-character lowercase/uppercase forms
     """
     if is_case_insensitive and not suppress_case_insensitive:
         # lower()/upper() may return more than one character (on Python 3,
         # u'\xdf'.upper() is u'SS'), and ord() only accepts exactly one, so
         # multi-character mappings are skipped instead of raising TypeError.
         # The character itself is always kept so that a titlecase character
         # still matches itself.
         variants = []
         for variant in (character.lower(), character.upper(), character):
             if len(variant) == 1 and variant not in variants:
                 variants.append(variant)
         if len(variants) > 1:
             return Regex.Literal([(ord(variant), ord(variant))
                                   for variant in variants])
     return Regex.Literal([(ord(character), ord(character))])
Ejemplo n.º 2
0
 def parse_character_class(self):
     """
     Read a bracketed character class ([...]) starting at the parser's current index.
     @return: a Regex.Literal object built from the items produced by
         parse_character_class_expression
     """
     # Materialise the parsed items into a fresh list before wrapping them.
     ranges = list(self.parse_character_class_expression())
     return Regex.Literal(ranges)
Ejemplo n.º 3
0
 def visit_literal(self, literal):
     """
     Push a new Regex.Literal carrying the visited literal's characters onto the stack.
     @param literal: the literal being visited
     """
     duplicate = Regex.Literal([])
     # The attribute is assigned directly, so the new object refers to the
     # same characters object as the visited literal (no copy is made).
     duplicate.characters = literal.characters
     self.stack.append(duplicate)
Ejemplo n.º 4
0
    def parse_literal(self, suppress_case_insensitive=False):
        """
        Parse a single character from the string, from its current index into a literal expression.
        Handles the '.' wildcard, backslash escapes and plain characters.
        @param suppress_case_insensitive: when true, plain and unknown-escape
            characters are not expanded to their case variants even if
            self.is_case_insensitive is set
        @return: a Regex.Literal or Regex.LiteralExcept object representing the character.
        @raise RegexParserExpected: if the next unescaped character is in Parser.closing
        @raise RegexParserInvalidCharacter: if the next unescaped character is in Parser.special
        """
        def get_literal(character, is_case_insensitive):
            """
            Build the literal for one character, adding its other case forms
            when case-insensitive matching is on and not suppressed.
            """
            if is_case_insensitive and not suppress_case_insensitive:
                # lower()/upper() may return more than one character (on
                # Python 3, u'\xdf'.upper() is u'SS'), and ord() only accepts
                # exactly one, so multi-character mappings are skipped instead
                # of raising TypeError. The character itself is always kept so
                # that a titlecase character still matches itself.
                variants = []
                for variant in (character.lower(), character.upper(), character):
                    if len(variant) == 1 and variant not in variants:
                        variants.append(variant)
                if len(variants) > 1:
                    return Regex.Literal([(ord(variant), ord(variant))
                                          for variant in variants])
            return Regex.Literal([(ord(character), ord(character))])

        if self.get_next_if(u'.'):
            # Wildcard: every code point from 1 through 0x10FFFF.
            return Regex.Literal([(1, 0x10FFFF)])
        elif self.get_next_if(u'\\'):
            # Escape sequence: the character after the backslash selects a
            # predefined class, a numeric code point or a Unicode lookup.
            # NOTE(review): \w and \W below do not include Parser.digits,
            # unlike most regex dialects - confirm this is intended.
            if self.get_next_if(u'w'):
                return Regex.Literal(
                    [Parser.lowercase, Parser.uppercase, Parser.underscore])
            elif self.get_next_if(u'W'):
                return Regex.LiteralExcept(
                    [Parser.lowercase, Parser.uppercase, Parser.underscore])
            elif self.get_next_if(u'r'):
                return Regex.Literal([Parser.carriage_return])
            elif self.get_next_if(u'n'):
                return Regex.Literal([Parser.line_feed])
            elif self.get_next_if(u't'):
                return Regex.Literal([Parser.tab])
            elif self.get_next_if(u's'):
                return Regex.Literal([Parser.space])
            elif self.get_next_if(u'd'):
                return Regex.Literal([Parser.digits])
            elif self.get_next_if(u'v'):
                return Regex.Literal([Parser.vertical_tab])
            elif self.get_next_if(u'f'):
                return Regex.Literal([Parser.form_feed])
            elif self.get_next_if(u'x'):
                # \x followed by 2 hex digits
                codepoint = self.parse_hex_digits(2)
                return Regex.Literal([(codepoint, codepoint)])
            elif self.get_next_if(u'p'):
                coverage = self.parse_unicode_expression()
                return Regex.Literal(coverage)
            elif self.get_next_if(u'N'):
                coverage = self.parse_unicode_name()
                return Regex.Literal(coverage)
            elif self.get_next_if(u'P'):
                # Same expression parser as \p, but the result is negated.
                coverage = self.parse_unicode_expression()
                return Regex.LiteralExcept(coverage)
            elif self.get_next_if(u'u'):
                # \u followed by 4 hex digits
                codepoint = self.parse_hex_digits(4)
                return Regex.Literal([(codepoint, codepoint)])
            elif self.get_next_if(u'U'):
                # \U followed by 6 hex digits
                codepoint = self.parse_hex_digits(6)
                return Regex.Literal([(codepoint, codepoint)])
            else:
                # Any other escaped character stands for itself; no check
                # against Parser.closing or Parser.special is made here.
                return get_literal(self.get_next(), self.is_case_insensitive)
        else:
            character = self.get_next()
            if character in Parser.closing:
                # self.index - 1: get_next() has presumably already advanced
                # past the offending character - confirm against get_next.
                raise RegexParserExpected("character", self.text,
                                          self.index - 1)
            elif character in Parser.special:
                raise RegexParserInvalidCharacter(character)
            return get_literal(character, self.is_case_insensitive)