Example #1
0
 def class_test(self, cclass):
     """Return the lowercase letters 'a' to 'z' accepted by cclass.

     Each letter is passed to cclass.test() in alphabetical order and
     those that test true are joined into a single string."""
     letters = (character(code) for code in range(ord('a'), ord('z') + 1))
     return ''.join(letter for letter in letters if cclass.test(letter))
Example #2
0
 def class_test(self, cclass):
     """Return the lowercase letters 'a' to 'z' accepted by cclass.

     Each letter is passed to cclass.test() in alphabetical order and
     those that test true are joined into a single string."""
     letters = (character(code) for code in range(ord('a'), ord('z') + 1))
     return ''.join(letter for letter in letters if cclass.test(letter))
Example #3
0
 def test_character(self):
     """Check the value and type of py2.character(0x2A).

     The result must compare equal to both spellings of the literal
     and be an instance of type(u"").  When the interpreter's major
     version is below 3 it must not be an instance of type("");
     otherwise it must be."""
     star = py2.character(0x2A)
     text_type = type(u"")
     native_type = type("")
     self.assertTrue(star == "\x2A")
     self.assertTrue(star == u"\x2A")
     self.assertTrue(isinstance(star, text_type))
     if sys.version_info[0] >= 3:
         self.assertTrue(isinstance(star, native_type))
     else:
         self.assertFalse(isinstance(star, native_type))
Example #4
0
 def test_ucd_blocks(self):
     """Check CharClass.ucd_block lookups for two named blocks."""
     basic_latin = unicode5.CharClass.ucd_block('Basic Latin')
     # a differently spelled name must yield the very same object
     same_block = unicode5.CharClass.ucd_block('basiclatin')
     self.assertTrue(basic_latin is same_block, "block name normalization")
     for code in range3(0x80):
         self.assertTrue(basic_latin.test(character(code)))
     self.assertFalse(basic_latin.test(character(0x80)))
     # randomly pick one of the other blocks
     arrows = unicode5.CharClass.ucd_block('Arrows')
     self.assertFalse(arrows.test(character(0x2150)))
     self.assertTrue(arrows.test(character(0x2190)))
Example #5
0
 def test_ucd_blocks(self):
     """Check CharClass.ucd_block lookups for two named blocks."""
     basic_latin = unicode5.CharClass.ucd_block('Basic Latin')
     # a differently spelled name must yield the very same object
     same_block = unicode5.CharClass.ucd_block('basiclatin')
     self.assertTrue(basic_latin is same_block, "block name normalization")
     for code in range3(0x80):
         self.assertTrue(basic_latin.test(character(code)))
     self.assertFalse(basic_latin.test(character(0x80)))
     # randomly pick one of the other blocks
     arrows = unicode5.CharClass.ucd_block('Arrows')
     self.assertFalse(arrows.test(character(0x2190 - 0x40)))
     self.assertTrue(arrows.test(character(0x2190)))
Example #6
0
    def test_name_start(self):
        """Count name and name-start characters below 0x10000.

        Every name-start character must also be a name character, and
        the totals must match the expected counts.  Productions::

            [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] |
                [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
                [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
                [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] |
                [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]

            [5] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
                [#x0300-#x036F] | [#x203F-#x2040]"""
        name_total = 0
        start_total = 0
        for code in range3(0x10000):
            c = character(code)
            if not structures.is_name_char(c):
                # not a name character, so it cannot start a name either
                self.assertFalse(structures.is_name_start_char(c),
                                 "NameStart not a name char: %s" % c)
                continue
            name_total += 1
            if structures.is_name_start_char(c):
                start_total += 1
        self.assertTrue(name_total == 54129,
                        "name char total %i" % name_total)
        self.assertTrue(start_total == 54002,
                        "name start char total %i" % start_total)
Example #7
0
    def test_name_start(self):
        """Count name and name-start characters below 0x10000.

        Every name-start character must also be a name character, and
        the totals must match the expected counts.  Productions::

            [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] |
                [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
                [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
                [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] |
                [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]

            [5] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
                [#x0300-#x036F] | [#x203F-#x2040]"""
        name_total = 0
        start_total = 0
        for code in range3(0x10000):
            c = character(code)
            if not structures.is_name_char(c):
                # not a name character, so it cannot start a name either
                self.assertFalse(structures.is_name_start_char(c),
                                 "NameStart not a name char: %s" % c)
                continue
            name_total += 1
            if structures.is_name_start_char(c):
                start_total += 1
        self.assertTrue(name_total == 54129,
                        "name char total %i" % name_total)
        self.assertTrue(start_total == 54002,
                        "name start char total %i" % start_total)
Example #8
0
def test_data(mem_cache):
    """Insert 26 entities into the collection opened from mem_cache.

    Entity number i (0 to 25) is keyed by str(i), has 'Value' set to
    character(0x41 + i) and 'Expires' set to a TimePoint built from
    time.time() + 10 * i."""
    with mem_cache.open() as collection:
        for index in range3(26):
            entity = collection.new_entity()
            entity.set_key(str(index))
            entity['Value'].set_from_value(character(0x41 + index))
            # later entities expire progressively further in the future
            expires = iso.TimePoint.from_unix_time(
                time.time() + 10 * index)
            entity['Expires'].set_from_value(expires)
            collection.insert_entity(entity)
Example #9
0
def test_data(mem_cache):
    """Insert 26 entities into the collection opened from mem_cache.

    Entity number i (0 to 25) is keyed by str(i), has 'Value' set to
    character(0x41 + i) and 'Expires' set to a TimePoint built from
    time.time() + 10 * i."""
    with mem_cache.open() as collection:
        for index in range3(26):
            entity = collection.new_entity()
            entity.set_key(str(index))
            entity['Value'].set_from_value(character(0x41 + index))
            # later entities expire progressively further in the future
            expires = iso.TimePoint.from_unix_time(
                time.time() + 10 * index)
            entity['Expires'].set_from_value(expires)
            collection.insert_entity(entity)
Example #10
0
 def test_negate_char_class(self):
     """Check CharClass.negate against hand-written complements.

     Each case pairs the constructor arguments of a class with those
     of its expected negation; negating a second time must give back
     a class equal to the original."""
     lowest = character(0)
     highest = character(maxunicode)
     cases = (
         ([], [[lowest, highest]]),
         ([['b', 'c']], [[lowest, 'a'], ['d', highest]]),
         ([['b', 'c'], ['e', 'f']],
          [[lowest, 'a'], ['d', 'd'], ['g', highest]]),
     )
     for source_args, negated_args in cases:
         original = unicode5.CharClass(*source_args)
         # negate a copy so the original stays available for comparison
         negated = unicode5.CharClass(original)
         negated.negate()
         expected = unicode5.CharClass(*negated_args)
         self.assertTrue(
             negated == expected,
             "%s negated to %s, expected %s" %
             (repr(original), repr(negated), repr(expected)))
         negated.negate()
         self.assertTrue(
             negated == original,
             "%s double negation got %s" % (repr(original), repr(negated)))
Example #11
0
 def find_edges(self, test_func, max):
     """Return the codes at which test_func changes its result.

     Codes 0 to max inclusive are converted with character() and passed
     to test_func.  The scan starts in the false state and records each
     code whose result differs from the current state.  If the state is
     still true after the last code, max + 1 is appended to close the
     final run."""
     boundaries = []
     inside = False
     for code in range3(max + 1):
         if inside != test_func(character(code)):
             inside = not inside
             boundaries.append(code)
     if inside:
         boundaries.append(max + 1)
     return boundaries
Example #12
0
 def find_edges(self, test_func, max):
     """Return the codes at which test_func changes its result.

     Codes 0 to max inclusive are converted with character() and passed
     to test_func.  The scan starts in the false state and records each
     code whose result differs from the current state.  If the state is
     still true after the last code, max + 1 is appended to close the
     final run."""
     boundaries = []
     inside = False
     for code in range3(max + 1):
         if inside != test_func(character(code)):
             inside = not inside
             boundaries.append(code)
     if inside:
         boundaries.append(max + 1)
     return boundaries
Example #13
0
 def test_negate_char_class(self):
     """Check CharClass.negate against hand-written complements.

     Each case pairs the constructor arguments of a class with those
     of its expected negation; negating a second time must give back
     a class equal to the original."""
     lowest = character(0)
     highest = character(maxunicode)
     cases = (
         ([], [[lowest, highest]]),
         ([['b', 'c']], [[lowest, 'a'], ['d', highest]]),
         ([['b', 'c'], ['e', 'f']],
          [[lowest, 'a'], ['d', 'd'], ['g', highest]]),
     )
     for source_args, negated_args in cases:
         original = unicode5.CharClass(*source_args)
         # negate a copy so the original stays available for comparison
         negated = unicode5.CharClass(original)
         negated.negate()
         expected = unicode5.CharClass(*negated_args)
         self.assertTrue(
             negated == expected,
             "%s negated to %s, expected %s" %
             (repr(original), repr(negated), repr(expected)))
         negated.negate()
         self.assertTrue(
             negated == original,
             "%s double negation got %s" % (repr(original), repr(negated)))
Example #14
0
 def test_ucd_classes(self):
     """Check CharClass.ucd_category for 'Cc', 'C' and 'Cf'."""
     cc_class = unicode5.CharClass.ucd_category('Cc')
     c_class = unicode5.CharClass.ucd_category('C')
     # codes 0x00-0x1F and 0x7F-0x9F must be in both 'Cc' and 'C'
     for codes in (range3(0x20), range3(0x7F, 0xA0)):
         for code in codes:
             self.assertTrue(cc_class.test(character(code)))
             self.assertTrue(c_class.test(character(code)))
     # code 0xAD must be in 'C' and 'Cf' but not in 'Cc'
     self.assertFalse(cc_class.test(character(0xAD)))
     self.assertTrue(c_class.test(character(0xAD)))
     cf_class = unicode5.CharClass.ucd_category('Cf')
     self.assertTrue(cf_class.test(character(0xAD)))
Example #15
0
 def test_ucd_classes(self):
     """Check CharClass.ucd_category for 'Cc', 'C' and 'Cf'."""
     cc_class = unicode5.CharClass.ucd_category('Cc')
     c_class = unicode5.CharClass.ucd_category('C')
     # codes 0x00-0x1F and 0x7F-0x9F must be in both 'Cc' and 'C'
     for codes in (range3(0x20), range3(0x7F, 0xA0)):
         for code in codes:
             self.assertTrue(cc_class.test(character(code)))
             self.assertTrue(c_class.test(character(code)))
     # code 0xAD must be in 'C' and 'Cf' but not in 'Cc'
     self.assertFalse(cc_class.test(character(0xAD)))
     self.assertTrue(c_class.test(character(0xAD)))
     cf_class = unicode5.CharClass.ucd_category('Cf')
     self.assertTrue(cf_class.test(character(0xAD)))
Example #16
0
 def test_character(self):
     """Check py2.character and py2.join_characters.

     py2.character(0x2A) must compare equal to both spellings of the
     literal and be an instance of type(u"").  When the interpreter's
     major version is below 3 it must not be an instance of type("");
     otherwise it must be."""
     star = py2.character(0x2A)
     text_type = type(u"")
     native_type = type("")
     self.assertTrue(star == "\x2A")
     self.assertTrue(star == u"\x2A")
     self.assertTrue(isinstance(star, text_type))
     if sys.version_info[0] >= 3:
         self.assertTrue(isinstance(star, native_type))
     else:
         self.assertFalse(isinstance(star, native_type))
     # character must also be able to convert bytes, even if they
     # have values outside the ASCII range
     self.assertTrue(py2.character(py2.byte(0x2A)) == "\x2A")
     self.assertTrue(py2.character(py2.byte(0xe9)) == py2.ul("\xE9"))
     # joining works for a list of single characters and for a list
     # holding one longer string
     cafe = u"Caf\xe9"
     self.assertTrue(py2.join_characters(list(cafe)) == cafe)
     self.assertTrue(py2.join_characters([cafe]) == cafe)
Example #17
0
 def test_constructor(self):
     """Check CharClass construction.

     Covers the empty class, a single character, a (first, last) range
     tuple, a string of characters (which must collapse into two
     ranges) and a copy made from another CharClass."""
     c = unicode5.CharClass()
     if MAX_CHAR < 0x10FFFF:
         # logging.warn is a deprecated alias that newer Pythons have
         # removed; warning() with a lazy argument logs the same text
         logging.warning("unicode5 tests truncated to character(0x%X) by "
                         "narrow python build", MAX_CHAR)
     # an empty class must reject every character
     for code in range3(MAX_CHAR + 1):
         self.assertFalse(c.test(character(code)))
     c = unicode5.CharClass('a')
     self.assertTrue(self.class_test(c) == 'a')
     c = unicode5.CharClass(('a', 'z'))
     self.assertTrue(self.class_test(c) == 'abcdefghijklmnopqrstuvwxyz')
     c = unicode5.CharClass('abcxyz')
     # 'abc' and 'xyz' are each expected to merge into a single range
     self.assertTrue(
         len(c.ranges) == 2, "No range optimization: %s" % repr(c.ranges))
     self.assertTrue(self.class_test(c) == 'abcxyz')
     cc = unicode5.CharClass(c)
     self.assertTrue(self.class_test(cc) == 'abcxyz')
Example #18
0
 def test_character(self):
     """Check py2.character and py2.join_characters.

     py2.character(0x2A) must compare equal to both spellings of the
     literal and be an instance of type(u"").  When the interpreter's
     major version is below 3 it must not be an instance of type("");
     otherwise it must be."""
     star = py2.character(0x2A)
     text_type = type(u"")
     native_type = type("")
     self.assertTrue(star == "\x2A")
     self.assertTrue(star == u"\x2A")
     self.assertTrue(isinstance(star, text_type))
     if sys.version_info[0] >= 3:
         self.assertTrue(isinstance(star, native_type))
     else:
         self.assertFalse(isinstance(star, native_type))
     # character must also be able to convert bytes, even if they
     # have values outside the ASCII range
     self.assertTrue(py2.character(py2.byte(0x2A)) == "\x2A")
     self.assertTrue(py2.character(py2.byte(0xe9)) == py2.ul("\xE9"))
     # joining works for a list of single characters and for a list
     # holding one longer string
     cafe = u"Caf\xe9"
     self.assertTrue(py2.join_characters(list(cafe)) == cafe)
     self.assertTrue(py2.join_characters([cafe]) == cafe)
 def test_wildcard_esc(self):
     """Check parsing of the wildcard escape::

         [37a] WildcardEsc ::= '.'

     The class parsed from ".*" must reject line feed and carriage
     return and accept randomly chosen other characters.  Parsing "x"
     must raise RegularExpressionError."""
     p = xsi.RegularExpressionParser(".*")
     cclass = p.require_wildcard_esc()
     self.assertTrue(p.pos == 1)
     self.assertFalse(cclass.test("\x0A"), "Line feed in .")
     self.assertFalse(cclass.test("\x0D"), "Carriage return in .")
     for _ in range3(100):
         # do a few random tests
         code = random.randint(0, maxunicode)
         if code in (10, 13):
             # line feed and carriage return are checked above
             continue
         self.assertTrue(cclass.test(character(code)),
                         "Random char not in . character(%04X)" % code)
     p = xsi.RegularExpressionParser("x")
     try:
         p.require_wildcard_esc()
     except xsi.RegularExpressionError:
         pass
     else:
         # report the failure outside the try body so that it can never
         # be mistaken for the expected parser error
         self.fail("wildcard escape parsed from 'x'")
Example #20
0
 def test_wildcard_esc(self):
     """Check parsing of the wildcard escape::

         [37a] WildcardEsc ::= '.'

     The class parsed from ".*" must reject line feed and carriage
     return and accept randomly chosen other characters.  Parsing "x"
     must raise RegularExpressionError."""
     p = xsi.RegularExpressionParser(".*")
     cclass = p.require_wildcard_esc()
     self.assertTrue(p.pos == 1)
     self.assertFalse(cclass.test("\x0A"), "Line feed in .")
     self.assertFalse(cclass.test("\x0D"), "Carriage return in .")
     for _ in range3(100):
         # do a few random tests
         code = random.randint(0, maxunicode)
         if code in (10, 13):
             # line feed and carriage return are checked above
             continue
         self.assertTrue(cclass.test(character(code)),
                         "Random char not in . character(%04X)" % code)
     p = xsi.RegularExpressionParser("x")
     try:
         p.require_wildcard_esc()
     except xsi.RegularExpressionError:
         pass
     else:
         # report the failure outside the try body so that it can never
         # be mistaken for the expected parser error
         self.fail("wildcard escape parsed from 'x'")
Example #21
0
 def test_constructor(self):
     """Check CharClass construction.

     Covers the empty class, a single character, a (first, last) range
     tuple, a string of characters (which must collapse into two
     ranges), a copy made from another CharClass and a mix of ranges
     and a character that must collapse into one range."""
     c = unicode5.CharClass()
     if MAX_CHAR < 0x10FFFF:
         # logging.warn is a deprecated alias that newer Pythons have
         # removed; warning() with a lazy argument logs the same text
         logging.warning("unicode5 tests truncated to character(0x%X) by "
                         "narrow python build", MAX_CHAR)
     # an empty class must reject every character
     for code in range3(MAX_CHAR + 1):
         self.assertFalse(c.test(character(code)))
     c = unicode5.CharClass('a')
     self.assertTrue(self.class_test(c) == 'a')
     c = unicode5.CharClass(('a', 'z'))
     self.assertTrue(self.class_test(c) == 'abcdefghijklmnopqrstuvwxyz')
     c = unicode5.CharClass('abcxyz')
     # 'abc' and 'xyz' are each expected to merge into a single range
     self.assertTrue(
         len(c.ranges) == 2, "No range optimization: %s" % repr(c.ranges))
     self.assertTrue(self.class_test(c) == 'abcxyz')
     cc = unicode5.CharClass(c)
     self.assertTrue(self.class_test(cc) == 'abcxyz')
     # 'd' bridges a-c and e-g so a single range a-g is expected
     c = unicode5.CharClass(('a', 'c'), ('e', 'g'), 'd')
     self.assertTrue(
         len(c.ranges) == 1,
         "Missing range optimization: %s" % repr(c.ranges))
Example #22
0
 def test_pos_char_group(self):
     """Check parsing of positive character groups::

         posCharGroup ::= ( charRange | charClassEsc )+

     The class parsed from the first pattern is compared with an
     explicit list of members for every code below 256.  The remaining
     patterns check where a bare '-' is accepted."""
     p = xsi.RegularExpressionParser("\\^-b^xa-c\\?-A\\p{Sc}")
     expected = ul("$^_`abcx?@A\xa2\xa3\xa4\xa5")
     cclass = p.require_pos_char_group()
     for code in range3(256):
         c = character(code)
         self.assertTrue(cclass.test(c) == (c in expected),
                         "Bad test on character: %s" % repr(c))
     # The - character is a valid character range only at the
     # beginning or end of a positive character group; these two
     # patterns only need to parse without raising
     for pattern in ("-a-c", "a-c-]"):
         xsi.RegularExpressionParser(pattern).require_pos_char_group()
     p = xsi.RegularExpressionParser("A-C-a-c")
     try:
         p.require_pos_char_group()
     except xsi.RegularExpressionError:
         pass
     else:
         # report the failure outside the try body so that it can never
         # be mistaken for the expected parser error
         self.fail("hyphen accepted within range")
Example #23
0
 def test_neg_char_group(self):
     """Check parsing of negative character groups::

         negCharGroup ::= '^' posCharGroup

     The class parsed from the first pattern must match exactly the
     codes below 256 that are NOT in the explicit member list.  The
     remaining patterns check where a bare '-' is accepted."""
     p = xsi.RegularExpressionParser("^\\^-b^xa-c\\?-A\\p{Sc}")
     excluded = ul("$^_`abcx?@A\xa2\xa3\xa4\xa5")
     cclass = p.require_neg_char_group()
     for code in range3(256):
         c = character(code)
         self.assertTrue(cclass.test(c) != (c in excluded),
                         "Bad test on character: %s" % repr(c))
     # a leading or trailing '-' is allowed; these two patterns only
     # need to parse without raising
     for pattern in ("^-a-c", "^a-c-]"):
         xsi.RegularExpressionParser(pattern).require_neg_char_group()
     # The ^ character is only valid at the beginning of a positive
     # character group if it is part of a negative character group
     # this rule is automatically honoured by the parser
     p = xsi.RegularExpressionParser("^A-C-a-c")
     try:
         p.require_neg_char_group()
     except xsi.RegularExpressionError:
         pass
     else:
         # report the failure outside the try body so that it can never
         # be mistaken for the expected parser error
         self.fail("hyphen accepted within range")
 def test_pos_char_group(self):
     """Check parsing of positive character groups::

         posCharGroup ::= ( charRange | charClassEsc )+

     The class parsed from the first pattern is compared with an
     explicit list of members for every code below 256.  The remaining
     patterns check where a bare '-' is accepted."""
     p = xsi.RegularExpressionParser("\\^-b^xa-c\\?-A\\p{Sc}")
     expected = ul("$^_`abcx?@A\xa2\xa3\xa4\xa5")
     cclass = p.require_pos_char_group()
     for code in range3(256):
         c = character(code)
         self.assertTrue(cclass.test(c) == (c in expected),
                         "Bad test on character: %s" % repr(c))
     # The - character is a valid character range only at the
     # beginning or end of a positive character group; these two
     # patterns only need to parse without raising
     for pattern in ("-a-c", "a-c-]"):
         xsi.RegularExpressionParser(pattern).require_pos_char_group()
     p = xsi.RegularExpressionParser("A-C-a-c")
     try:
         p.require_pos_char_group()
     except xsi.RegularExpressionError:
         pass
     else:
         # report the failure outside the try body so that it can never
         # be mistaken for the expected parser error
         self.fail("hyphen accepted within range")
 def test_neg_char_group(self):
     """Check parsing of negative character groups::

         negCharGroup ::= '^' posCharGroup

     The class parsed from the first pattern must match exactly the
     codes below 256 that are NOT in the explicit member list.  The
     remaining patterns check where a bare '-' is accepted."""
     p = xsi.RegularExpressionParser("^\\^-b^xa-c\\?-A\\p{Sc}")
     excluded = ul("$^_`abcx?@A\xa2\xa3\xa4\xa5")
     cclass = p.require_neg_char_group()
     for code in range3(256):
         c = character(code)
         self.assertTrue(cclass.test(c) != (c in excluded),
                         "Bad test on character: %s" % repr(c))
     # a leading or trailing '-' is allowed; these two patterns only
     # need to parse without raising
     for pattern in ("^-a-c", "^a-c-]"):
         xsi.RegularExpressionParser(pattern).require_neg_char_group()
     # The ^ character is only valid at the beginning of a positive
     # character group if it is part of a negative character group
     # this rule is automatically honoured by the parser
     p = xsi.RegularExpressionParser("^A-C-a-c")
     try:
         p.require_neg_char_group()
     except xsi.RegularExpressionError:
         pass
     else:
         # report the failure outside the try body so that it can never
         # be mistaken for the expected parser error
         self.fail("hyphen accepted within range")
Example #26
0
    def test_basics(self):
        """Check the single-character classifiers of uri on codes 0-255.

        Each classifier is called for every code below 256 and its
        result compared with membership of an explicit string.  Some of
        the productions being tested::

            alpha = lowalpha | upalpha
            lowalpha = "a" | ... | "z"
            upalpha  = "A" | ... | "Z"
            digit = "0" | ... | "9"
            alphanum = alpha | digit
            reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
            unreserved  = alphanum | mark
            mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
        """
        # UPALPHA = <any US-ASCII uppercase letter "A".."Z">
        upalpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        lowalpha = "abcdefghijklmnopqrstuvwxyz"
        alpha = upalpha + lowalpha
        digit = "0123456789"
        alphanum = alpha + digit
        mark = "-_.!~*'()"
        control = (
            "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D"
            "\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C"
            "\x1D\x1E\x1F\x7F")
        # (classifier name in uri, characters it must accept), in the
        # order the checks are run
        expectations = [
            ("is_upalpha", upalpha),
            ("is_lowalpha", lowalpha),
            ("is_alpha", alpha),
            ("is_digit", digit),
            ("is_alphanum", alphanum),
            ("is_reserved_2396", ";/?:@&=+$,"),
            ("is_reserved", ";/?:@&=+$,[]"),
            ("is_mark", mark),
            ("is_unreserved", alphanum + mark),
            ("is_control", control),
            ("is_space", " "),
            ("is_delims", "<>#%\""),
            ("is_unwise_2396", "{}|\\^[]`"),
            ("is_unwise", "{}|\\^`"),
            ("is_authority_reserved", ";:@?/"),
            ("is_path_segment_reserved", "/;=?"),
            ("is_query_reserved", ";/?:@&=+,$"),
        ]
        for name, members in expectations:
            # look the classifier up only when its turn comes
            classifier = getattr(uri, name)
            for code in range3(0, 256):
                c = character(code)
                self.assertTrue(classifier(c) == (c in members),
                                "%s(chr(%i))" % (name, code))
Example #27
0
 def test_codecs(self):
     """Check XMLEntity decoding and BOM detection.

     Entities are built from byte strings in several encodings, with
     and without an explicit encoding argument.  For each one the bom
     attribute is checked (where an expectation is stated) and the
     decoded characters are compared one at a time with the expected
     text.  One entity has its encoding changed part way through."""

     def check_chars(entity, expected, label):
         # step through entity comparing its current character with
         # each expected character in turn
         for want in expected:
             got = entity.the_char
             self.assertTrue(
                 got == want,
                 "Print: parsing %s got %s instead of %s" %
                 (label, repr(got), repr(want)))
             entity.next_char()

     m = ul('Caf\xe9')
     e = structures.XMLEntity(b'Caf\xc3\xa9')
     self.assertTrue(e.bom is False, 'defaulted utf-8 BOM detection')
     check_chars(e, m, "utf-8")
     e = structures.XMLEntity(b'Caf\xe9', 'latin_1')
     self.assertTrue(e.bom is False, 'latin_1 BOM detection')
     check_chars(e, m, "latin-1")
     # This string should be automatically detected
     e = structures.XMLEntity(b'\xff\xfeC\x00a\x00f\x00\xe9\x00')
     self.assertTrue(e.bom is True, 'utf-16-le BOM detection')
     check_chars(e, m, "utf-16LE")
     e = structures.XMLEntity(b'\xfe\xff\x00C\x00a\x00f\x00\xe9')
     self.assertTrue(e.bom is True, 'utf-16-be BOM detection')
     check_chars(e, m, "utf-16BE")
     e = structures.XMLEntity(b'\xef\xbb\xbfCaf\xc3\xa9', 'utf-8')
     self.assertTrue(e.bom is False, 'utf-8 BOM detection')
     check_chars(e, m, "utf-8 with BOM")
     # change the encoding after the first two characters are read
     e = structures.XMLEntity(b'Caf\xe9')
     for _ in 'Ca':
         e.next_char()
     e.change_encoding('ISO-8859-1')
     self.assertTrue(e.the_char == 'f', "Bad encoding change")
     e.next_char()
     e_acute = character(0xE9)
     self.assertTrue(
         e.the_char == e_acute,
         "Print: change encoding got %s instead of %s" %
         (repr(e.the_char), repr(e_acute)))
     e = structures.XMLEntity(b'C\x00a\x00f\x00\xe9\x00', 'utf-16-le')
     self.assertTrue(e.bom is False, 'utf-16-le no BOM detection error')
     check_chars(e, m, "utf-16LE no BOM")
     # add <? to trigger auto-detection
     e = structures.XMLEntity(b'\x00<\x00?\x00C\x00a\x00f\x00\xe9')
     self.assertTrue(e.bom is False, 'utf-16-be no BOM detection error')
     check_chars(e, ul("<?") + m, "utf-16BE no BOM")
     # a second BOM must come through as an ordinary character
     e = structures.XMLEntity(b'\xfe\xff\xfe\xff\x00C\x00a\x00f\x00\xe9')
     check_chars(e, character(0xfeff) + m, "double BOM")
Example #28
0
 def test_codecs(self):
     """Check XMLEntity decoding and BOM detection.

     Entities are built from byte strings in several encodings, with
     and without an explicit encoding argument.  For each one the bom
     attribute is checked (where an expectation is stated) and the
     decoded characters are compared one at a time with the expected
     text.  One entity has its encoding changed part way through."""

     def check_chars(entity, expected, label):
         # step through entity comparing its current character with
         # each expected character in turn
         for want in expected:
             got = entity.the_char
             self.assertTrue(
                 got == want,
                 "Print: parsing %s got %s instead of %s" %
                 (label, repr(got), repr(want)))
             entity.next_char()

     m = ul("Caf\xe9")
     e = structures.XMLEntity(b"Caf\xc3\xa9")
     self.assertTrue(e.bom is False, "defaulted utf-8 BOM detection")
     check_chars(e, m, "utf-8")
     e = structures.XMLEntity(b"Caf\xe9", "latin_1")
     self.assertTrue(e.bom is False, "latin_1 BOM detection")
     check_chars(e, m, "latin-1")
     # This string should be automatically detected
     e = structures.XMLEntity(b"\xff\xfeC\x00a\x00f\x00\xe9\x00")
     self.assertTrue(e.bom is True, "utf-16-le BOM detection")
     check_chars(e, m, "utf-16LE")
     e = structures.XMLEntity(b"\xfe\xff\x00C\x00a\x00f\x00\xe9")
     self.assertTrue(e.bom is True, "utf-16-be BOM detection")
     check_chars(e, m, "utf-16BE")
     e = structures.XMLEntity(b"\xef\xbb\xbfCaf\xc3\xa9", "utf-8")
     self.assertTrue(e.bom is False, "utf-8 BOM detection")
     check_chars(e, m, "utf-8 with BOM")
     # change the encoding after the first two characters are read
     e = structures.XMLEntity(b"Caf\xe9")
     for _ in "Ca":
         e.next_char()
     e.change_encoding("ISO-8859-1")
     self.assertTrue(e.the_char == "f", "Bad encoding change")
     e.next_char()
     e_acute = character(0xE9)
     self.assertTrue(
         e.the_char == e_acute,
         "Print: change encoding got %s instead of %s" %
         (repr(e.the_char), repr(e_acute)))
     e = structures.XMLEntity(b"C\x00a\x00f\x00\xe9\x00", "utf-16-le")
     self.assertTrue(e.bom is False, "utf-16-le no BOM detection error")
     check_chars(e, m, "utf-16LE no BOM")
     # add <? to trigger auto-detection
     e = structures.XMLEntity(b"\x00<\x00?\x00C\x00a\x00f\x00\xe9")
     self.assertTrue(e.bom is False, "utf-16-be no BOM detection error")
     check_chars(e, ul("<?") + m, "utf-16BE no BOM")
     # a second BOM must come through as an ordinary character
     e = structures.XMLEntity(b"\xfe\xff\xfe\xff\x00C\x00a\x00f\x00\xe9")
     check_chars(e, character(0xFEFF) + m, "double BOM")
Example #29
0
    def test_basics(self):
        """Check the single-character classifiers of uri on codes 0-255.

        Each classifier is called for every code below 256 and its
        result compared with membership of an explicit string.  Some of
        the productions being tested::

            alpha = lowalpha | upalpha
            lowalpha = "a" | ... | "z"
            upalpha  = "A" | ... | "Z"
            digit = "0" | ... | "9"
            alphanum = alpha | digit
            reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
            unreserved  = alphanum | mark
            mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
        """
        # UPALPHA = <any US-ASCII uppercase letter "A".."Z">
        upalpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        lowalpha = "abcdefghijklmnopqrstuvwxyz"
        alpha = upalpha + lowalpha
        digit = "0123456789"
        alphanum = alpha + digit
        mark = "-_.!~*'()"
        control = (
            "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D"
            "\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C"
            "\x1D\x1E\x1F\x7F")
        # (classifier name in uri, characters it must accept), in the
        # order the checks are run
        expectations = [
            ("is_upalpha", upalpha),
            ("is_lowalpha", lowalpha),
            ("is_alpha", alpha),
            ("is_digit", digit),
            ("is_alphanum", alphanum),
            ("is_reserved_2396", ";/?:@&=+$,"),
            ("is_reserved", ";/?:@&=+$,[]"),
            ("is_mark", mark),
            ("is_unreserved", alphanum + mark),
            ("is_control", control),
            ("is_space", " "),
            ("is_delims", "<>#%\""),
            ("is_unwise_2396", "{}|\\^[]`"),
            ("is_unwise", "{}|\\^`"),
            ("is_authority_reserved", ";:@?/"),
            ("is_path_segment_reserved", "/;=?"),
            ("is_query_reserved", ";/?:@&=+,$"),
        ]
        for name, members in expectations:
            # look the classifier up only when its turn comes
            classifier = getattr(uri, name)
            for code in range3(0, 256):
                c = character(code)
                self.assertTrue(classifier(c) == (c in members),
                                "%s(chr(%i))" % (name, code))
Example #30
0
 def test_codecs(self):
     """Check XMLEntity character decoding and its ``bom`` flag.

     The same text (``Caf`` followed by U+00E9) is fed to XMLEntity in
     several byte encodings, with and without an explicit encoding
     argument, and the decoded characters are compared one at a time.
     """
     expected = ul('Caf\xe9')

     def check_chars(entity, chars, template):
         # Compare the entity's current character with each expected
         # character in turn, advancing the entity after every check.
         # template takes two %s fields: actual repr, expected repr.
         for want in chars:
             self.assertTrue(
                 entity.the_char == want,
                 template % (repr(entity.the_char), repr(want)))
             entity.next_char()

     # UTF-8 bytes, no encoding argument
     entity = structures.XMLEntity(b'Caf\xc3\xa9')
     self.assertTrue(entity.bom is False, 'defaulted utf-8 BOM detection')
     check_chars(
         entity, expected, "Print: parsing utf-8 got %s instead of %s")

     # explicit latin_1
     entity = structures.XMLEntity(b'Caf\xe9', 'latin_1')
     self.assertTrue(entity.bom is False, 'latin_1 BOM detection')
     check_chars(
         entity, expected, "Print: parsing latin-1 got %s instead of %s")

     # This string should be automatically detected
     entity = structures.XMLEntity(b'\xff\xfeC\x00a\x00f\x00\xe9\x00')
     self.assertTrue(entity.bom is True, 'utf-16-le BOM detection')
     check_chars(
         entity, expected, "Print: parsing utf-16LE got %s instead of %s")

     entity = structures.XMLEntity(b'\xfe\xff\x00C\x00a\x00f\x00\xe9')
     self.assertTrue(entity.bom is True, 'utf-16-be BOM detection')
     check_chars(
         entity, expected, "Print: parsing utf-16BE got %s instead of %s")

     # UTF-8 bytes with a leading EF BB BF and an explicit encoding
     entity = structures.XMLEntity(b'\xef\xbb\xbfCaf\xc3\xa9', 'utf-8')
     self.assertTrue(entity.bom is False, 'utf-8 BOM detection')
     check_chars(
         entity, expected,
         "Print: parsing utf-8 with BOM got %s instead of %s")

     # switch encoding part way through: step past 'C' and 'a' first
     entity = structures.XMLEntity(b'Caf\xe9')
     entity.next_char()
     entity.next_char()
     entity.change_encoding('ISO-8859-1')
     self.assertTrue(entity.the_char == 'f', "Bad encoding change")
     entity.next_char()
     e_acute = character(0xE9)
     self.assertTrue(
         entity.the_char == e_acute,
         "Print: change encoding got %s instead of %s" %
         (repr(entity.the_char), repr(e_acute)))

     # explicit utf-16-le, no BOM in the data
     entity = structures.XMLEntity(
         b'C\x00a\x00f\x00\xe9\x00', 'utf-16-le')
     self.assertTrue(
         entity.bom is False, 'utf-16-le no BOM detection error')
     check_chars(
         entity, expected,
         "Print: parsing utf-16LE no BOM got %s instead of %s")

     # add <? to trigger auto-detection
     entity = structures.XMLEntity(
         b'\x00<\x00?\x00C\x00a\x00f\x00\xe9')
     self.assertTrue(
         entity.bom is False, 'utf-16-be no BOM detection error')
     check_chars(
         entity, ul("<?") + expected,
         "Print: parsing utf-16BE no BOM got %s instead of %s")

     # two BOMs: the second is expected to come through as U+FEFF
     entity = structures.XMLEntity(
         b'\xfe\xff\xfe\xff\x00C\x00a\x00f\x00\xe9')
     check_chars(
         entity, character(0xfeff) + expected,
         "Print: parsing double BOM got %s instead of %s")
Example #31
0
 def test_letnum(self):
     """Check urn's letter/number character tests for 0x00-0xFF.

     Basic syntax definitions::

         <let-num> ::= <upper> | <lower> | <number>
         <let-num-hyp> ::= <upper> | <lower> | <number> | "-"

     Each test function is compared against an explicit string of the
     characters it is expected to accept, for every code point from
     0x00 up to and including 0xFF.  The failure message names the
     function and the code point that disagreed.
     """
     upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     lower = "abcdefghijklmnopqrstuvwxyz"
     number = "0123456789"
     letnum = upper + lower + number
     letnumhyp = letnum + "-"
     checks = (
         ("is_upper", urn.is_upper, upper),
         ("is_lower", urn.is_lower, lower),
         ("is_number", urn.is_number, number),
         ("is_letnum", urn.is_letnum, letnum),
         ("is_letnumhyp", urn.is_letnumhyp, letnumhyp),
     )
     # the stop value is exclusive: 0x100 is needed to include 0xFF
     for i in range3(0x00, 0x100):
         c = character(i)
         for name, is_member, members in checks:
             # bool() so that any truthy/falsy result is accepted, as
             # with assertTrue/assertFalse
             self.assertTrue(
                 bool(is_member(c)) == (c in members),
                 "%s(chr(%i))" % (name, i))
Example #32
0
 def test_trans(self):
     """Check urn's reserved/other/hex/trans tests for 0x00-0xFF.

     Each test function is compared against an explicit string of the
     characters it is expected to accept, for every code point from
     0x00 up to and including 0xFF.  Anything not listed (controls,
     space, and characters such as double quote, ampersand, angle
     brackets, square brackets, backslash, caret, backquote, braces,
     bar, tilde and all 8-bit characters) is expected to be rejected
     by all four tests.  The failure message names the function and
     the code point that disagreed.
     """
     reserved = "%/?#"
     other = "()+,-.:=@;$_!*'"
     hex_digits = "0123456789ABCDEFabcdef"
     # trans is letters, digits, other and reserved combined
     trans = (
         "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
         "abcdefghijklmnopqrstuvwxyz"
         "0123456789" + other + reserved)
     checks = (
         ("is_reserved", urn.is_reserved, reserved),
         ("is_other", urn.is_other, other),
         ("is_hex", urn.is_hex, hex_digits),
         ("is_trans", urn.is_trans, trans),
     )
     # the stop value is exclusive: 0x100 is needed to include 0xFF
     for i in range3(0x00, 0x100):
         c = character(i)
         for name, is_member, members in checks:
             # bool() so that any truthy/falsy result is accepted, as
             # with assertTrue/assertFalse
             self.assertTrue(
                 bool(is_member(c)) == (c in members),
                 "%s(chr(%i))" % (name, i))