Example #1
0
    def test_nid(self):
        """Syntax for nid::

            <NID> ::= <let-num> [ 1,31<let-num-hyp> ]

        The Namespace Identifier is case insensitive

        To avoid confusion with the "urn:" identifier, the NID "urn" is
        reserved and MUST NOT be used.

        Every NID in the positive list must be accepted by both the URN
        constructor and URI.from_octets with its case preserved, must
        compare equal to its upper and lower case variants, and must
        canonicalize to the lower case form.  Every NID in the negative
        list must be rejected by both with uri.URIException."""
        positive = [
            "a", "A", "0", "z", "Z", "9",
            "abB2-", "aB2-b", "a2-bB", "a-bB2",
            "abcdef1234567890ABCDEF1234567890",
            "0-------------------------------",
            "12345678901234567890123456789012",
            "isbn", "ISBN"]
        for nid in positive:
            try:
                u = urn.URN(nid=nid, nss="bar")
            except uri.URIException:
                self.fail("POSITIVE nid test: %s" % nid)
            self.assertEqual(u.nid, nid, "case preserved")
            uparsed = uri.URI.from_octets('urn:%s:bar' % nid)
            self.assertEqual(uparsed.nid, nid, "parsed case preserved")
            self.assertEqual(uparsed, u)
            self.assertEqual(str(uparsed), str(u))
            # The Namespace Identifier is case insensitive
            uupper = urn.URN(nid=nid.upper(), nss="bar")
            ulower = urn.URN(nid=nid.lower(), nss="bar")
            self.assertEqual(ulower, uupper)
            self.assertEqual(ulower, u)
            self.assertEqual(uupper, u)
            # re-check the original instance after the comparisons above
            self.assertEqual(u.nid, nid, "case preserved")
            # canonical form is lower case
            ucanonical = u.canonicalize()
            self.assertEqual(str(ucanonical), str(ulower))
            if nid != nid.lower():
                # only meaningful when the NID contains upper case letters
                self.assertNotEqual(str(ucanonical), str(u))
                self.assertNotEqual(str(ucanonical), str(uupper))
        negative = [
            "", "-", "-a", "-A", "-0",
            # check '.' and '+' specifically
            ".", "+", "a.", "a+", "a+b", "a.b",
            "abcdef1234567890ABCDEF1234567890a",
            # check urn specifically
            "urn", "URN", "Urn"]
        for nid in negative:
            # fail() lives in the else clause so that the try body holds
            # only the call that is expected to raise
            try:
                urn.URN(nid=nid, nss="bar")
            except uri.URIException:
                pass
            else:
                self.fail("NEGATIVE nid test: %s" % nid)
            try:
                uri.URI.from_octets('urn:%s:bar' % nid)
            except uri.URIException:
                pass
            else:
                self.fail("NEGATIVE nid parse test: %s" % nid)
Example #2
0
 def test_constructor(self):
     """Check URN construction from keyword arguments and by parsing.

     The constructor must raise ValueError when called with no
     arguments or with a nid but no nss.  A URN built from nid and nss
     must be a urn.URN instance with the expected string form and with
     nid and nss values that satisfy is_unicode.  Parsing the same
     string with uri.URI.from_octets must also yield a urn.URN
     instance with an identical string form."""
     # fail() lives in the else clause so that the try body holds only
     # the call that is expected to raise
     try:
         urn.URN()
     except ValueError:
         pass
     else:
         self.fail("empty constructor")
     try:
         urn.URN(nid="foo")
     except ValueError:
         pass
     else:
         self.fail("namespace-specific string missing")
     u = urn.URN(nid="foo", nss="a123,456")
     self.assertTrue(isinstance(u, urn.URN))
     self.assertEqual(str(u), "urn:foo:a123,456")
     self.assertTrue(is_unicode(u.nid))
     self.assertTrue(is_unicode(u.nss))
     u = uri.URI.from_octets('urn:foo:a123,456')
     self.assertTrue(isinstance(u, urn.URN))
     self.assertEqual(str(u), 'urn:foo:a123,456')
Example #3
0
 def test_scheme_case(self):
     """The leading "urn:" sequence is case-insensitive.

     A URN built by the constructor and URNs parsed from 'URN:foo:bar'
     and 'urn:foo:bar' must all compare equal.  The string form keeps
     the scheme as it was parsed, so only the upper case variant
     differs as a string, and canonicalize() must yield the lower
     case scheme for all three."""
     u0 = urn.URN(nid="foo", nss="bar")
     u1 = uri.URI.from_octets('URN:foo:bar')
     u2 = uri.URI.from_octets('urn:foo:bar')
     # equality ignores the case of the scheme
     self.assertEqual(u0, u1)
     self.assertEqual(u0, u2)
     self.assertEqual(u1, u2)
     # the string form preserves the case of the scheme
     self.assertNotEqual(str(u0), str(u1))
     self.assertEqual(str(u0), str(u2))
     self.assertNotEqual(str(u1), str(u2))
     # canonical form is lower case scheme
     self.assertNotEqual(str(u0.canonicalize()), str(u1))
     self.assertEqual(str(u0.canonicalize()), str(u2))
     self.assertEqual(str(u1.canonicalize()), str(u2))
     self.assertEqual(str(u1.canonicalize()), str(u0))
     self.assertEqual(str(u2.canonicalize()), str(u0))
     self.assertNotEqual(str(u2.canonicalize()), str(u1))
Example #4
0
    def test_nss(self):
        """Syntax for URN char::

            <trans> | "%" <hex> <hex>

        Translation is done by encoding each character outside the URN
        character set as a sequence of one to six octets using UTF-8
        encoding [5], and the encoding of each of those octets as "%"
        followed by two characters from the <hex> character set above.

        the character [%] used in a literal sense MUST be encoded

        a character MUST NOT be "%"-encoded if the character is not a
        reserved character

        SHOULD NOT use [other reserved characters] characters in
        unencoded form

        each character outside the URN character set [is encoded] as a
        sequence of one to six octets using UTF-8 encoding

        The presence of an "%" character in an URN MUST be followed by
        two characters from the <hex> character set

        In addition, octet 0 (0 hex) should NEVER be used, in either
        unencoded or %-encoded form.

        The test round-trips a table of strings through
        urn.translate_to_urnchar and urn.translate_from_urnchar, checks
        that malformed escapes and zero bytes raise ValueError, and
        finishes with a worked example that passes an additional
        reserved-character predicate to translate_to_urnchar."""
        trans_tests = {
            ul('\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10'
               '\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
               '!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\'
               ']^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f'):
            '%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10'
            '%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20'
            '!%22%23$%25%26\'()*+,-.%2F0123456789:;%3C=%3E%3F@ABCDEFGHIJKLMN'
            'OPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D'
            '%7E%7F',
            u8(b'\xe8\x8b\xb1\xe5\x9b\xbd'): '%E8%8B%B1%E5%9B%BD',
            ul('Caf\xe9'): 'Caf%C3%A9'
            }
        for src, dst in dict_items(trans_tests):
            # translate once and reuse the result for both the comparison
            # and the failure message
            encoded = urn.translate_to_urnchar(src)
            self.assertEqual(
                encoded, dst,
                "%s -> \n%s, expected \n%s" %
                (repr(src), repr(encoded), repr(dst)))
            decoded = urn.translate_from_urnchar(dst)
            self.assertEqual(
                decoded, src,
                "%s -> \n%s, expected \n%s" %
                (repr(dst), repr(decoded), repr(src)))
            # the encoded form is stored unchanged as the nss
            u = urn.URN(nid='foo', nss=dst)
            self.assertEqual(u.nss, dst)
            u = uri.URI.from_octets('urn:foo:%s' % dst)
            self.assertEqual(u.nss, dst)
        # fail() lives in the else clause so that each try body holds
        # only the call that is expected to raise
        for wrong in ("100% wrong", "Zero%00"):
            try:
                urn.translate_from_urnchar(wrong)
            except ValueError:
                pass
            else:
                self.fail("%s test in URN" % repr(wrong))
        try:
            urn.translate_to_urnchar("Zero\x00Byte")
        except ValueError:
            pass
        else:
            self.fail("Zero byte test in URN")
        # let's invent a scheme whereby the reserved characters
        # include . which is reserved for special meaning and
        # / is used unencoded as a path separator (even though
        # it is reserved and *SHOULD* be encoded)

        def dot(c):
            """Return True for the extra reserved character '.'."""
            return c == "."

        src = "urn:path:.steve/file%2Ename/easy_come%2Feasy_go"
        u = uri.URI.from_octets(src)
        # expand the special leading '.' before splitting on the
        # unencoded path separator and decoding each segment
        path = u.nss.replace('.', 'users/')
        path = [urn.translate_from_urnchar(s) for s in path.split('/')]
        self.assertEqual(
            path,
            ['users', 'steve', 'file.name', 'easy_come/easy_go'],
            "Parsed: %s" % repr(path))
        # drop the expanded 'users' segment again before re-encoding
        path = path[1:]
        # / is always reserved so we don't need to call this out
        path = [urn.translate_to_urnchar(x, dot) for x in path]
        # add the newly reserved characters after translation...
        path = '.' + '/'.join(path)
        u2 = urn.URN(nid='path', nss=path)
        self.assertEqual(u, u2)
        self.assertEqual(str(u), str(u2))