Ejemplo n.º 1
0
class NamePrep:
    """ Implements preparation of internationalized domain names.

    This class implements preparing internationalized domain names using the
    rules defined in RFC 3491, section 4 (Conversion operations).

    We do not perform step 4 since we deal with unicode representations of
    domain names and do not convert from or to ASCII representations using
    punycode encoding. When such a conversion is needed, the C{idna} standard
    library provides the C{ToUnicode()} and C{ToASCII()} functions. Note that
    C{idna} itself assumes UseSTD3ASCIIRules to be false.

    The following steps are performed by C{prepare()}:

      - Split the domain name in labels at the dots (RFC 3490, 3.1)
      - Apply nameprep proper on each label (RFC 3491)
      - Enforce the restrictions on ASCII characters in host names by
        assuming STD3ASCIIRules to be true. (STD 3)
      - Rejoin the labels using the label separator U+002E (full stop).

    """

    # Prohibited characters: every ASCII code point (0x00-0x7f) except
    # hyphen-minus (0x2d), digits (0x30-0x39) and letters (0x41-0x5a,
    # 0x61-0x7a).
    prohibiteds = [
        unichr(n)
        for n in chain(range(0x00, 0x2c + 1), range(0x2e, 0x2f + 1),
                       range(0x3a, 0x40 + 1), range(0x5b, 0x60 + 1),
                       range(0x7b, 0x7f + 1))
    ]

    def prepare(self, string):
        """
        Prepare a complete domain name.

        The name is split into labels with C{idna.dots}, each label is passed
        through C{nameprep()}, and the results are joined with U+002E. A
        single trailing dot in the input is preserved in the output.

        @param string: The domain name to prepare.
        @return: The prepared domain name.
        @raise UnicodeError: If any label is empty or otherwise invalid.
        """
        result = []

        labels = idna.dots.split(string)

        # A trailing separator yields an empty last label; remember it and
        # drop the empty label so it is not validated as a real label.
        if labels and len(labels[-1]) == 0:
            trailing_dot = u'.'
            del labels[-1]
        else:
            trailing_dot = u''

        for label in labels:
            result.append(self.nameprep(label))

        return u".".join(result) + trailing_dot

    def check_prohibiteds(self, string):
        """
        Reject strings containing any character listed in C{prohibiteds}.

        @param string: The (already nameprepped) label to check.
        @raise UnicodeError: On the first prohibited character found.
        """
        for c in string:
            if c in self.prohibiteds:
                raise UnicodeError("Invalid character %s" % repr(c))

    def nameprep(self, label):
        """
        Prepare a single label of a domain name.

        @param label: One label, without any label separators.
        @return: The prepared label.
        @raise UnicodeError: If the label is empty after preparation,
            contains a prohibited character, or starts or ends with a
            hyphen-minus.
        """
        label = idna.nameprep(label)
        self.check_prohibiteds(label)
        # An empty label (e.g. from "a..b", or a label mapped to nothing by
        # nameprep) would otherwise surface as an IndexError below rather
        # than the UnicodeError used for every other invalid label.
        if not label:
            raise UnicodeError("Invalid empty label")
        if label[0] == u'-':
            raise UnicodeError("Invalid leading hyphen-minus")
        if label[-1] == u'-':
            raise UnicodeError("Invalid trailing hyphen-minus")
        return label
Ejemplo n.º 2
0
 def test_unichr(self):
     """
     C{unichr} exists and maps a code point to the one-character unicode
     string for that code point.
     """
     snowman_code_point = 0x2603
     self.assertEqual(unichr(snowman_code_point), u"\N{SNOWMAN}")
Ejemplo n.º 3
0
Archivo: test.py Proyecto: wl86/amazon
#
#
# def GBK2312():
#     password = ''
#     while True:
#         head = random.randint(0xb0, 0xf7)
#         body = random.randint(0xa1, 0xf9)  # the last 5 characters of the block whose head (area code) is 55 are garbled, so the range is narrowed for convenience
#         val = f'{head:x}{body:x}'
#         str = bytes.fromhex(val).decode('gb2312')
#         if str:
#             password = str
#         else:
#
#
#         return str
#
#
# for i in range(10):
#     g = GBK2312()
#     print(g)

import codecs

from twisted.python.compat import unichr

# Inclusive code point bounds of the range to dump (U+4E00..U+9FA5).
start, end = (0x4E00, 0x9FA5)

# Write every character in the range to a UTF-8 encoded file.
with codecs.open("chinese.txt", "wb", encoding="utf-8") as f:
    # range() excludes its stop value, so use end + 1 to include U+9FA5.
    for codepoint in range(start, end + 1):
        f.write(unichr(codepoint))  # write out the Chinese character