Example #1
1
def _fixcp1252(envkey=None):
    """
    Fixup cp1252 codec in order to use it as a real superset of latin-1

    :Parameters:
     - `envkey`: The environment key to lookup. If this key is set and ``1``
       the charset definition won't be fixed and this function is a no-op.
       If unset or ``None``, no lookup is made.

    :Types:
     - `envkey`: ``str``
    """
    import os
    if envkey is not None and os.environ.get(envkey) == '1':
        return

    import codecs
    from encodings import cp1252

    try:
        dmap = cp1252.decoding_map # pylint: disable = E1101
    except AttributeError:
        dtable = list(cp1252.decoding_table)
        codepoint = 0
        try:
            while True:
                codepoint = dtable.index(u'\ufffe', codepoint)
                dtable[codepoint] = unichr(codepoint)
        except ValueError:
            # no more undefined points there
            pass
        dtable = u''.join(dtable)
        cp1252.decoding_table = dtable
        cp1252.encoding_table = codecs.charmap_build(dtable)
    else:
        # Python 2.4
        for key, value in dmap.iteritems():
            if value is None:
                dmap[key] = key
        cp1252.encoding_map = codecs.make_encoding_map(dmap)
Example #2
0
def _restricted_codec(name, chars):
	"""
	Build the charmap tables of a restricted single-byte codec and hand
	them to `_charmap_codec` under `name`.

	Bytes 0x20-0x7E decode to the character with the same ordinal when
	`chars` is None or contains that character; every other byte, and
	every character excluded by `chars`, decodes to the U+FFFE marker.
	"""
	marker = u'\uFFFE'
	entries = []
	for code in xrange(0x100):
		# Only the 0x20-0x7E range is ever eligible; `chars` is consulted
		# for those bytes alone.
		if 0x20 <= code < 0x7F and (chars is None or chr(code) in chars):
			entries.append(unichr(code))
		else:
			entries.append(marker)
	decoding_table = u''.join(entries)
	encoding_table = codecs.charmap_build(decoding_table)
	_charmap_codec(name, decoding_table, encoding_table)
Example #3
0
def _teletex_codec(name):
	"""
	Build the charmap tables of the teletex codec and hand them to
	`_charmap_codec` under `name`.

	Bytes 0x00-0x9F decode to the code point of the same value; bytes
	0xA0-0xFF come from the literal table below, where U+FFFE fills the
	slots that have no character.
	"""
	# this is actually ISO register entries 6 and 156, a (mostly) superset
	# of entries 102 and 103, as allowed by ITU-T rec X.680
	lower_part = u''.join(map(unichr, xrange(0xA0)))
	# One row per eight bytes, starting at 0xA0.
	upper_part = (
		u'\u00A0\u00A1\u00A2\u00A3\uFFFE\u00A5\uFFFE\u00A7'
		u'\u00A4\u2018\u201C\u00AB\u2190\u2191\u2192\u2193'
		u'\u00B0\u00B1\u00B2\u00B3\u00D7\u00B5\u00B6\u00B7'
		u'\u00F7\u2019\u201D\u00BB\u00BC\u00BD\u00BE\u00BF'
		u'\uFFFE\u0300\u0301\u0302\u0303\u0304\u0306\u0307'
		u'\u0308\uFFFE\u030A\u0327\u0332\u030B\u0328\u030C'
		u'\u2015\u00B9\u00AE\u00A9\u2122\u266a\u00AC\u00A6'
		u'\uFFFE\uFFFE\uFFFE\uFFFE\u215B\u215C\u215D\u215E'
		u'\u2126\u00C6\u00D0\u00AA\u0126\uFFFE\u0132\u013F'
		u'\u0141\u00D8\u0152\u00BA\u00DE\u0166\u014A\u0149'
		u'\u0138\u00E6\u0111\u00F0\u0127\u0131\u0133\u0140'
		u'\u0142\u00F8\u0153\u00DF\u00FE\u0167\u014B\u00AD'
	)
	decoding_table = lower_part + upper_part
	encoding_table = codecs.charmap_build(decoding_table)
	_charmap_codec(name, decoding_table, encoding_table)
Example #4
0
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder backed by the module-level ``encoding_table``."""

    def encode(self, input, final=False):
        """Charmap-encode *input* using ``self.errors`` as error policy.

        ``final`` is accepted but not used by this implementation.
        """
        encoded, _consumed = codecs.charmap_encode(
            input, self.errors, encoding_table)
        return encoded


class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder backed by the module-level ``decoding_table``."""

    def decode(self, input, final=False):
        """Charmap-decode *input* using ``self.errors`` as error policy.

        ``final`` is accepted but not used by this implementation.
        """
        decoded, _consumed = codecs.charmap_decode(
            input, self.errors, decoding_table)
        return decoded


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining ``Codec`` with ``codecs.StreamWriter``; adds nothing of its own."""


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining ``Codec`` with ``codecs.StreamReader``; adds nothing of its own."""


def getregentry():
    """Return the ``codecs.CodecInfo`` record registering this module as ``cp1250``."""
    # Each bound method comes from its own fresh Codec instance.
    encode = Codec().encode
    decode = Codec().decode
    return codecs.CodecInfo(
        name='cp1250',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter
    )


# Decoding table used by the decoder classes in this module: the character
# at index N is what byte value N decodes to.
# NOTE(review): the u'\ufffe' entries presumably mark bytes that this code
# page leaves unassigned -- confirm against the cp1250 definition.
decoding_table = u'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\u20ac\ufffe\u201a\ufffe\u201e\u2026\u2020\u2021\ufffe\u2030\u0160\u2039\u015a\u0164\u017d\u0179\ufffe\u2018\u2019\u201c\u201d\u2022\u2013\u2014\ufffe\u2122\u0161\u203a\u015b\u0165\u017e\u017a\xa0\u02c7\u02d8\u0141\xa4\u0104\xa6\xa7\xa8\xa9\u015e\xab\xac\xad\xae\u017b\xb0\xb1\u02db\u0142\xb4\xb5\xb6\xb7\xb8\u0105\u015f\xbb\u013d\u02dd\u013e\u017c\u0154\xc1\xc2\u0102\xc4\u0139\u0106\xc7\u010c\xc9\u0118\xcb\u011a\xcd\xce\u010e\u0110\u0143\u0147\xd3\xd4\u0150\xd6\xd7\u0158\u016e\xda\u0170\xdc\xdd\u0162\xdf\u0155\xe1\xe2\u0103\xe4\u013a\u0107\xe7\u010d\xe9\u0119\xeb\u011b\xed\xee\u010f\u0111\u0144\u0148\xf3\xf4\u0151\xf6\xf7\u0159\u016f\xfa\u0171\xfc\xfd\u0163\u02d9'
# Encoding table derived from the decoding table; it is what the encoder
# class in this module passes to codecs.charmap_encode.
encoding_table = codecs.charmap_build(decoding_table)
# okay decompyling c:\Users\PC\wotsources\files\originals\res_bw\scripts\common\lib\encodings\cp1250.pyc 
# decompiled 1 files: 1 okay, 0 failed, 0 verify failed
# 2016.08.04 19:59:12 Střední Evropa (letní čas) [Central Europe, summer time]
Example #5
0
    def test_objecttypes(self):
        """Run ``self.check_sizeof`` on one sample object per built-in type.

        Each expectation pairs a sample object with an expected size.  The
        expected sizes are built from struct-style format strings through
        ``test.test_support.calcobjsize`` / ``calcvobjsize`` (or
        ``struct.calcsize`` directly), plus a per-element payload where the
        sample carries one.

        NOTE(review): the format strings presumably mirror the C struct
        layouts of the interpreter version this test targets -- confirm
        against that version's headers before changing any of them.
        """
        # check all types defined in Objects/
        size = test.test_support.calcobjsize
        vsize = test.test_support.calcvobjsize
        check = self.check_sizeof
        # bool
        check(True, size("l"))
        # buffer
        with test.test_support.check_py3k_warnings():
            check(buffer(""), size("2P2Pil"))
        # builtin_function_or_method
        check(len, size("3P"))
        # bytearray
        samples = ["", "u" * 100000]
        for sample in samples:
            x = bytearray(sample)
            check(x, vsize("iPP") + x.__alloc__())
        # bytearray_iterator
        check(iter(bytearray()), size("PP"))
        # cell
        def get_cell():
            x = 42

            def inner():
                return x

            return inner

        check(get_cell().func_closure[0], size("P"))
        # classobj (old-style class)
        class class_oldstyle:
            # never called; only the bound-method object is measured below
            def method():
                pass

        check(class_oldstyle, size("7P"))
        # instance (old-style class)
        check(class_oldstyle(), size("3P"))
        # instancemethod (old-style class)
        check(class_oldstyle().method, size("4P"))
        # complex
        check(complex(0, 1), size("2d"))
        # code
        check(get_cell().func_code, size("4i8Pi3P"))
        # BaseException
        check(BaseException(), size("3P"))
        # UnicodeEncodeError
        check(UnicodeEncodeError("", u"", 0, 0, ""), size("5P2PP"))
        # UnicodeDecodeError
        check(UnicodeDecodeError("", "", 0, 0, ""), size("5P2PP"))
        # UnicodeTranslateError
        check(UnicodeTranslateError(u"", 0, 1, ""), size("5P2PP"))
        # method_descriptor (descriptor object)
        check(str.lower, size("2PP"))
        # classmethod_descriptor (descriptor object)
        # XXX
        # member_descriptor (descriptor object)
        import datetime

        check(datetime.timedelta.days, size("2PP"))
        # getset_descriptor (descriptor object)
        import __builtin__

        check(__builtin__.file.closed, size("2PP"))
        # wrapper_descriptor (descriptor object)
        check(int.__add__, size("2P2P"))
        # dictproxy
        class C(object):
            pass

        check(C.__dict__, size("P"))
        # method-wrapper (descriptor object)
        check({}.__iter__, size("2P"))
        # dict
        check({}, size("3P2P" + 8 * "P2P"))
        x = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8}
        check(x, size("3P2P" + 8 * "P2P") + 16 * struct.calcsize("P2P"))
        # dictionary-keyiterator
        check({}.iterkeys(), size("P2PPP"))
        # dictionary-valueiterator
        check({}.itervalues(), size("P2PPP"))
        # dictionary-itemiterator
        check({}.iteritems(), size("P2PPP"))
        # ellipses
        check(Ellipsis, size(""))
        # EncodingMap
        import codecs, encodings.iso8859_3

        x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
        check(x, size("32B2iB"))
        # enumerate
        check(enumerate([]), size("l3P"))
        # file
        check(self.file, size("4P2i4P3i3P3i"))
        # float
        check(float(0), size("d"))
        # sys.floatinfo
        check(sys.float_info, vsize("") + self.P * len(sys.float_info))
        # frame
        import inspect

        CO_MAXBLOCKS = 20
        # the frame measured here is this test method's own frame, so the
        # expected size is derived from this method's code object
        x = inspect.currentframe()
        ncells = len(x.f_code.co_cellvars)
        nfrees = len(x.f_code.co_freevars)
        extras = x.f_code.co_stacksize + x.f_code.co_nlocals + ncells + nfrees - 1
        check(x, vsize("12P3i" + CO_MAXBLOCKS * "3i" + "P" + extras * "P"))
        # function
        def func():
            pass

        check(func, size("9P"))

        class c:
            @staticmethod
            def foo():
                pass

            @classmethod
            def bar(cls):
                pass

            # the checks run inside the class body, where foo and bar are
            # still the raw staticmethod/classmethod objects
            # staticmethod
            check(foo, size("P"))
            # classmethod
            check(bar, size("P"))

        # generator
        def get_gen():
            yield 1

        check(get_gen(), size("Pi2P"))
        # integer
        check(1, size("l"))
        check(100, size("l"))
        # iterator
        check(iter("abc"), size("lP"))
        # callable-iterator
        import re

        check(re.finditer("", ""), size("2P"))
        # list
        samples = [[], [1, 2, 3], ["1", "2", "3"]]
        for sample in samples:
            check(sample, vsize("PP") + len(sample) * self.P)
        # sortwrapper (list)
        # XXX
        # cmpwrapper (list)
        # XXX
        # listiterator (list)
        check(iter([]), size("lP"))
        # listreverseiterator (list)
        check(reversed([]), size("lP"))
        # long
        check(0L, vsize(""))
        check(1L, vsize("") + self.longdigit)
        check(-1L, vsize("") + self.longdigit)
        PyLong_BASE = 2 ** sys.long_info.bits_per_digit
        check(long(PyLong_BASE), vsize("") + 2 * self.longdigit)
        check(long(PyLong_BASE ** 2 - 1), vsize("") + 2 * self.longdigit)
        check(long(PyLong_BASE ** 2), vsize("") + 3 * self.longdigit)
        # module
        check(unittest, size("P"))
        # None
        check(None, size(""))
        # object
        check(object(), size(""))
        # property (descriptor object)
        class C(object):
            def getx(self):
                return self.__x

            def setx(self, value):
                self.__x = value

            def delx(self):
                del self.__x

            x = property(getx, setx, delx, "")
            # checked inside the class body, where x is the property object
            check(x, size("4Pi"))

        # PyCObject
        # PyCapsule
        # XXX
        # rangeiterator
        check(iter(xrange(1)), size("4l"))
        # reverse
        check(reversed(""), size("PP"))
        # set
        # frozenset
        PySet_MINSIZE = 8
        samples = [[], range(10), range(50)]
        s = size("3P2P" + PySet_MINSIZE * "lP" + "lP")
        for sample in samples:
            minused = len(sample)
            if minused == 0:
                tmp = 1  # NOTE(review): tmp is never read in this method
            # the computation of minused is actually a bit more complicated
            # but this suffices for the sizeof test
            minused = minused * 2
            newsize = PySet_MINSIZE
            while newsize <= minused:
                newsize = newsize << 1
            if newsize <= 8:
                check(set(sample), s)
                check(frozenset(sample), s)
            else:
                check(set(sample), s + newsize * struct.calcsize("lP"))
                check(frozenset(sample), s + newsize * struct.calcsize("lP"))
        # setiterator
        check(iter(set()), size("P3P"))
        # slice
        check(slice(1), size("3P"))
        # str
        vh = test.test_support._vheader
        check("", struct.calcsize(vh + "lic"))
        check("abc", struct.calcsize(vh + "lic") + 3)
        # super
        check(super(int), size("3P"))
        # tuple
        check((), vsize(""))
        check((1, 2, 3), vsize("") + 3 * self.P)
        # tupleiterator
        check(iter(()), size("lP"))
        # type
        # (PyTypeObject + PyNumberMethods +  PyMappingMethods +
        #  PySequenceMethods + PyBufferProcs)
        s = vsize("P2P15Pl4PP9PP11PI") + struct.calcsize("41P 10P 3P 6P")

        class newstyleclass(object):
            pass

        check(newstyleclass, s)
        # builtin type
        check(int, s)
        # NotImplementedType
        import types

        check(types.NotImplementedType, s)
        # unicode
        usize = len(u"\0".encode("unicode-internal"))
        samples = [u"", u"1" * 100]
        # we need to test for both sizes, because we don't know if the string
        # has been cached
        # (the loop variable below rebinds s, which held the type size above)
        for s in samples:
            check(s, size("PPlP") + usize * (len(s) + 1))
        # weakref
        import weakref

        check(weakref.ref(int), size("2Pl2P"))
        # weakproxy
        # XXX
        # weakcallableproxy
        check(weakref.proxy(int), size("2Pl2P"))
        # xrange
        check(xrange(1), size("3l"))
        check(xrange(66000), size("3l"))
 def test_objecttypes(self):
     """Run ``self.check_sizeof`` on one sample object per built-in type.

     Each expectation pairs a sample object with an expected size.  The
     expected sizes are built from struct-style format strings through
     ``test.support.calcobjsize`` / ``calcvobjsize`` (or
     ``struct.calcsize`` directly), plus a per-element payload where the
     sample carries one.

     NOTE(review): the format strings presumably mirror the C struct
     layouts of the interpreter version this test targets -- confirm
     against that version's headers before changing any of them.
     """
     # check all types defined in Objects/
     size = test.support.calcobjsize
     vsize = test.support.calcvobjsize
     check = self.check_sizeof
     # bool
     check(True, vsize('') + self.longdigit)
     # buffer
     # XXX
     # builtin_function_or_method
     check(len, size('4P')) # XXX check layout
     # bytearray
     samples = [b'', b'u'*100000]
     for sample in samples:
         x = bytearray(sample)
         check(x, vsize('n2Pi') + x.__alloc__())
     # bytearray_iterator
     check(iter(bytearray()), size('nP'))
     # bytes
     check(b'', vsize('n') + 1)
     check(b'x' * 10, vsize('n') + 11)
     # cell
     def get_cell():
         x = 42
         def inner():
             return x
         return inner
     check(get_cell().__closure__[0], size('P'))
     # code
     check(get_cell().__code__, size('5i9Pi3P'))
     check(get_cell.__code__, size('5i9Pi3P'))
     def get_cell2(x):
         def inner():
             return x
         return inner
     check(get_cell2.__code__, size('5i9Pi3P') + 1)
     # complex
     check(complex(0,1), size('2d'))
     # method_descriptor (descriptor object)
     check(str.lower, size('3PP'))
     # classmethod_descriptor (descriptor object)
     # XXX
     # member_descriptor (descriptor object)
     import datetime
     check(datetime.timedelta.days, size('3PP'))
     # getset_descriptor (descriptor object)
     import collections
     check(collections.defaultdict.default_factory, size('3PP'))
     # wrapper_descriptor (descriptor object)
     check(int.__add__, size('3P2P'))
     # method-wrapper (descriptor object)
     check({}.__iter__, size('2P'))
     # dict
     check({}, size('n2P' + '2nPn' + 8*'n2P'))
     longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
     check(longdict, size('n2P' + '2nPn') + 16*struct.calcsize('n2P'))
     # dictionary-keyiterator
     check({}.keys(), size('P'))
     # dictionary-valueiterator
     check({}.values(), size('P'))
     # dictionary-itemiterator
     check({}.items(), size('P'))
     # dictionary iterator
     check(iter({}), size('P2nPn'))
     # dictproxy
     class C(object): pass
     check(C.__dict__, size('P'))
     # BaseException
     check(BaseException(), size('5Pb'))
     # UnicodeEncodeError
     check(UnicodeEncodeError("", "", 0, 0, ""), size('5Pb 2P2nP'))
     # UnicodeDecodeError
     check(UnicodeDecodeError("", b"", 0, 0, ""), size('5Pb 2P2nP'))
     # UnicodeTranslateError
     check(UnicodeTranslateError("", 0, 1, ""), size('5Pb 2P2nP'))
     # ellipses
     check(Ellipsis, size(''))
     # EncodingMap
     import codecs, encodings.iso8859_3
     x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
     check(x, size('32B2iB'))
     # enumerate
     check(enumerate([]), size('n3P'))
     # reverse
     check(reversed(''), size('nP'))
     # float
     check(float(0), size('d'))
     # sys.floatinfo
     check(sys.float_info, vsize('') + self.P * len(sys.float_info))
     # frame
     import inspect
     CO_MAXBLOCKS = 20
     # the frame measured here is this test method's own frame, so the
     # expected size is derived from this method's code object
     x = inspect.currentframe()
     ncells = len(x.f_code.co_cellvars)
     nfrees = len(x.f_code.co_freevars)
     extras = x.f_code.co_stacksize + x.f_code.co_nlocals +\
               ncells + nfrees - 1
     check(x, vsize('12P3ic' + CO_MAXBLOCKS*'3i' + 'P' + extras*'P'))
     # function
     def func(): pass
     check(func, size('12P'))
     class c():
         @staticmethod
         def foo():
             pass
         @classmethod
         def bar(cls):
             pass
         # the checks run inside the class body, where foo and bar are
         # still the raw staticmethod/classmethod objects
         # staticmethod
         check(foo, size('PP'))
         # classmethod
         check(bar, size('PP'))
     # generator
     def get_gen(): yield 1
     check(get_gen(), size('Pb2PPP'))
     # iterator
     check(iter('abc'), size('lP'))
     # callable-iterator
     import re
     check(re.finditer('',''), size('2P'))
     # list
     samples = [[], [1,2,3], ['1', '2', '3']]
     for sample in samples:
         check(sample, vsize('Pn') + len(sample)*self.P)
     # sortwrapper (list)
     # XXX
     # cmpwrapper (list)
     # XXX
     # listiterator (list)
     check(iter([]), size('lP'))
     # listreverseiterator (list)
     check(reversed([]), size('nP'))
     # int
     check(0, vsize(''))
     check(1, vsize('') + self.longdigit)
     check(-1, vsize('') + self.longdigit)
     PyLong_BASE = 2**sys.int_info.bits_per_digit
     check(int(PyLong_BASE), vsize('') + 2*self.longdigit)
     check(int(PyLong_BASE**2-1), vsize('') + 2*self.longdigit)
     check(int(PyLong_BASE**2), vsize('') + 3*self.longdigit)
     # module
     check(unittest, size('PnPPP'))
     # None
     check(None, size(''))
     # NotImplementedType
     check(NotImplemented, size(''))
     # object
     check(object(), size(''))
     # property (descriptor object)
     class C(object):
         def getx(self): return self.__x
         def setx(self, value): self.__x = value
         def delx(self): del self.__x
         x = property(getx, setx, delx, "")
         # checked inside the class body, where x is the property object
         check(x, size('4Pi'))
     # PyCapsule
     # XXX
     # rangeiterator
     check(iter(range(1)), size('4l'))
     # reverse
     # NOTE(review): identical to the "reverse" check made earlier in this method
     check(reversed(''), size('nP'))
     # range
     check(range(1), size('4P'))
     check(range(66000), size('4P'))
     # set
     # frozenset
     PySet_MINSIZE = 8
     samples = [[], range(10), range(50)]
     s = size('3nP' + PySet_MINSIZE*'nP' + '2nP')
     for sample in samples:
         minused = len(sample)
         if minused == 0: tmp = 1  # NOTE(review): tmp is never read in this method
         # the computation of minused is actually a bit more complicated
         # but this suffices for the sizeof test
         minused = minused*2
         newsize = PySet_MINSIZE
         while newsize <= minused:
             newsize = newsize << 1
         if newsize <= 8:
             check(set(sample), s)
             check(frozenset(sample), s)
         else:
             check(set(sample), s + newsize*struct.calcsize('nP'))
             check(frozenset(sample), s + newsize*struct.calcsize('nP'))
     # setiterator
     check(iter(set()), size('P3n'))
     # slice
     check(slice(0), size('3P'))
     # super
     check(super(int), size('3P'))
     # tuple
     check((), vsize(''))
     check((1,2,3), vsize('') + 3*self.P)
     # type
     # static type: PyTypeObject
     s = vsize('P2n15Pl4Pn9Pn11PIP')
     check(int, s)
     # (PyTypeObject + PyAsyncMethods + PyNumberMethods + PyMappingMethods +
     #  PySequenceMethods + PyBufferProcs + 4P)
     s = vsize('P2n17Pl4Pn9Pn11PIP') + struct.calcsize('34P 3P 3P 10P 2P 4P')
     # Separate block for PyDictKeysObject with 4 entries
     s += struct.calcsize("2nPn") + 4*struct.calcsize("n2P")
     # class
     class newstyleclass(object): pass
     check(newstyleclass, s)
     # dict with shared keys
     check(newstyleclass().__dict__, size('n2P' + '2nPn'))
     # unicode
     # each tuple contains a string and its expected character size
     # don't put any static strings here, as they may contain
     # wchar_t or UTF-8 representations
     samples = ['1'*100, '\xff'*50,
                '\u0100'*40, '\uffff'*100,
                '\U00010000'*30, '\U0010ffff'*100]
     asciifields = "nnbP"
     compactfields = asciifields + "nPn"
     unicodefields = compactfields + "P"
     # (the loop variable below rebinds s, which held the type size above)
     for s in samples:
         # pick the expected size from the widest character in the sample:
         # 1, 2 or 4 bytes per character plus one terminating slot
         maxchar = ord(max(s))
         if maxchar < 128:
             L = size(asciifields) + len(s) + 1
         elif maxchar < 256:
             L = size(compactfields) + len(s) + 1
         elif maxchar < 65536:
             L = size(compactfields) + 2*(len(s) + 1)
         else:
             L = size(compactfields) + 4*(len(s) + 1)
         check(s, L)
     # verify that the UTF-8 size is accounted for
     s = chr(0x4000)   # 4 bytes canonical representation
     check(s, size(compactfields) + 4)
     # compile() will trigger the generation of the UTF-8
     # representation as a side effect
     compile(s, "<stdin>", "eval")
     check(s, size(compactfields) + 4 + 4)
     # TODO: add check that forces the presence of wchar_t representation
     # TODO: add check that forces layout of unicodefields
     # weakref
     import weakref
     check(weakref.ref(int), size('2Pn2P'))
     # weakproxy
     # XXX
     # weakcallableproxy
     check(weakref.proxy(int), size('2Pn2P'))
Example #7
0
 def test_objecttypes(self):
     """Run ``self.check_sizeof`` on one sample object per built-in type.

     Each expectation pairs a sample object with an expected size.  The
     expected sizes come from ``self.calcsize`` applied to a format string
     that starts with ``self.header`` or ``self.vheader``, plus a
     per-element payload (``self.P``, ``self.H``, ``self.c``) where the
     sample carries one.

     NOTE(review): the format strings presumably mirror the C struct
     layouts of the interpreter version this test targets -- confirm
     against that version's headers before changing any of them.
     """
     # check all types defined in Objects/
     h = self.header
     vh = self.vheader
     size = self.calcsize
     check = self.check_sizeof
     # bool
     check(True, size(h + 'l'))
     # buffer
     check(buffer(''), size(h + '2P2Pil'))
     # builtin_function_or_method
     check(len, size(h + '3P'))
     # bytearray
     samples = ['', 'u'*100000]
     for sample in samples:
         x = bytearray(sample)
         check(x, size(vh + 'iPP') + x.__alloc__() * self.c)
     # bytearray_iterator
     check(iter(bytearray()), size(h + 'PP'))
     # cell
     def get_cell():
         x = 42
         def inner():
             return x
         return inner
     check(get_cell().func_closure[0], size(h + 'P'))
     # classobj (old-style class)
     class class_oldstyle():
         # never called; only the bound-method object is measured below
         def method():
             pass
     check(class_oldstyle, size(h + '6P'))
     # instance (old-style class)
     check(class_oldstyle(), size(h + '3P'))
     # instancemethod (old-style class)
     check(class_oldstyle().method, size(h + '4P'))
     # complex
     check(complex(0,1), size(h + '2d'))
     # code
     check(get_cell().func_code, size(h + '4i8Pi2P'))
     # BaseException
     check(BaseException(), size(h + '3P'))
     # UnicodeEncodeError
     check(UnicodeEncodeError("", u"", 0, 0, ""), size(h + '5P2PP'))
     # UnicodeDecodeError
     check(UnicodeDecodeError("", "", 0, 0, ""), size(h + '5P2PP'))
     # UnicodeTranslateError
     check(UnicodeTranslateError(u"", 0, 1, ""), size(h + '5P2PP'))
     # method_descriptor (descriptor object)
     check(str.lower, size(h + '2PP'))
     # classmethod_descriptor (descriptor object)
     # XXX
     # member_descriptor (descriptor object)
     import datetime
     check(datetime.timedelta.days, size(h + '2PP'))
     # getset_descriptor (descriptor object)
     import __builtin__
     check(__builtin__.file.closed, size(h + '2PP'))
     # wrapper_descriptor (descriptor object)
     check(int.__add__, size(h + '2P2P'))
     # dictproxy
     class C(object): pass
     check(C.__dict__, size(h + 'P'))
     # method-wrapper (descriptor object)
     check({}.__iter__, size(h + '2P'))
     # dict
     check({}, size(h + '3P2P' + 8*'P2P'))
     x = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
     check(x, size(h + '3P2P' + 8*'P2P') + 16*size('P2P'))
     # dictionary-keyiterator
     check({}.iterkeys(), size(h + 'P2PPP'))
     # dictionary-valueiterator
     check({}.itervalues(), size(h + 'P2PPP'))
     # dictionary-itemiterator
     check({}.iteritems(), size(h + 'P2PPP'))
     # ellipses
     check(Ellipsis, size(h + ''))
     # EncodingMap
     import codecs, encodings.iso8859_3
     x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
     check(x, size(h + '32B2iB'))
     # enumerate
     check(enumerate([]), size(h + 'l3P'))
     # file
     check(self.file, size(h + '4P2i4P3i3Pi'))
     # float
     check(float(0), size(h + 'd'))
     # sys.floatinfo
     check(sys.float_info, size(vh) + self.P * len(sys.float_info))
     # frame
     import inspect
     CO_MAXBLOCKS = 20
     # the frame measured here is this test method's own frame, so the
     # expected size is derived from this method's code object
     x = inspect.currentframe()
     ncells = len(x.f_code.co_cellvars)
     nfrees = len(x.f_code.co_freevars)
     extras = x.f_code.co_stacksize + x.f_code.co_nlocals +\
              ncells + nfrees - 1
     check(x, size(vh + '12P3i' + CO_MAXBLOCKS*'3i' + 'P' + extras*'P'))
     # function
     def func(): pass
     check(func, size(h + '9P'))
     class c():
         @staticmethod
         def foo():
             pass
         @classmethod
         def bar(cls):
             pass
         # the checks run inside the class body, where foo and bar are
         # still the raw staticmethod/classmethod objects
         # staticmethod
         check(foo, size(h + 'P'))
         # classmethod
         check(bar, size(h + 'P'))
     # generator
     def get_gen(): yield 1
     check(get_gen(), size(h + 'Pi2P'))
     # integer
     check(1, size(h + 'l'))
     check(100, size(h + 'l'))
     # iterator
     check(iter('abc'), size(h + 'lP'))
     # callable-iterator
     import re
     check(re.finditer('',''), size(h + '2P'))
     # list
     samples = [[], [1,2,3], ['1', '2', '3']]
     for sample in samples:
         check(sample, size(vh + 'PP') + len(sample)*self.P)
     # sortwrapper (list)
     # XXX
     # cmpwrapper (list)
     # XXX
     # listiterator (list)
     check(iter([]), size(h + 'lP'))
     # listreverseiterator (list)
     check(reversed([]), size(h + 'lP'))
     # long
     # NOTE(review): the 32768 (2**15) boundaries below presumably assume
     # 15-bit long digits -- confirm for the target build
     check(0L, size(vh + 'H') - self.H)
     check(1L, size(vh + 'H'))
     check(-1L, size(vh + 'H'))
     check(32768L, size(vh + 'H') + self.H)
     check(32768L*32768L-1, size(vh + 'H') + self.H)
     check(32768L*32768L, size(vh + 'H') + 2*self.H)
     # module
     check(unittest, size(h + 'P'))
     # None
     check(None, size(h + ''))
     # object
     check(object(), size(h + ''))
     # property (descriptor object)
     class C(object):
         def getx(self): return self.__x
         def setx(self, value): self.__x = value
         def delx(self): del self.__x
         x = property(getx, setx, delx, "")
         # checked inside the class body, where x is the property object
         check(x, size(h + '4Pi'))
     # PyCObject
     # XXX
     # rangeiterator
     check(iter(xrange(1)), size(h + '4l'))
     # reverse
     check(reversed(''), size(h + 'PP'))
     # set
     # frozenset
     PySet_MINSIZE = 8
     samples = [[], range(10), range(50)]
     s = size(h + '3P2P' + PySet_MINSIZE*'lP' + 'lP')
     for sample in samples:
         minused = len(sample)
         if minused == 0: tmp = 1  # NOTE(review): tmp is never read in this method
         # the computation of minused is actually a bit more complicated
         # but this suffices for the sizeof test
         minused = minused*2
         newsize = PySet_MINSIZE
         while newsize <= minused:
             newsize = newsize << 1
         if newsize <= 8:
             check(set(sample), s)
             check(frozenset(sample), s)
         else:
             check(set(sample), s + newsize*struct.calcsize('lP'))
             check(frozenset(sample), s + newsize*struct.calcsize('lP'))
     # setiterator
     check(iter(set()), size(h + 'P3P'))
     # slice
     check(slice(1), size(h + '3P'))
     # str
     check('', size(vh + 'lic'))
     check('abc', size(vh + 'lic') + 3*self.c)
     # super
     check(super(int), size(h + '3P'))
     # tuple
     check((), size(vh))
     check((1,2,3), size(vh) + 3*self.P)
     # tupleiterator
     check(iter(()), size(h + 'lP'))
     # type
     # (PyTypeObject + PyNumberMethods +  PyMappingMethods +
     #  PySequenceMethods + PyBufferProcs)
     s = size(vh + 'P2P15Pl4PP9PP11PI') + size('41P 10P 3P 6P')
     class newstyleclass(object):
         pass
     check(newstyleclass, s)
     # builtin type
     check(int, s)
     # NotImplementedType
     import types
     check(types.NotImplementedType, s)
     # unicode
     usize = len(u'\0'.encode('unicode-internal'))
     samples = [u'', u'1'*100]
     # we need to test for both sizes, because we don't know if the string
     # has been cached
     # (the loop variable below rebinds s, which held the type size above)
     for s in samples:
         check(s, size(h + 'PPlP') + usize * (len(s) + 1))
     # weakref
     import weakref
     check(weakref.ref(int), size(h + '2Pl2P'))
     # weakproxy
     # XXX
     # weakcallableproxy
     check(weakref.proxy(int), size(h + '2Pl2P'))
     # xrange
     check(xrange(1), size(h + '3l'))
     check(xrange(66000), size(h + '3l'))
Example #8
0
def make_sloppy_codec(encoding):
    """
    Build a 'sloppy' variant of the single-byte codec named `encoding`.

    The sloppy codec behaves like `encoding` for every byte that codec can
    decode. Bytes that `encoding` leaves unassigned are mapped to the
    character Latin-1 gives them, so all 256 byte values can be decoded and
    encoded again.

    The return value is a `codecs.CodecInfo` named ``'sloppy-' + encoding``,
    wired to the usual charmap boilerplate classes (compare the standard
    library's `encodings.cp1252`).
    """
    # Every possible byte value, in order.
    every_byte = bytearray(range(256))

    # Start from the Latin-1 reading of each byte; this is the fallback for
    # anything the real codec cannot decode.
    sloppy_chars = list(every_byte.decode('latin-1'))

    # Decode the same bytes with the real codec, marking unassigned bytes
    # with the replacement character.
    if PY26:
        strict_chars = every_byte.decode(encoding, 'replace')
    else:
        strict_chars = every_byte.decode(encoding, errors='replace')

    # Wherever the real codec produced an actual character, prefer it over
    # the Latin-1 fallback.
    for position, char in enumerate(strict_chars):
        if char == REPLACEMENT_CHAR:
            continue
        sloppy_chars[position] = char

    # The tables that drive codecs.charmap_decode / codecs.charmap_encode.
    decoding_table = ''.join(sloppy_chars)
    encoding_table = codecs.charmap_build(decoding_table)

    class Codec(codecs.Codec):
        def encode(self, input, errors='strict'):
            return codecs.charmap_encode(input, errors, encoding_table)

        def decode(self, input, errors='strict'):
            return codecs.charmap_decode(input, errors, decoding_table)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            # charmap_encode returns (data, length); only the data is wanted.
            result = codecs.charmap_encode(input, self.errors, encoding_table)
            return result[0]

    class IncrementalDecoder(codecs.IncrementalDecoder):
        def decode(self, input, final=False):
            # charmap_decode returns (text, length); only the text is wanted.
            result = codecs.charmap_decode(input, self.errors, decoding_table)
            return result[0]

    class StreamWriter(Codec, codecs.StreamWriter):
        pass

    class StreamReader(Codec, codecs.StreamReader):
        pass

    codec_parts = dict(
        name='sloppy-' + encoding,
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
    return codecs.CodecInfo(**codec_parts)
Example #9
0
File: atarist.py Project: hick/x84
    u'\u03c4'   #  0xE7 -> GREEK SMALL LETTER TAU
    u'\u03a6'   #  0xE8 -> GREEK CAPITAL LETTER PHI
    u'\u0398'   #  0xE9 -> GREEK CAPITAL LETTER THETA
    u'\u03a9'   #  0xEA -> GREEK CAPITAL LETTER OMEGA
    u'\u03b4'   #  0xEB -> GREEK SMALL LETTER DELTA
    u'\u222e'   #  0xEC -> CONTOUR INTEGRAL
    u'\u03c6'   #  0xED -> GREEK SMALL LETTER PHI
    u'\u2208'   #  0xEE -> ELEMENT OF SIGN
    u'\u2229'   #  0xEF -> INTERSECTION
    u'\u2261'   #  0xF0 -> IDENTICAL TO
    u'\xb1'     #  0xF1 -> PLUS-MINUS SIGN
    u'\u2265'   #  0xF2 -> GREATER-THAN OR EQUAL TO
    u'\u2264'   #  0xF3 -> LESS-THAN OR EQUAL TO
    u'\u2320'   #  0xF4 -> TOP HALF INTEGRAL
    u'\u2321'   #  0xF5 -> BOTTOM HALF INTEGRAL
    u'\xf7'     #  0xF6 -> DIVISION SIGN
    u'\u2248'   #  0xF7 -> ALMOST EQUAL TO
    u'\xb0'     #  0xF8 -> DEGREE SIGN
    u'\u2219'   #  0xF9 -> BULLET OPERATOR
    u'\xb7'     #  0xFA -> MIDDLE DOT
    u'\u221a'   #  0xFB -> SQUARE ROOT
    u'\u207f'   #  0xFC -> SUPERSCRIPT LATIN SMALL LETTER N
    u'\xb2'     #  0xFD -> SUPERSCRIPT TWO
    u'\xb3'     #  0xFE -> SUPERSCRIPT THREE
    u'\xaf'     #  0xFF -> MACRON
)

### Encoding table
# Built from DECODING_TABLE via codecs.charmap_build rather than written out
# by hand.
ENCODING_TABLE = codecs.charmap_build(DECODING_TABLE)

Example #10
0
 def test_charmap_build(self):
     import codecs
     assert codecs.charmap_build(u'123456') == {49: 0, 50: 1, 51: 2,
                                                52: 3, 53: 4, 54: 5}
Example #11
0
File: test_sys.py Project: d11/rts
    def test_objecttypes(self):
        # Size expectations for one representative object of each built-in
        # type.  Each expectation is handed to self.check_sizeof(obj, size),
        # where the size is computed from a struct format string: the object
        # header prefix (h or vh) followed by the type-specific fields
        # (struct codes: P pointer, l long, i int, d double, B unsigned char,
        # c char, I unsigned int).
        # NOTE(review): header, vheader, calcsize and check_sizeof live on the
        # test class outside this chunk; presumably h/vh are the prefixes for
        # fixed-size and variable-size objects -- confirm against the class.
        # check all types defined in Objects/
        h = self.header
        vh = self.vheader
        size = self.calcsize
        check = self.check_sizeof
        # bool
        check(True, size(vh) + self.longdigit)
        # buffer
        # XXX
        # builtin_function_or_method
        check(len, size(h + '3P'))
        # bytearray
        samples = [b'', b'u'*100000]
        for sample in samples:
            x = bytearray(sample)
            # the allocated buffer (not len(x)) is what is added to the header
            check(x, size(vh + 'iPP') + x.__alloc__() * self.c)
        # bytearray_iterator
        check(iter(bytearray()), size(h + 'PP'))
        # cell
        def get_cell():
            x = 42
            def inner():
                return x
            return inner
        check(get_cell().__closure__[0], size(h + 'P'))
        # code
        check(get_cell().__code__, size(h + '5i8Pi3P'))
        # complex
        check(complex(0,1), size(h + '2d'))
        # method_descriptor (descriptor object)
        check(str.lower, size(h + '2PP'))
        # classmethod_descriptor (descriptor object)
        # XXX
        # member_descriptor (descriptor object)
        import datetime
        check(datetime.timedelta.days, size(h + '2PP'))
        # getset_descriptor (descriptor object)
        import collections
        check(collections.defaultdict.default_factory, size(h + '2PP'))
        # wrapper_descriptor (descriptor object)
        # When the stackless module can be imported, one extra 'i' field is
        # appended to the expected layout.  The bare except treats any failure
        # of the import as "not Stackless".
        try:
            import stackless
            slxtra = 'i'
        except:
            slxtra = ''
        check(int.__add__, size(h + '2P2P' + slxtra))
        # method-wrapper (descriptor object)
        check({}.__iter__, size(h + '2P'))
        # dict
        check({}, size(h + '3P2P' + 8*'P2P'))
        longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
        # an 8-item dict is expected to carry 16 additional 'P2P' entries
        check(longdict, size(h + '3P2P' + 8*'P2P') + 16*size('P2P'))
        # dictionary-keyiterator
        check({}.keys(), size(h + 'P'))
        # dictionary-valueiterator
        check({}.values(), size(h + 'P'))
        # dictionary-itemiterator
        check({}.items(), size(h + 'P'))
        # dictproxy
        class C(object): pass
        check(C.__dict__, size(h + 'P'))
        # BaseException
        check(BaseException(), size(h + '5P'))
        # UnicodeEncodeError
        check(UnicodeEncodeError("", "", 0, 0, ""), size(h + '5P 2P2PP'))
        # UnicodeDecodeError
        # XXX
#        check(UnicodeDecodeError("", "", 0, 0, ""), size(h + '5P2PP'))
        # UnicodeTranslateError
        check(UnicodeTranslateError("", 0, 1, ""), size(h + '5P 2P2PP'))
        # ellipses
        check(Ellipsis, size(h + ''))
        # EncodingMap
        import codecs, encodings.iso8859_3
        x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
        check(x, size(h + '32B2iB'))
        # enumerate
        check(enumerate([]), size(h + 'l3P'))
        # reverse
        check(reversed(''), size(h + 'PP'))
        # float
        check(float(0), size(h + 'd'))
        # sys.floatinfo
        check(sys.float_info, size(vh) + self.P * len(sys.float_info))
        # frame
        import inspect
        CO_MAXBLOCKS = 20
        x = inspect.currentframe()
        ncells = len(x.f_code.co_cellvars)
        nfrees = len(x.f_code.co_freevars)
        # number of trailing pointer slots expected after the fixed fields:
        # stack + locals + cell and free variables, minus one
        extras = x.f_code.co_stacksize + x.f_code.co_nlocals +\
                  ncells + nfrees - 1
        # same Stackless probe as above; here the extra field is a pointer
        try:
            import stackless
            slextra = 'P'
        except:
            slextra = ''
        check(x, size(vh + '12P3i' + CO_MAXBLOCKS*'3i' + slextra + 'P' + extras*'P'))
        # function
        def func(): pass
        check(func, size(h + '11P'))
        class c():
            @staticmethod
            def foo():
                pass
            @classmethod
            def bar(cls):
                pass
            # The checks run inside the class body, where foo and bar are
            # still the raw staticmethod/classmethod objects.
            # staticmethod
            check(foo, size(h + 'P'))
            # classmethod
            check(bar, size(h + 'P'))
        # generator
        def get_gen(): yield 1
        check(get_gen(), size(h + 'Pi2P'))
        # iterator
        check(iter('abc'), size(h + 'lP'))
        # callable-iterator
        import re
        check(re.finditer('',''), size(h + '2P'))
        # list
        samples = [[], [1,2,3], ['1', '2', '3']]
        for sample in samples:
            check(sample, size(vh + 'PP') + len(sample)*self.P)
        # sortwrapper (list)
        # XXX
        # cmpwrapper (list)
        # XXX
        # listiterator (list)
        check(iter([]), size(h + 'lP'))
        # listreverseiterator (list)
        check(reversed([]), size(h + 'lP'))
        # long
        check(0, size(vh))
        check(1, size(vh) + self.longdigit)
        check(-1, size(vh) + self.longdigit)
        # values at and around powers of PyLong_BASE exercise 2 and 3 digits
        PyLong_BASE = 2**sys.int_info.bits_per_digit
        check(int(PyLong_BASE), size(vh) + 2*self.longdigit)
        check(int(PyLong_BASE**2-1), size(vh) + 2*self.longdigit)
        check(int(PyLong_BASE**2), size(vh) + 3*self.longdigit)
        # memory
        check(memoryview(b''), size(h + 'PP2P2i7P'))
        # module
        check(unittest, size(h + '3P'))
        # None
        check(None, size(h + ''))
        # NotImplementedType
        check(NotImplemented, size(h))
        # object
        check(object(), size(h + ''))
        # property (descriptor object)
        class C(object):
            def getx(self): return self.__x
            def setx(self, value): self.__x = value
            def delx(self): del self.__x
            x = property(getx, setx, delx, "")
            # checked inside the class body, on the property object itself
            check(x, size(h + '4Pi'))
        # PyCapsule
        # XXX
        # rangeiterator
        check(iter(range(1)), size(h + '4l'))
        # reverse
        check(reversed(''), size(h + 'PP'))
        # range
        check(range(1), size(h + '4P'))
        check(range(66000), size(h + '4P'))
        # set
        # frozenset
        PySet_MINSIZE = 8
        samples = [[], range(10), range(50)]
        # base size: header fields plus PySet_MINSIZE 'lP' entries
        s = size(h + '3P2P' + PySet_MINSIZE*'lP' + 'lP')
        for sample in samples:
            minused = len(sample)
            # NOTE(review): tmp is assigned here but never read in this method.
            if minused == 0: tmp = 1
            # the computation of minused is actually a bit more complicated
            # but this suffices for the sizeof test
            minused = minused*2
            # double the table size until it exceeds twice the element count
            newsize = PySet_MINSIZE
            while newsize <= minused:
                newsize = newsize << 1
            if newsize <= 8:
                check(set(sample), s)
                check(frozenset(sample), s)
            else:
                # larger tables are expected to add newsize 'lP' entries to s
                check(set(sample), s + newsize*struct.calcsize('lP'))
                check(frozenset(sample), s + newsize*struct.calcsize('lP'))
        # setiterator
        check(iter(set()), size(h + 'P3P'))
        # slice
        check(slice(0), size(h + '3P'))
        # super
        check(super(int), size(h + '3P'))
        # tuple
        check((), size(vh))
        check((1,2,3), size(vh) + 3*self.P)
        # type
        # (PyTypeObject + PyNumberMethods + PyMappingMethods +
        #  PySequenceMethods + PyBufferProcs)
        s = size(vh + 'P2P15Pl4PP9PP11PI') + size('16Pi17P 3P 10P 2P 2P')
        # When stackless imports, the expected type size grows by the
        # (even-rounded) byte count below; any import failure leaves s as is.
        try:
            import stackless
            # The number of byte entries in the generated 'slp_methodflags'.
            stacklessSize = 71
            # Make it a multiple of two.
            stacklessSize = stacklessSize + stacklessSize % 2
            s += stacklessSize
        except:
            pass

        check(int, s)
        # class
        class newstyleclass(object): pass
        check(newstyleclass, s)
        # unicode
        # bytes per character in the interpreter's internal representation
        usize = len('\0'.encode('unicode-internal'))
        samples = ['', '1'*100]
        # we need to test for both sizes, because we don't know if the string
        # has been cached
        # (the loop variable s rebinds the type size computed above)
        for s in samples:
            basicsize =  size(h + 'PPPiP') + usize * (len(s) + 1)
            check(s, basicsize)
        # weakref
        import weakref
        check(weakref.ref(int), size(h + '2Pl2P'))
        # weakproxy
        # XXX
        # weakcallableproxy
        check(weakref.proxy(int), size(h + '2Pl2P'))
Example #12
0
 def test_charmap_build(self):
     # Build a 256-entry decoding table that upper-cases every character
     # whose byte is lowercase and lower-cases all others, derive the
     # matching encoding map, and check both directions on a sample string.
     swapped = []
     for code in xrange(256):
         if chr(code).islower():
             swapped.append(unichr(code).upper())
         else:
             swapped.append(unichr(code).lower())
     decodemap = ''.join(swapped)
     encodemap = codecs.charmap_build(decodemap)
     decoded = codecs.charmap_decode(u'Hello World', 'strict', decodemap)
     self.assertEqual(decoded, ('hELLO wORLD', 11))
     encoded = codecs.charmap_encode(u'Hello World', 'strict', encodemap)
     self.assertEqual(encoded, ('hELLO wORLD', 11))
    u'\u0424'  #  0xE6 -> CYRILLIC CAPITAL LETTER EF
    u'\u0413'  #  0xE7 -> CYRILLIC CAPITAL LETTER GHE
    u'\u0425'  #  0xE8 -> CYRILLIC CAPITAL LETTER HA
    u'\u0418'  #  0xE9 -> CYRILLIC CAPITAL LETTER I
    u'\u0419'  #  0xEA -> CYRILLIC CAPITAL LETTER SHORT I
    u'\u041a'  #  0xEB -> CYRILLIC CAPITAL LETTER KA
    u'\u041b'  #  0xEC -> CYRILLIC CAPITAL LETTER EL
    u'\u041c'  #  0xED -> CYRILLIC CAPITAL LETTER EM
    u'\u041d'  #  0xEE -> CYRILLIC CAPITAL LETTER EN
    u'\u041e'  #  0xEF -> CYRILLIC CAPITAL LETTER O
    u'\u041f'  #  0xF0 -> CYRILLIC CAPITAL LETTER PE
    u'\u042f'  #  0xF1 -> CYRILLIC CAPITAL LETTER YA
    u'\u0420'  #  0xF2 -> CYRILLIC CAPITAL LETTER ER
    u'\u0421'  #  0xF3 -> CYRILLIC CAPITAL LETTER ES
    u'\u0422'  #  0xF4 -> CYRILLIC CAPITAL LETTER TE
    u'\u0423'  #  0xF5 -> CYRILLIC CAPITAL LETTER U
    u'\u0416'  #  0xF6 -> CYRILLIC CAPITAL LETTER ZHE
    u'\u0412'  #  0xF7 -> CYRILLIC CAPITAL LETTER VE
    u'\u042c'  #  0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN
    u'\u042b'  #  0xF9 -> CYRILLIC CAPITAL LETTER YERU
    u'\u0417'  #  0xFA -> CYRILLIC CAPITAL LETTER ZE
    u'\u0428'  #  0xFB -> CYRILLIC CAPITAL LETTER SHA
    u'\u042d'  #  0xFC -> CYRILLIC CAPITAL LETTER E
    u'\u0429'  #  0xFD -> CYRILLIC CAPITAL LETTER SHCHA
    u'\u0427'  #  0xFE -> CYRILLIC CAPITAL LETTER CHE
    u'\u042a'  #  0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN
)

### Encoding table
# Built from decoding_table via codecs.charmap_build rather than written out
# by hand.
encoding_table = codecs.charmap_build(decoding_table)
Example #14
0
    def test_objecttypes(self):
        # Size expectations for one representative object of each built-in
        # type.  Each expectation is handed to self.check_sizeof(obj, size),
        # where the size is computed from a struct format string describing
        # the type-specific fields (struct codes: P pointer, n ssize_t,
        # l long, i int, d double, b signed char, B unsigned char, c char,
        # I unsigned int).
        # NOTE(review): calcobjsize / calcvobjsize come from test.support and
        # check_sizeof from the test class, all outside this chunk; presumably
        # they prepend the fixed-size / variable-size object header to the
        # format -- confirm against test.support.
        # check all types defined in Objects/
        size = test.support.calcobjsize
        vsize = test.support.calcvobjsize
        check = self.check_sizeof
        # bool
        check(True, vsize('') + self.longdigit)
        # buffer
        # XXX
        # builtin_function_or_method
        check(len, size('3P'))  # XXX check layout
        # bytearray
        samples = [b'', b'u' * 100000]
        for sample in samples:
            x = bytearray(sample)
            # the allocated buffer (not len(x)) is what is added to the header
            check(x, vsize('n2Pi') + x.__alloc__())
        # bytearray_iterator
        check(iter(bytearray()), size('nP'))

        # cell
        def get_cell():
            x = 42

            def inner():
                return x

            return inner

        check(get_cell().__closure__[0], size('P'))
        # code
        check(get_cell().__code__, size('5i9Pi3P'))
        check(get_cell.__code__, size('5i9Pi3P'))

        def get_cell2(x):
            def inner():
                return x

            return inner

        # NOTE(review): one extra byte is expected only here, where the
        # parameter x is itself captured by inner().
        check(get_cell2.__code__, size('5i9Pi3P') + 1)
        # complex
        check(complex(0, 1), size('2d'))
        # method_descriptor (descriptor object)
        check(str.lower, size('3PP'))
        # classmethod_descriptor (descriptor object)
        # XXX
        # member_descriptor (descriptor object)
        import datetime
        check(datetime.timedelta.days, size('3PP'))
        # getset_descriptor (descriptor object)
        import collections
        check(collections.defaultdict.default_factory, size('3PP'))
        # wrapper_descriptor (descriptor object)
        check(int.__add__, size('3P2P'))
        # method-wrapper (descriptor object)
        check({}.__iter__, size('2P'))
        # dict
        check({}, size('n2P' + '2nPn' + 8 * 'n2P'))
        longdict = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8}
        # an 8-item dict is expected to carry 16 'n2P' entries
        check(longdict, size('n2P' + '2nPn') + 16 * struct.calcsize('n2P'))
        # dictionary-keyiterator
        check({}.keys(), size('P'))
        # dictionary-valueiterator
        check({}.values(), size('P'))
        # dictionary-itemiterator
        check({}.items(), size('P'))
        # dictionary iterator
        check(iter({}), size('P2nPn'))

        # dictproxy
        class C(object):
            pass

        check(C.__dict__, size('P'))
        # BaseException
        check(BaseException(), size('5Pb'))
        # UnicodeEncodeError
        check(UnicodeEncodeError("", "", 0, 0, ""), size('5Pb 2P2nP'))
        # UnicodeDecodeError
        check(UnicodeDecodeError("", b"", 0, 0, ""), size('5Pb 2P2nP'))
        # UnicodeTranslateError
        check(UnicodeTranslateError("", 0, 1, ""), size('5Pb 2P2nP'))
        # ellipses
        check(Ellipsis, size(''))
        # EncodingMap
        import codecs, encodings.iso8859_3
        x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
        check(x, size('32B2iB'))
        # enumerate
        check(enumerate([]), size('n3P'))
        # reverse
        check(reversed(''), size('nP'))
        # float
        check(float(0), size('d'))
        # sys.floatinfo
        check(sys.float_info, vsize('') + self.P * len(sys.float_info))
        # frame
        import inspect
        CO_MAXBLOCKS = 20
        x = inspect.currentframe()
        ncells = len(x.f_code.co_cellvars)
        nfrees = len(x.f_code.co_freevars)
        # number of trailing pointer slots expected after the fixed fields:
        # stack + locals + cell and free variables, minus one
        extras = x.f_code.co_stacksize + x.f_code.co_nlocals +\
                  ncells + nfrees - 1
        check(x, vsize('13P3ic' + CO_MAXBLOCKS * '3i' + 'P' + extras * 'P'))

        # function
        def func():
            pass

        check(func, size('12P'))

        class c():
            @staticmethod
            def foo():
                pass

            @classmethod
            def bar(cls):
                pass

            # The checks run inside the class body, where foo and bar are
            # still the raw staticmethod/classmethod objects.
            # staticmethod
            check(foo, size('PP'))
            # classmethod
            check(bar, size('PP'))

        # generator
        def get_gen():
            yield 1

        check(get_gen(), size('Pb2P'))
        # iterator
        check(iter('abc'), size('lP'))
        # callable-iterator
        import re
        check(re.finditer('', ''), size('2P'))
        # list
        samples = [[], [1, 2, 3], ['1', '2', '3']]
        for sample in samples:
            check(sample, vsize('Pn') + len(sample) * self.P)
        # sortwrapper (list)
        # XXX
        # cmpwrapper (list)
        # XXX
        # listiterator (list)
        check(iter([]), size('lP'))
        # listreverseiterator (list)
        check(reversed([]), size('nP'))
        # int
        check(0, vsize(''))
        check(1, vsize('') + self.longdigit)
        check(-1, vsize('') + self.longdigit)
        # values at and around powers of PyLong_BASE exercise 2 and 3 digits
        PyLong_BASE = 2**sys.int_info.bits_per_digit
        check(int(PyLong_BASE), vsize('') + 2 * self.longdigit)
        check(int(PyLong_BASE**2 - 1), vsize('') + 2 * self.longdigit)
        check(int(PyLong_BASE**2), vsize('') + 3 * self.longdigit)
        # memoryview
        check(memoryview(b''), size('Pnin 2P2n2i5P 3cPn'))
        # module
        check(unittest, size('PnPPP'))
        # None
        check(None, size(''))
        # NotImplementedType
        check(NotImplemented, size(''))
        # object
        check(object(), size(''))

        # property (descriptor object)
        class C(object):
            def getx(self):
                return self.__x

            def setx(self, value):
                self.__x = value

            def delx(self):
                del self.__x

            x = property(getx, setx, delx, "")
            # checked inside the class body, on the property object itself
            check(x, size('4Pi'))

        # PyCapsule
        # XXX
        # rangeiterator
        check(iter(range(1)), size('4l'))
        # reverse
        check(reversed(''), size('nP'))
        # range
        check(range(1), size('4P'))
        check(range(66000), size('4P'))
        # set
        # frozenset
        PySet_MINSIZE = 8
        samples = [[], range(10), range(50)]
        # base size: header fields plus PySet_MINSIZE 'nP' entries
        s = size('3n2P' + PySet_MINSIZE * 'nP' + 'nP')
        for sample in samples:
            minused = len(sample)
            # NOTE(review): tmp is assigned here but never read in this method.
            if minused == 0: tmp = 1
            # the computation of minused is actually a bit more complicated
            # but this suffices for the sizeof test
            minused = minused * 2
            # double the table size until it exceeds twice the element count
            newsize = PySet_MINSIZE
            while newsize <= minused:
                newsize = newsize << 1
            if newsize <= 8:
                check(set(sample), s)
                check(frozenset(sample), s)
            else:
                # larger tables are expected to add newsize 'nP' entries to s
                check(set(sample), s + newsize * struct.calcsize('nP'))
                check(frozenset(sample), s + newsize * struct.calcsize('nP'))
        # setiterator
        check(iter(set()), size('P3n'))
        # slice
        check(slice(0), size('3P'))
        # super
        check(super(int), size('3P'))
        # tuple
        check((), vsize(''))
        check((1, 2, 3), vsize('') + 3 * self.P)
        # type
        # static type: PyTypeObject
        s = vsize('P2n15Pl4Pn9Pn11PIP')
        check(int, s)
        # (PyTypeObject + PyNumberMethods + PyMappingMethods +
        #  PySequenceMethods + PyBufferProcs + 4P)
        s = vsize('P2n15Pl4Pn9Pn11PIP') + struct.calcsize('34P 3P 10P 2P 4P')
        # Separate block for PyDictKeysObject with 4 entries
        s += struct.calcsize("2nPn") + 4 * struct.calcsize("n2P")

        # class
        class newstyleclass(object):
            pass

        check(newstyleclass, s)
        # dict with shared keys
        check(newstyleclass().__dict__, size('n2P' + '2nPn'))
        # unicode
        # each tuple contains a string and its expected character size
        # don't put any static strings here, as they may contain
        # wchar_t or UTF-8 representations
        samples = [
            '1' * 100, '\xff' * 50, '\u0100' * 40, '\uffff' * 100,
            '\U00010000' * 30, '\U0010ffff' * 100
        ]
        asciifields = "nnbP"
        compactfields = asciifields + "nPn"
        # NOTE(review): unicodefields is not used below; see the TODOs further
        # down.
        unicodefields = compactfields + "P"
        # The expected size depends on the widest code point in the string:
        # 1, 2 or 4 bytes per character, counting one extra trailing character.
        # (the loop variable s rebinds the type size computed above)
        for s in samples:
            maxchar = ord(max(s))
            if maxchar < 128:
                L = size(asciifields) + len(s) + 1
            elif maxchar < 256:
                L = size(compactfields) + len(s) + 1
            elif maxchar < 65536:
                L = size(compactfields) + 2 * (len(s) + 1)
            else:
                L = size(compactfields) + 4 * (len(s) + 1)
            check(s, L)
        # verify that the UTF-8 size is accounted for
        s = chr(0x4000)  # 4 bytes canonical representation
        check(s, size(compactfields) + 4)
        # compile() will trigger the generation of the UTF-8
        # representation as a side effect
        compile(s, "<stdin>", "eval")
        check(s, size(compactfields) + 4 + 4)
        # TODO: add check that forces the presence of wchar_t representation
        # TODO: add check that forces layout of unicodefields
        # weakref
        import weakref
        check(weakref.ref(int), size('2Pn2P'))
        # weakproxy
        # XXX
        # weakcallableproxy
        check(weakref.proxy(int), size('2Pn2P'))
Example #15
0
    def test_objecttypes(self):
        # Size expectations for one representative object of each built-in
        # type (Python 2 variant: covers buffer, old-style classes, long,
        # str/unicode, xrange and file).  Each expectation is handed to
        # self.check_sizeof(obj, size), where the size is computed from a
        # struct format string describing the type-specific fields (struct
        # codes: P pointer, l long, i int, d double, B unsigned char, c char,
        # I unsigned int).
        # NOTE(review): calcobjsize / calcvobjsize come from test.test_support
        # and check_sizeof from the test class, all outside this chunk;
        # presumably they prepend the fixed-size / variable-size object header
        # to the format -- confirm against test.test_support.
        # check all types defined in Objects/
        size = test.test_support.calcobjsize
        vsize = test.test_support.calcvobjsize
        check = self.check_sizeof
        # bool
        check(True, size('l'))
        # buffer
        with test.test_support.check_py3k_warnings():
            check(buffer(''), size('2P2Pil'))
        # builtin_function_or_method
        check(len, size('3P'))
        # bytearray
        samples = ['', 'u' * 100000]
        for sample in samples:
            x = bytearray(sample)
            # the allocated buffer (not len(x)) is what is added to the header
            check(x, vsize('iPP') + x.__alloc__())
        # bytearray_iterator
        check(iter(bytearray()), size('PP'))

        # cell
        def get_cell():
            x = 42

            def inner():
                return x

            return inner

        check(get_cell().func_closure[0], size('P'))

        # classobj (old-style class)
        class class_oldstyle():
            def method():
                pass

        check(class_oldstyle, size('7P'))
        # instance (old-style class)
        check(class_oldstyle(), size('3P'))
        # instancemethod (old-style class)
        check(class_oldstyle().method, size('4P'))
        # complex
        check(complex(0, 1), size('2d'))
        # code
        check(get_cell().func_code, size('4i8Pi3P'))
        # BaseException
        check(BaseException(), size('3P'))
        # UnicodeEncodeError
        check(UnicodeEncodeError("", u"", 0, 0, ""), size('5P2PP'))
        # UnicodeDecodeError
        check(UnicodeDecodeError("", "", 0, 0, ""), size('5P2PP'))
        # UnicodeTranslateError
        check(UnicodeTranslateError(u"", 0, 1, ""), size('5P2PP'))
        # method_descriptor (descriptor object)
        check(str.lower, size('2PP'))
        # classmethod_descriptor (descriptor object)
        # XXX
        # member_descriptor (descriptor object)
        import datetime
        check(datetime.timedelta.days, size('2PP'))
        # getset_descriptor (descriptor object)
        import __builtin__
        check(__builtin__.file.closed, size('2PP'))
        # wrapper_descriptor (descriptor object)
        check(int.__add__, size('2P2P'))

        # dictproxy
        class C(object):
            pass

        check(C.__dict__, size('P'))
        # method-wrapper (descriptor object)
        check({}.__iter__, size('2P'))
        # dict
        check({}, size('3P2P' + 8 * 'P2P'))
        x = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8}
        # an 8-item dict is expected to carry 16 additional 'P2P' entries
        check(x, size('3P2P' + 8 * 'P2P') + 16 * struct.calcsize('P2P'))
        # dictionary-keyiterator
        check({}.iterkeys(), size('P2PPP'))
        # dictionary-valueiterator
        check({}.itervalues(), size('P2PPP'))
        # dictionary-itemiterator
        check({}.iteritems(), size('P2PPP'))
        # ellipses
        check(Ellipsis, size(''))
        # EncodingMap
        import codecs, encodings.iso8859_3
        x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
        check(x, size('32B2iB'))
        # enumerate
        check(enumerate([]), size('l3P'))
        # file
        # NOTE(review): self.file is set up outside this method.
        check(self.file, size('4P2i4P3i3P3i'))
        # float
        check(float(0), size('d'))
        # sys.floatinfo
        check(sys.float_info, vsize('') + self.P * len(sys.float_info))
        # frame
        import inspect
        CO_MAXBLOCKS = 20
        x = inspect.currentframe()
        ncells = len(x.f_code.co_cellvars)
        nfrees = len(x.f_code.co_freevars)
        # number of trailing pointer slots expected after the fixed fields:
        # stack + locals + cell and free variables, minus one
        extras = x.f_code.co_stacksize + x.f_code.co_nlocals +\
                 ncells + nfrees - 1
        check(x, vsize('12P3i' + CO_MAXBLOCKS * '3i' + 'P' + extras * 'P'))

        # function
        def func():
            pass

        check(func, size('9P'))

        class c():
            @staticmethod
            def foo():
                pass

            @classmethod
            def bar(cls):
                pass

            # The checks run inside the class body, where foo and bar are
            # still the raw staticmethod/classmethod objects.
            # staticmethod
            check(foo, size('P'))
            # classmethod
            check(bar, size('P'))

        # generator
        def get_gen():
            yield 1

        check(get_gen(), size('Pi2P'))
        # integer
        check(1, size('l'))
        check(100, size('l'))
        # iterator
        check(iter('abc'), size('lP'))
        # callable-iterator
        import re
        check(re.finditer('', ''), size('2P'))
        # list
        samples = [[], [1, 2, 3], ['1', '2', '3']]
        for sample in samples:
            check(sample, vsize('PP') + len(sample) * self.P)
        # sortwrapper (list)
        # XXX
        # cmpwrapper (list)
        # XXX
        # listiterator (list)
        check(iter([]), size('lP'))
        # listreverseiterator (list)
        check(reversed([]), size('lP'))
        # long
        check(0L, vsize(''))
        check(1L, vsize('') + self.longdigit)
        check(-1L, vsize('') + self.longdigit)
        # values at and around powers of PyLong_BASE exercise 2 and 3 digits
        PyLong_BASE = 2**sys.long_info.bits_per_digit
        check(long(PyLong_BASE), vsize('') + 2 * self.longdigit)
        check(long(PyLong_BASE**2 - 1), vsize('') + 2 * self.longdigit)
        check(long(PyLong_BASE**2), vsize('') + 3 * self.longdigit)
        # module
        check(unittest, size('P'))
        # None
        check(None, size(''))
        # object
        check(object(), size(''))

        # property (descriptor object)
        class C(object):
            def getx(self):
                return self.__x

            def setx(self, value):
                self.__x = value

            def delx(self):
                del self.__x

            x = property(getx, setx, delx, "")
            # checked inside the class body, on the property object itself
            check(x, size('4Pi'))

        # PyCObject
        # PyCapsule
        # XXX
        # rangeiterator
        check(iter(xrange(1)), size('4l'))
        # reverse
        check(reversed(''), size('PP'))
        # set
        # frozenset
        PySet_MINSIZE = 8
        samples = [[], range(10), range(50)]
        # base size: header fields plus PySet_MINSIZE 'lP' entries
        s = size('3P2P' + PySet_MINSIZE * 'lP' + 'lP')
        for sample in samples:
            minused = len(sample)
            # NOTE(review): tmp is assigned here but never read in this method.
            if minused == 0: tmp = 1
            # the computation of minused is actually a bit more complicated
            # but this suffices for the sizeof test
            minused = minused * 2
            # double the table size until it exceeds twice the element count
            newsize = PySet_MINSIZE
            while newsize <= minused:
                newsize = newsize << 1
            if newsize <= 8:
                check(set(sample), s)
                check(frozenset(sample), s)
            else:
                # larger tables are expected to add newsize 'lP' entries to s
                check(set(sample), s + newsize * struct.calcsize('lP'))
                check(frozenset(sample), s + newsize * struct.calcsize('lP'))
        # setiterator
        check(iter(set()), size('P3P'))
        # slice
        check(slice(1), size('3P'))
        # str
        # built directly with struct.calcsize on the variable-size header
        # prefix rather than through vsize()
        vh = test.test_support._vheader
        check('', struct.calcsize(vh + 'lic'))
        check('abc', struct.calcsize(vh + 'lic') + 3)
        # super
        check(super(int), size('3P'))
        # tuple
        check((), vsize(''))
        check((1, 2, 3), vsize('') + 3 * self.P)
        # tupleiterator
        check(iter(()), size('lP'))
        # type
        # (PyTypeObject + PyNumberMethods +  PyMappingMethods +
        #  PySequenceMethods + PyBufferProcs)
        s = vsize('P2P15Pl4PP9PP11PI') + struct.calcsize('41P 10P 3P 6P')

        class newstyleclass(object):
            pass

        check(newstyleclass, s)
        # builtin type
        check(int, s)
        # NotImplementedType
        import types
        check(types.NotImplementedType, s)
        # unicode
        # bytes per character in the interpreter's internal representation
        usize = len(u'\0'.encode('unicode-internal'))
        samples = [u'', u'1' * 100]
        # we need to test for both sizes, because we don't know if the string
        # has been cached
        # (the loop variable s rebinds the type size computed above)
        for s in samples:
            check(s, size('PPlP') + usize * (len(s) + 1))
        # weakref
        import weakref
        check(weakref.ref(int), size('2Pl2P'))
        # weakproxy
        # XXX
        # weakcallableproxy
        check(weakref.proxy(int), size('2Pl2P'))
        # xrange
        check(xrange(1), size('3l'))
        check(xrange(66000), size('3l'))