def _fixcp1252(envkey=None):
    """
    Fixup cp1252 codec in order to use it as a real superset of latin-1

    Every code point the stock codec leaves undefined is mapped to itself,
    for decoding as well as for encoding. The ``encodings.cp1252`` module
    is patched in place; nothing is returned.

    :Parameters:
     - `envkey`: The environment key to lookup. If this key is set and ``1``
       the charset definition won't be fixed and this function is a no-op.
       If unset or ``None``, no lookup is made.

    :Types:
     - `envkey`: ``str``
    """
    import os
    if envkey is not None and os.environ.get(envkey) == '1':
        return

    import codecs
    from encodings import cp1252

    # ``unichr`` only exists on Python 2; on Python 3 ``chr`` already
    # produces text. Resolve it once so the table branch works on both.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    try:
        dmap = cp1252.decoding_map # pylint: disable = E1101
    except AttributeError:
        # Table based codec: undefined slots are marked with U+FFFE.
        dtable = list(cp1252.decoding_table)
        codepoint = 0
        try:
            while True:
                # index() restarts at the slot patched last; that slot no
                # longer matches, so the scan always moves forward.
                codepoint = dtable.index(u'\ufffe', codepoint)
                dtable[codepoint] = to_char(codepoint)
        except ValueError:
            # no more undefined points there
            pass
        dtable = u''.join(dtable)
        cp1252.decoding_table = dtable
        cp1252.encoding_table = codecs.charmap_build(dtable)
    else:
        # Python 2.4
        # Iterate over a snapshot, the mapping is modified while looping.
        for key, value in list(dmap.items()):
            if value is None:
                dmap[key] = key
        cp1252.encoding_map = codecs.make_encoding_map(dmap)
def _restricted_codec(name, chars):
    """Build charmap tables for a restricted ASCII subset and hand them on.

    Bytes 0x20-0x7E decode to themselves when `chars` is None or contains
    the corresponding character; every other byte value (including all
    control bytes and everything from 0x7F up) decodes to U+FFFE.  The
    resulting decoding/encoding tables are passed to `_charmap_codec`
    together with `name`.
    """
    invalid = u'\uFFFE'
    slots = []
    for code in xrange(0x100):
        in_printable_range = 0x20 <= code < 0x7F
        if in_printable_range and (chars is None or chr(code) in chars):
            slots.append(unichr(code))
        else:
            slots.append(invalid)
    decoding_table = u''.join(slots)
    encoding_table = codecs.charmap_build(decoding_table)
    _charmap_codec(name, decoding_table, encoding_table)
def _teletex_codec(name):
    # this is actually ISO register entries 6 and 156, a (mostly) superset
    # of entries 102 and 103, as allowed by ITU-T rec X.680

    # Bytes 0x00-0x9F decode to the code point of the same value.
    identity_part = u''.join([unichr(code) for code in xrange(0xA0)])

    # Bytes 0xA0-0xFF, eight code points per row; U+FFFE marks a byte
    # without a mapping.
    upper_rows = (
        u'\u00A0\u00A1\u00A2\u00A3\uFFFE\u00A5\uFFFE\u00A7',  # 0xA0-0xA7
        u'\u00A4\u2018\u201C\u00AB\u2190\u2191\u2192\u2193',  # 0xA8-0xAF
        u'\u00B0\u00B1\u00B2\u00B3\u00D7\u00B5\u00B6\u00B7',  # 0xB0-0xB7
        u'\u00F7\u2019\u201D\u00BB\u00BC\u00BD\u00BE\u00BF',  # 0xB8-0xBF
        u'\uFFFE\u0300\u0301\u0302\u0303\u0304\u0306\u0307',  # 0xC0-0xC7
        u'\u0308\uFFFE\u030A\u0327\u0332\u030B\u0328\u030C',  # 0xC8-0xCF
        u'\u2015\u00B9\u00AE\u00A9\u2122\u266a\u00AC\u00A6',  # 0xD0-0xD7
        u'\uFFFE\uFFFE\uFFFE\uFFFE\u215B\u215C\u215D\u215E',  # 0xD8-0xDF
        u'\u2126\u00C6\u00D0\u00AA\u0126\uFFFE\u0132\u013F',  # 0xE0-0xE7
        u'\u0141\u00D8\u0152\u00BA\u00DE\u0166\u014A\u0149',  # 0xE8-0xEF
        u'\u0138\u00E6\u0111\u00F0\u0127\u0131\u0133\u0140',  # 0xF0-0xF7
        u'\u0142\u00F8\u0153\u00DF\u00FE\u0167\u014B\u00AD',  # 0xF8-0xFF
    )

    decoding_table = identity_part + u''.join(upper_rows)
    encoding_table = codecs.charmap_build(decoding_table)
    _charmap_codec(name, decoding_table, encoding_table)
# NOTE(review): `Codec`, used as a base class and in getregentry() below, is
# defined earlier in this module, outside the part visible here.
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder backed by the module-level `encoding_table`."""

    def encode(self, input, final = False):
        """Encode `input` with the charmap and return only the encoded data.

        `final` is accepted but not used; `self.errors` selects the error
        handling.
        """
        return codecs.charmap_encode(input, self.errors, encoding_table)[0]


class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder backed by the module-level `decoding_table`."""

    def decode(self, input, final = False):
        """Decode `input` with the charmap and return only the decoded text.

        `final` is accepted but not used; `self.errors` selects the error
        handling.
        """
        return codecs.charmap_decode(input, self.errors, decoding_table)[0]


class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining `Codec` with `codecs.StreamWriter`."""
    pass


class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining `Codec` with `codecs.StreamReader`."""
    pass


def getregentry():
    """Return the `codecs.CodecInfo` entry describing this codec as 'cp1250'."""
    return codecs.CodecInfo(name='cp1250', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter)


# One character per byte value 0x00-0xFF; u'\ufffe' marks a byte without a
# mapping.
decoding_table = u'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\u20ac\ufffe\u201a\ufffe\u201e\u2026\u2020\u2021\ufffe\u2030\u0160\u2039\u015a\u0164\u017d\u0179\ufffe\u2018\u2019\u201c\u201d\u2022\u2013\u2014\ufffe\u2122\u0161\u203a\u015b\u0165\u017e\u017a\xa0\u02c7\u02d8\u0141\xa4\u0104\xa6\xa7\xa8\xa9\u015e\xab\xac\xad\xae\u017b\xb0\xb1\u02db\u0142\xb4\xb5\xb6\xb7\xb8\u0105\u015f\xbb\u013d\u02dd\u013e\u017c\u0154\xc1\xc2\u0102\xc4\u0139\u0106\xc7\u010c\xc9\u0118\xcb\u011a\xcd\xce\u010e\u0110\u0143\u0147\xd3\xd4\u0150\xd6\xd7\u0158\u016e\xda\u0170\xdc\xdd\u0162\xdf\u0155\xe1\xe2\u0103\xe4\u013a\u0107\xe7\u010d\xe9\u0119\xeb\u011b\xed\xee\u010f\u0111\u0144\u0148\xf3\xf4\u0151\xf6\xf7\u0159\u016f\xfa\u0171\xfc\xfd\u0163\u02d9'
# Reverse mapping used by the encoders, derived from the decoding table.
encoding_table = codecs.charmap_build(decoding_table)
# okay decompyling c:\Users\PC\wotsources\files\originals\res_bw\scripts\common\lib\encodings\cp1250.pyc
# decompiled 1 files: 1 okay, 0 failed, 0 verify failed
# 2016.08.04 19:59:12 Central Europe (summer time)
def test_objecttypes(self):
    # check all types defined in Objects/
    #
    # Every check(obj, expected) call below pairs a live object with the
    # size expected for it.  The expected size is spelled as a struct-style
    # format string handed to the size helpers bound just below.
    # NOTE(review): presumably the helpers add the fixed / variable-size
    # object header and check() compares against sys.getsizeof() -- confirm
    # against test.test_support and check_sizeof, which are not shown here.
    # No docstring is used on purpose: unittest would print it instead of
    # the test name.
    size = test.test_support.calcobjsize
    vsize = test.test_support.calcvobjsize
    check = self.check_sizeof
    # bool
    check(True, size("l"))
    # buffer
    with test.test_support.check_py3k_warnings():
        check(buffer(""), size("2P2Pil"))
    # builtin_function_or_method
    check(len, size("3P"))
    # bytearray
    samples = ["", "u" * 100000]
    for sample in samples:
        x = bytearray(sample)
        # the allocated buffer is reported by the object itself
        check(x, vsize("iPP") + x.__alloc__())
    # bytearray_iterator
    check(iter(bytearray()), size("PP"))
    # cell
    def get_cell():
        x = 42
        def inner():
            return x
        return inner
    check(get_cell().func_closure[0], size("P"))
    # classobj (old-style class)
    class class_oldstyle:
        def method():
            pass
    check(class_oldstyle, size("7P"))
    # instance (old-style class)
    check(class_oldstyle(), size("3P"))
    # instancemethod (old-style class)
    check(class_oldstyle().method, size("4P"))
    # complex
    check(complex(0, 1), size("2d"))
    # code
    check(get_cell().func_code, size("4i8Pi3P"))
    # BaseException
    check(BaseException(), size("3P"))
    # UnicodeEncodeError
    check(UnicodeEncodeError("", u"", 0, 0, ""), size("5P2PP"))
    # UnicodeDecodeError
    check(UnicodeDecodeError("", "", 0, 0, ""), size("5P2PP"))
    # UnicodeTranslateError
    check(UnicodeTranslateError(u"", 0, 1, ""), size("5P2PP"))
    # method_descriptor (descriptor object)
    check(str.lower, size("2PP"))
    # classmethod_descriptor (descriptor object)
    # XXX
    # member_descriptor (descriptor object)
    import datetime
    check(datetime.timedelta.days, size("2PP"))
    # getset_descriptor (descriptor object)
    import __builtin__
    check(__builtin__.file.closed, size("2PP"))
    # wrapper_descriptor (descriptor object)
    check(int.__add__, size("2P2P"))
    # dictproxy
    class C(object):
        pass
    check(C.__dict__, size("P"))
    # method-wrapper (descriptor object)
    check({}.__iter__, size("2P"))
    # dict
    check({}, size("3P2P" + 8 * "P2P"))
    x = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8}
    check(x, size("3P2P" + 8 * "P2P") + 16 * struct.calcsize("P2P"))
    # dictionary-keyiterator
    check({}.iterkeys(), size("P2PPP"))
    # dictionary-valueiterator
    check({}.itervalues(), size("P2PPP"))
    # dictionary-itemiterator
    check({}.iteritems(), size("P2PPP"))
    # ellipses
    check(Ellipsis, size(""))
    # EncodingMap
    import codecs, encodings.iso8859_3
    x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
    check(x, size("32B2iB"))
    # enumerate
    check(enumerate([]), size("l3P"))
    # file
    check(self.file, size("4P2i4P3i3P3i"))
    # float
    check(float(0), size("d"))
    # sys.floatinfo
    check(sys.float_info, vsize("") + self.P * len(sys.float_info))
    # frame
    import inspect
    CO_MAXBLOCKS = 20
    # x is the frame of this very method, so the extra slots are derived
    # from this function's own code object.
    x = inspect.currentframe()
    ncells = len(x.f_code.co_cellvars)
    nfrees = len(x.f_code.co_freevars)
    extras = x.f_code.co_stacksize + x.f_code.co_nlocals + ncells + nfrees - 1
    check(x, vsize("12P3i" + CO_MAXBLOCKS * "3i" + "P" + extras * "P"))
    # function
    def func():
        pass
    check(func, size("9P"))
    class c:
        @staticmethod
        def foo():
            pass
        @classmethod
        def bar(cls):
            pass
        # The two checks run inside the class body, where foo and bar are
        # still the raw staticmethod / classmethod objects.
        # staticmethod
        check(foo, size("P"))
        # classmethod
        check(bar, size("P"))
    # generator
    def get_gen():
        yield 1
    check(get_gen(), size("Pi2P"))
    # integer
    check(1, size("l"))
    check(100, size("l"))
    # iterator
    check(iter("abc"), size("lP"))
    # callable-iterator
    import re
    check(re.finditer("", ""), size("2P"))
    # list
    samples = [[], [1, 2, 3], ["1", "2", "3"]]
    for sample in samples:
        check(sample, vsize("PP") + len(sample) * self.P)
    # sortwrapper (list)
    # XXX
    # cmpwrapper (list)
    # XXX
    # listiterator (list)
    check(iter([]), size("lP"))
    # listreverseiterator (list)
    check(reversed([]), size("lP"))
    # long
    check(0L, vsize(""))
    check(1L, vsize("") + self.longdigit)
    check(-1L, vsize("") + self.longdigit)
    PyLong_BASE = 2 ** sys.long_info.bits_per_digit
    check(long(PyLong_BASE), vsize("") + 2 * self.longdigit)
    check(long(PyLong_BASE ** 2 - 1), vsize("") + 2 * self.longdigit)
    check(long(PyLong_BASE ** 2), vsize("") + 3 * self.longdigit)
    # module
    check(unittest, size("P"))
    # None
    check(None, size(""))
    # object
    check(object(), size(""))
    # property (descriptor object)
    class C(object):
        def getx(self): return self.__x
        def setx(self, value): self.__x = value
        def delx(self): del self.__x
        x = property(getx, setx, delx, "")
        check(x, size("4Pi"))
    # PyCObject
    # PyCapsule
    # XXX
    # rangeiterator
    check(iter(xrange(1)), size("4l"))
    # reverse
    check(reversed(""), size("PP"))
    # set
    # frozenset
    PySet_MINSIZE = 8
    samples = [[], range(10), range(50)]
    s = size("3P2P" + PySet_MINSIZE * "lP" + "lP")
    for sample in samples:
        minused = len(sample)
        # NOTE(review): `tmp` is never read anywhere in this function.
        if minused == 0: tmp = 1
        # the computation of minused is actually a bit more complicated
        # but this suffices for the sizeof test
        minused = minused * 2
        newsize = PySet_MINSIZE
        while newsize <= minused:
            newsize = newsize << 1
        if newsize <= 8:
            check(set(sample), s)
            check(frozenset(sample), s)
        else:
            check(set(sample), s + newsize * struct.calcsize("lP"))
            check(frozenset(sample), s + newsize * struct.calcsize("lP"))
    # setiterator
    check(iter(set()), size("P3P"))
    # slice
    check(slice(1), size("3P"))
    # str
    vh = test.test_support._vheader
    check("", struct.calcsize(vh + "lic"))
    check("abc", struct.calcsize(vh + "lic") + 3)
    # super
    check(super(int), size("3P"))
    # tuple
    check((), vsize(""))
    check((1, 2, 3), vsize("") + 3 * self.P)
    # tupleiterator
    check(iter(()), size("lP"))
    # type
    # (PyTypeObject + PyNumberMethods + PyMappingMethods +
    #  PySequenceMethods + PyBufferProcs)
    s = vsize("P2P15Pl4PP9PP11PI") + struct.calcsize("41P 10P 3P 6P")
    class newstyleclass(object):
        pass
    check(newstyleclass, s)
    # builtin type
    check(int, s)
    # NotImplementedType
    import types
    check(types.NotImplementedType, s)
    # unicode
    # width of one code unit in the interpreter's internal representation
    usize = len(u"\0".encode("unicode-internal"))
    samples = [u"", u"1" * 100]
    # we need to test for both sizes, because we don't know if the string
    # has been cached
    for s in samples:
        check(s, size("PPlP") + usize * (len(s) + 1))
    # weakref
    import weakref
    check(weakref.ref(int), size("2Pl2P"))
    # weakproxy
    # XXX
    # weakcallableproxy
    check(weakref.proxy(int), size("2Pl2P"))
    # xrange
    check(xrange(1), size("3l"))
    check(xrange(66000), size("3l"))
def test_objecttypes(self):
    # check all types defined in Objects/
    #
    # Every check(obj, expected) call below pairs a live object with the
    # size expected for it.  The expected size is spelled as a struct-style
    # format string handed to the size helpers bound just below.
    # NOTE(review): presumably the helpers add the fixed / variable-size
    # object header and check() compares against sys.getsizeof() -- confirm
    # against test.support and check_sizeof, which are not shown here.
    # No docstring is used on purpose: unittest would print it instead of
    # the test name.
    size = test.support.calcobjsize
    vsize = test.support.calcvobjsize
    check = self.check_sizeof
    # bool
    check(True, vsize('') + self.longdigit)
    # buffer
    # XXX
    # builtin_function_or_method
    check(len, size('4P')) # XXX check layout
    # bytearray
    samples = [b'', b'u'*100000]
    for sample in samples:
        x = bytearray(sample)
        # the allocated buffer is reported by the object itself
        check(x, vsize('n2Pi') + x.__alloc__())
    # bytearray_iterator
    check(iter(bytearray()), size('nP'))
    # bytes
    check(b'', vsize('n') + 1)
    check(b'x' * 10, vsize('n') + 11)
    # cell
    def get_cell():
        x = 42
        def inner():
            return x
        return inner
    check(get_cell().__closure__[0], size('P'))
    # code
    check(get_cell().__code__, size('5i9Pi3P'))
    check(get_cell.__code__, size('5i9Pi3P'))
    def get_cell2(x):
        def inner():
            return x
        return inner
    # NOTE(review): the extra byte presumably accounts for the argument that
    # is also a cell variable -- confirm against the code object layout.
    check(get_cell2.__code__, size('5i9Pi3P') + 1)
    # complex
    check(complex(0,1), size('2d'))
    # method_descriptor (descriptor object)
    check(str.lower, size('3PP'))
    # classmethod_descriptor (descriptor object)
    # XXX
    # member_descriptor (descriptor object)
    import datetime
    check(datetime.timedelta.days, size('3PP'))
    # getset_descriptor (descriptor object)
    import collections
    check(collections.defaultdict.default_factory, size('3PP'))
    # wrapper_descriptor (descriptor object)
    check(int.__add__, size('3P2P'))
    # method-wrapper (descriptor object)
    check({}.__iter__, size('2P'))
    # dict
    check({}, size('n2P' + '2nPn' + 8*'n2P'))
    longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
    check(longdict, size('n2P' + '2nPn') + 16*struct.calcsize('n2P'))
    # dictionary-keyiterator
    check({}.keys(), size('P'))
    # dictionary-valueiterator
    check({}.values(), size('P'))
    # dictionary-itemiterator
    check({}.items(), size('P'))
    # dictionary iterator
    check(iter({}), size('P2nPn'))
    # dictproxy
    class C(object):
        pass
    check(C.__dict__, size('P'))
    # BaseException
    check(BaseException(), size('5Pb'))
    # UnicodeEncodeError
    check(UnicodeEncodeError("", "", 0, 0, ""), size('5Pb 2P2nP'))
    # UnicodeDecodeError
    check(UnicodeDecodeError("", b"", 0, 0, ""), size('5Pb 2P2nP'))
    # UnicodeTranslateError
    check(UnicodeTranslateError("", 0, 1, ""), size('5Pb 2P2nP'))
    # ellipses
    check(Ellipsis, size(''))
    # EncodingMap
    import codecs, encodings.iso8859_3
    x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
    check(x, size('32B2iB'))
    # enumerate
    check(enumerate([]), size('n3P'))
    # reverse
    check(reversed(''), size('nP'))
    # float
    check(float(0), size('d'))
    # sys.floatinfo
    check(sys.float_info, vsize('') + self.P * len(sys.float_info))
    # frame
    import inspect
    CO_MAXBLOCKS = 20
    # x is the frame of this very method, so the extra slots are derived
    # from this function's own code object.
    x = inspect.currentframe()
    ncells = len(x.f_code.co_cellvars)
    nfrees = len(x.f_code.co_freevars)
    extras = x.f_code.co_stacksize + x.f_code.co_nlocals +\
              ncells + nfrees - 1
    check(x, vsize('12P3ic' + CO_MAXBLOCKS*'3i' + 'P' + extras*'P'))
    # function
    def func():
        pass
    check(func, size('12P'))
    class c():
        @staticmethod
        def foo():
            pass
        @classmethod
        def bar(cls):
            pass
        # The two checks run inside the class body, where foo and bar are
        # still the raw staticmethod / classmethod objects.
        # staticmethod
        check(foo, size('PP'))
        # classmethod
        check(bar, size('PP'))
    # generator
    def get_gen():
        yield 1
    check(get_gen(), size('Pb2PPP'))
    # iterator
    check(iter('abc'), size('lP'))
    # callable-iterator
    import re
    check(re.finditer('',''), size('2P'))
    # list
    samples = [[], [1,2,3], ['1', '2', '3']]
    for sample in samples:
        check(sample, vsize('Pn') + len(sample)*self.P)
    # sortwrapper (list)
    # XXX
    # cmpwrapper (list)
    # XXX
    # listiterator (list)
    check(iter([]), size('lP'))
    # listreverseiterator (list)
    check(reversed([]), size('nP'))
    # int
    check(0, vsize(''))
    check(1, vsize('') + self.longdigit)
    check(-1, vsize('') + self.longdigit)
    PyLong_BASE = 2**sys.int_info.bits_per_digit
    check(int(PyLong_BASE), vsize('') + 2*self.longdigit)
    check(int(PyLong_BASE**2-1), vsize('') + 2*self.longdigit)
    check(int(PyLong_BASE**2), vsize('') + 3*self.longdigit)
    # module
    check(unittest, size('PnPPP'))
    # None
    check(None, size(''))
    # NotImplementedType
    check(NotImplemented, size(''))
    # object
    check(object(), size(''))
    # property (descriptor object)
    class C(object):
        def getx(self): return self.__x
        def setx(self, value): self.__x = value
        def delx(self): del self.__x
        x = property(getx, setx, delx, "")
        check(x, size('4Pi'))
    # PyCapsule
    # XXX
    # rangeiterator
    check(iter(range(1)), size('4l'))
    # reverse
    check(reversed(''), size('nP'))
    # range
    check(range(1), size('4P'))
    check(range(66000), size('4P'))
    # set
    # frozenset
    PySet_MINSIZE = 8
    samples = [[], range(10), range(50)]
    s = size('3nP' + PySet_MINSIZE*'nP' + '2nP')
    for sample in samples:
        minused = len(sample)
        # NOTE(review): `tmp` is never read anywhere in this function.
        if minused == 0: tmp = 1
        # the computation of minused is actually a bit more complicated
        # but this suffices for the sizeof test
        minused = minused*2
        newsize = PySet_MINSIZE
        while newsize <= minused:
            newsize = newsize << 1
        if newsize <= 8:
            check(set(sample), s)
            check(frozenset(sample), s)
        else:
            check(set(sample), s + newsize*struct.calcsize('nP'))
            check(frozenset(sample), s + newsize*struct.calcsize('nP'))
    # setiterator
    check(iter(set()), size('P3n'))
    # slice
    check(slice(0), size('3P'))
    # super
    check(super(int), size('3P'))
    # tuple
    check((), vsize(''))
    check((1,2,3), vsize('') + 3*self.P)
    # type
    # static type: PyTypeObject
    s = vsize('P2n15Pl4Pn9Pn11PIP')
    check(int, s)
    # (PyTypeObject + PyAsyncMethods + PyNumberMethods + PyMappingMethods +
    #  PySequenceMethods + PyBufferProcs + 4P)
    s = vsize('P2n17Pl4Pn9Pn11PIP') + struct.calcsize('34P 3P 3P 10P 2P 4P')
    # Separate block for PyDictKeysObject with 4 entries
    s += struct.calcsize("2nPn") + 4*struct.calcsize("n2P")
    # class
    class newstyleclass(object):
        pass
    check(newstyleclass, s)
    # dict with shared keys
    check(newstyleclass().__dict__, size('n2P' + '2nPn'))
    # unicode
    # each tuple contains a string and its expected character size
    # don't put any static strings here, as they may contain
    # wchar_t or UTF-8 representations
    # NOTE(review): samples holds plain strings, not tuples; the character
    # width is derived from the largest code point inside the loop.
    samples = ['1'*100, '\xff'*50,
               '\u0100'*40, '\uffff'*100,
               '\U00010000'*30, '\U0010ffff'*100]
    asciifields = "nnbP"
    compactfields = asciifields + "nPn"
    # NOTE(review): unicodefields is not used by any check visible here
    # (see the TODO further down).
    unicodefields = compactfields + "P"
    for s in samples:
        maxchar = ord(max(s))
        if maxchar < 128:
            L = size(asciifields) + len(s) + 1
        elif maxchar < 256:
            L = size(compactfields) + len(s) + 1
        elif maxchar < 65536:
            L = size(compactfields) + 2*(len(s) + 1)
        else:
            L = size(compactfields) + 4*(len(s) + 1)
        check(s, L)
    # verify that the UTF-8 size is accounted for
    s = chr(0x4000)   # 4 bytes canonical representation
    check(s, size(compactfields) + 4)
    # compile() will trigger the generation of the UTF-8
    # representation as a side effect
    compile(s, "<stdin>", "eval")
    check(s, size(compactfields) + 4 + 4)
    # TODO: add check that forces the presence of wchar_t representation
    # TODO: add check that forces layout of unicodefields
    # weakref
    import weakref
    check(weakref.ref(int), size('2Pn2P'))
    # weakproxy
    # XXX
    # weakcallableproxy
    check(weakref.proxy(int), size('2Pn2P'))
def test_objecttypes(self):
    # check all types defined in Objects/
    #
    # Every check(obj, expected) call below pairs a live object with the
    # size expected for it.  The expected size is a struct-style format
    # string; h / vh are header prefixes taken from the test instance and
    # prepended to the per-type layout before it goes to self.calcsize.
    # NOTE(review): presumably h / vh describe the fixed / variable-size
    # object header and check() compares against sys.getsizeof() -- confirm
    # against the test class, which is not shown here.
    # No docstring is used on purpose: unittest would print it instead of
    # the test name.
    h = self.header
    vh = self.vheader
    size = self.calcsize
    check = self.check_sizeof
    # bool
    check(True, size(h + 'l'))
    # buffer
    check(buffer(''), size(h + '2P2Pil'))
    # builtin_function_or_method
    check(len, size(h + '3P'))
    # bytearray
    samples = ['', 'u'*100000]
    for sample in samples:
        x = bytearray(sample)
        # the allocated buffer is reported by the object itself
        check(x, size(vh + 'iPP') + x.__alloc__() * self.c)
    # bytearray_iterator
    check(iter(bytearray()), size(h + 'PP'))
    # cell
    def get_cell():
        x = 42
        def inner():
            return x
        return inner
    check(get_cell().func_closure[0], size(h + 'P'))
    # classobj (old-style class)
    class class_oldstyle():
        def method():
            pass
    check(class_oldstyle, size(h + '6P'))
    # instance (old-style class)
    check(class_oldstyle(), size(h + '3P'))
    # instancemethod (old-style class)
    check(class_oldstyle().method, size(h + '4P'))
    # complex
    check(complex(0,1), size(h + '2d'))
    # code
    check(get_cell().func_code, size(h + '4i8Pi2P'))
    # BaseException
    check(BaseException(), size(h + '3P'))
    # UnicodeEncodeError
    check(UnicodeEncodeError("", u"", 0, 0, ""), size(h + '5P2PP'))
    # UnicodeDecodeError
    check(UnicodeDecodeError("", "", 0, 0, ""), size(h + '5P2PP'))
    # UnicodeTranslateError
    check(UnicodeTranslateError(u"", 0, 1, ""), size(h + '5P2PP'))
    # method_descriptor (descriptor object)
    check(str.lower, size(h + '2PP'))
    # classmethod_descriptor (descriptor object)
    # XXX
    # member_descriptor (descriptor object)
    import datetime
    check(datetime.timedelta.days, size(h + '2PP'))
    # getset_descriptor (descriptor object)
    import __builtin__
    check(__builtin__.file.closed, size(h + '2PP'))
    # wrapper_descriptor (descriptor object)
    check(int.__add__, size(h + '2P2P'))
    # dictproxy
    class C(object):
        pass
    check(C.__dict__, size(h + 'P'))
    # method-wrapper (descriptor object)
    check({}.__iter__, size(h + '2P'))
    # dict
    check({}, size(h + '3P2P' + 8*'P2P'))
    x = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
    check(x, size(h + '3P2P' + 8*'P2P') + 16*size('P2P'))
    # dictionary-keyiterator
    check({}.iterkeys(), size(h + 'P2PPP'))
    # dictionary-valueiterator
    check({}.itervalues(), size(h + 'P2PPP'))
    # dictionary-itemiterator
    check({}.iteritems(), size(h + 'P2PPP'))
    # ellipses
    check(Ellipsis, size(h + ''))
    # EncodingMap
    import codecs, encodings.iso8859_3
    x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
    check(x, size(h + '32B2iB'))
    # enumerate
    check(enumerate([]), size(h + 'l3P'))
    # file
    check(self.file, size(h + '4P2i4P3i3Pi'))
    # float
    check(float(0), size(h + 'd'))
    # sys.floatinfo
    check(sys.float_info, size(vh) + self.P * len(sys.float_info))
    # frame
    import inspect
    CO_MAXBLOCKS = 20
    # x is the frame of this very method, so the extra slots are derived
    # from this function's own code object.
    x = inspect.currentframe()
    ncells = len(x.f_code.co_cellvars)
    nfrees = len(x.f_code.co_freevars)
    extras = x.f_code.co_stacksize + x.f_code.co_nlocals +\
             ncells + nfrees - 1
    check(x, size(vh + '12P3i' + CO_MAXBLOCKS*'3i' + 'P' + extras*'P'))
    # function
    def func():
        pass
    check(func, size(h + '9P'))
    class c():
        @staticmethod
        def foo():
            pass
        @classmethod
        def bar(cls):
            pass
        # The two checks run inside the class body, where foo and bar are
        # still the raw staticmethod / classmethod objects.
        # staticmethod
        check(foo, size(h + 'P'))
        # classmethod
        check(bar, size(h + 'P'))
    # generator
    def get_gen():
        yield 1
    check(get_gen(), size(h + 'Pi2P'))
    # integer
    check(1, size(h + 'l'))
    check(100, size(h + 'l'))
    # iterator
    check(iter('abc'), size(h + 'lP'))
    # callable-iterator
    import re
    check(re.finditer('',''), size(h + '2P'))
    # list
    samples = [[], [1,2,3], ['1', '2', '3']]
    for sample in samples:
        check(sample, size(vh + 'PP') + len(sample)*self.P)
    # sortwrapper (list)
    # XXX
    # cmpwrapper (list)
    # XXX
    # listiterator (list)
    check(iter([]), size(h + 'lP'))
    # listreverseiterator (list)
    check(reversed([]), size(h + 'lP'))
    # long
    # NOTE(review): the literals below assume 15-bit digits (32768 == 2**15)
    # stored as unsigned shorts ('H') -- confirm for the target build.
    check(0L, size(vh + 'H') - self.H)
    check(1L, size(vh + 'H'))
    check(-1L, size(vh + 'H'))
    check(32768L, size(vh + 'H') + self.H)
    check(32768L*32768L-1, size(vh + 'H') + self.H)
    check(32768L*32768L, size(vh + 'H') + 2*self.H)
    # module
    check(unittest, size(h + 'P'))
    # None
    check(None, size(h + ''))
    # object
    check(object(), size(h + ''))
    # property (descriptor object)
    class C(object):
        def getx(self): return self.__x
        def setx(self, value): self.__x = value
        def delx(self): del self.__x
        x = property(getx, setx, delx, "")
        check(x, size(h + '4Pi'))
    # PyCObject
    # XXX
    # rangeiterator
    check(iter(xrange(1)), size(h + '4l'))
    # reverse
    check(reversed(''), size(h + 'PP'))
    # set
    # frozenset
    PySet_MINSIZE = 8
    samples = [[], range(10), range(50)]
    s = size(h + '3P2P' + PySet_MINSIZE*'lP' + 'lP')
    for sample in samples:
        minused = len(sample)
        # NOTE(review): `tmp` is never read anywhere in this function.
        if minused == 0: tmp = 1
        # the computation of minused is actually a bit more complicated
        # but this suffices for the sizeof test
        minused = minused*2
        newsize = PySet_MINSIZE
        while newsize <= minused:
            newsize = newsize << 1
        if newsize <= 8:
            check(set(sample), s)
            check(frozenset(sample), s)
        else:
            check(set(sample), s + newsize*struct.calcsize('lP'))
            check(frozenset(sample), s + newsize*struct.calcsize('lP'))
    # setiterator
    check(iter(set()), size(h + 'P3P'))
    # slice
    check(slice(1), size(h + '3P'))
    # str
    check('', size(vh + 'lic'))
    check('abc', size(vh + 'lic') + 3*self.c)
    # super
    check(super(int), size(h + '3P'))
    # tuple
    check((), size(vh))
    check((1,2,3), size(vh) + 3*self.P)
    # tupleiterator
    check(iter(()), size(h + 'lP'))
    # type
    # (PyTypeObject + PyNumberMethods +  PyMappingMethods +
    #  PySequenceMethods + PyBufferProcs)
    s = size(vh + 'P2P15Pl4PP9PP11PI') + size('41P 10P 3P 6P')
    class newstyleclass(object):
        pass
    check(newstyleclass, s)
    # builtin type
    check(int, s)
    # NotImplementedType
    import types
    check(types.NotImplementedType, s)
    # unicode
    # width of one code unit in the interpreter's internal representation
    usize = len(u'\0'.encode('unicode-internal'))
    samples = [u'', u'1'*100]
    # we need to test for both sizes, because we don't know if the string
    # has been cached
    for s in samples:
        check(s, size(h + 'PPlP') + usize * (len(s) + 1))
    # weakref
    import weakref
    check(weakref.ref(int), size(h + '2Pl2P'))
    # weakproxy
    # XXX
    # weakcallableproxy
    check(weakref.proxy(int), size(h + '2Pl2P'))
    # xrange
    check(xrange(1), size(h + '3l'))
    check(xrange(66000), size(h + '3l'))
def make_sloppy_codec(encoding):
    """
    Build and return the `codecs.CodecInfo` for the 'sloppy' variant of the
    single-byte codec named `encoding`.

    The sloppy variant decodes every byte the original codec has a mapping
    for exactly as the original does; each byte the original leaves
    unassigned decodes to the character Latin-1 gives that byte.  Encoding
    is the inverse of that table.

    The standard library wraps `codecs.charmap_decode` and
    `codecs.charmap_encode` in a fixed set of boilerplate classes for each
    single-byte codec; this function creates the same set of classes on the
    fly, bound to the tables computed here.  The returned codec is named
    ``'sloppy-' + encoding``.
    """
    # Every possible byte value, in ascending order.
    all_bytes = bytearray(range(256))

    # Baseline: what each byte means in Latin-1.
    sloppy_chars = list(all_bytes.decode('latin-1'))

    # What each byte means in the requested encoding, with the replacement
    # character standing in for unassigned bytes.
    # NOTE(review): PY26 presumably flags interpreters whose decode() does
    # not take `errors` as a keyword -- confirm where PY26 is defined.
    if not PY26:
        decoded_chars = all_bytes.decode(encoding, errors='replace')
    else:
        decoded_chars = all_bytes.decode(encoding, 'replace')

    # Overlay the real decodings on the Latin-1 baseline; slots whose
    # decoding failed keep their Latin-1 character.
    for position, decoded in enumerate(decoded_chars):
        if decoded == REPLACEMENT_CHAR:
            continue
        sloppy_chars[position] = decoded

    # The tables the charmap functions work from.
    decoding_table = ''.join(sloppy_chars)
    encoding_table = codecs.charmap_build(decoding_table)

    # Class boilerplate, mirroring the layout of `encodings.cp1252`.
    class Codec(codecs.Codec):
        def encode(self, input, errors='strict'):
            return codecs.charmap_encode(input, errors, encoding_table)

        def decode(self, input, errors='strict'):
            return codecs.charmap_decode(input, errors, decoding_table)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            encoded, _consumed = codecs.charmap_encode(
                input, self.errors, encoding_table
            )
            return encoded

    class IncrementalDecoder(codecs.IncrementalDecoder):
        def decode(self, input, final=False):
            decoded, _consumed = codecs.charmap_decode(
                input, self.errors, decoding_table
            )
            return decoded

    class StreamWriter(Codec, codecs.StreamWriter):
        pass

    class StreamReader(Codec, codecs.StreamReader):
        pass

    stateless_encode = Codec().encode
    stateless_decode = Codec().decode
    return codecs.CodecInfo(
        name='sloppy-' + encoding,
        encode=stateless_encode,
        decode=stateless_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
u'\u03c4' # 0xE7 -> GREEK SMALL LETTER TAU u'\u03a6' # 0xE8 -> GREEK CAPITAL LETTER PHI u'\u0398' # 0xE9 -> GREEK CAPITAL LETTER THETA u'\u03a9' # 0xEA -> GREEK CAPITAL LETTER OMEGA u'\u03b4' # 0xEB -> GREEK SMALL LETTER DELTA u'\u222e' # 0xEC -> CONTOUR INTEGRAL u'\u03c6' # 0xED -> GREEK SMALL LETTER PHI u'\u2208' # 0xEE -> ELEMENT OF SIGN u'\u2229' # 0xEF -> INTERSECTION u'\u2261' # 0xF0 -> IDENTICAL TO u'\xb1' # 0xF1 -> PLUS-MINUS SIGN u'\u2265' # 0xF2 -> GREATER-THAN OR EQUAL TO u'\u2264' # 0xF3 -> LESS-THAN OR EQUAL TO u'\u2320' # 0xF4 -> TOP HALF INTEGRAL u'\u2321' # 0xF5 -> BOTTOM HALF INTEGRAL u'\xf7' # 0xF6 -> DIVISION SIGN u'\u2248' # 0xF7 -> ALMOST EQUAL TO u'\xb0' # 0xF8 -> DEGREE SIGN u'\u2219' # 0xF9 -> BULLET OPERATOR u'\xb7' # 0xFA -> MIDDLE DOT u'\u221a' # 0xFB -> SQUARE ROOT u'\u207f' # 0xFC -> SUPERSCRIPT LATIN SMALL LETTER N u'\xb2' # 0xFD -> SUPERSCRIPT TWO u'\xb3' # 0xFE -> SUPERSCRIPT THREE u'\xaf' # 0xFF -> MACRON ) ### Encoding table ENCODING_TABLE = codecs.charmap_build(DECODING_TABLE)
def test_charmap_build(self): import codecs assert codecs.charmap_build(u'123456') == {49: 0, 50: 1, 51: 2, 52: 3, 53: 4, 54: 5}
def test_objecttypes(self):
    # check all types defined in Objects/
    #
    # Every check(obj, expected) call below pairs a live object with the
    # size expected for it.  The expected size is a struct-style format
    # string; h / vh are header prefixes taken from the test instance and
    # prepended to the per-type layout before it goes to self.calcsize.
    # Where the `stackless` module can be imported, some layouts get extra
    # fields.
    # NOTE(review): presumably h / vh describe the fixed / variable-size
    # object header and check() compares against sys.getsizeof() -- confirm
    # against the test class, which is not shown here.
    # No docstring is used on purpose: unittest would print it instead of
    # the test name.
    h = self.header
    vh = self.vheader
    size = self.calcsize
    check = self.check_sizeof
    # bool
    check(True, size(vh) + self.longdigit)
    # buffer
    # XXX
    # builtin_function_or_method
    check(len, size(h + '3P'))
    # bytearray
    samples = [b'', b'u'*100000]
    for sample in samples:
        x = bytearray(sample)
        # the allocated buffer is reported by the object itself
        check(x, size(vh + 'iPP') + x.__alloc__() * self.c)
    # bytearray_iterator
    check(iter(bytearray()), size(h + 'PP'))
    # cell
    def get_cell():
        x = 42
        def inner():
            return x
        return inner
    check(get_cell().__closure__[0], size(h + 'P'))
    # code
    check(get_cell().__code__, size(h + '5i8Pi3P'))
    # complex
    check(complex(0,1), size(h + '2d'))
    # method_descriptor (descriptor object)
    check(str.lower, size(h + '2PP'))
    # classmethod_descriptor (descriptor object)
    # XXX
    # member_descriptor (descriptor object)
    import datetime
    check(datetime.timedelta.days, size(h + '2PP'))
    # getset_descriptor (descriptor object)
    import collections
    check(collections.defaultdict.default_factory, size(h + '2PP'))
    # wrapper_descriptor (descriptor object)
    # NOTE(review): the bare except also hides failures other than a
    # missing stackless module.
    try:
        import stackless
        slxtra = 'i'
    except:
        slxtra = ''
    check(int.__add__, size(h + '2P2P' + slxtra))
    # method-wrapper (descriptor object)
    check({}.__iter__, size(h + '2P'))
    # dict
    check({}, size(h + '3P2P' + 8*'P2P'))
    longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
    check(longdict, size(h + '3P2P' + 8*'P2P') + 16*size('P2P'))
    # dictionary-keyiterator
    check({}.keys(), size(h + 'P'))
    # dictionary-valueiterator
    check({}.values(), size(h + 'P'))
    # dictionary-itemiterator
    check({}.items(), size(h + 'P'))
    # dictproxy
    class C(object):
        pass
    check(C.__dict__, size(h + 'P'))
    # BaseException
    check(BaseException(), size(h + '5P'))
    # UnicodeEncodeError
    check(UnicodeEncodeError("", "", 0, 0, ""), size(h + '5P 2P2PP'))
    # UnicodeDecodeError
    # XXX
    # check(UnicodeDecodeError("", "", 0, 0, ""), size(h + '5P2PP'))
    # UnicodeTranslateError
    check(UnicodeTranslateError("", 0, 1, ""), size(h + '5P 2P2PP'))
    # ellipses
    check(Ellipsis, size(h + ''))
    # EncodingMap
    import codecs, encodings.iso8859_3
    x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
    check(x, size(h + '32B2iB'))
    # enumerate
    check(enumerate([]), size(h + 'l3P'))
    # reverse
    check(reversed(''), size(h + 'PP'))
    # float
    check(float(0), size(h + 'd'))
    # sys.floatinfo
    check(sys.float_info, size(vh) + self.P * len(sys.float_info))
    # frame
    import inspect
    CO_MAXBLOCKS = 20
    # x is the frame of this very method, so the extra slots are derived
    # from this function's own code object.
    x = inspect.currentframe()
    ncells = len(x.f_code.co_cellvars)
    nfrees = len(x.f_code.co_freevars)
    extras = x.f_code.co_stacksize + x.f_code.co_nlocals +\
              ncells + nfrees - 1
    # NOTE(review): the bare except also hides failures other than a
    # missing stackless module.
    try:
        import stackless
        slextra = 'P'
    except:
        slextra = ''
    check(x, size(vh + '12P3i' + CO_MAXBLOCKS*'3i' + slextra + 'P' + extras*'P'))
    # function
    def func():
        pass
    check(func, size(h + '11P'))
    class c():
        @staticmethod
        def foo():
            pass
        @classmethod
        def bar(cls):
            pass
        # The two checks run inside the class body, where foo and bar are
        # still the raw staticmethod / classmethod objects.
        # staticmethod
        check(foo, size(h + 'P'))
        # classmethod
        check(bar, size(h + 'P'))
    # generator
    def get_gen():
        yield 1
    check(get_gen(), size(h + 'Pi2P'))
    # iterator
    check(iter('abc'), size(h + 'lP'))
    # callable-iterator
    import re
    check(re.finditer('',''), size(h + '2P'))
    # list
    samples = [[], [1,2,3], ['1', '2', '3']]
    for sample in samples:
        check(sample, size(vh + 'PP') + len(sample)*self.P)
    # sortwrapper (list)
    # XXX
    # cmpwrapper (list)
    # XXX
    # listiterator (list)
    check(iter([]), size(h + 'lP'))
    # listreverseiterator (list)
    check(reversed([]), size(h + 'lP'))
    # long
    check(0, size(vh))
    check(1, size(vh) + self.longdigit)
    check(-1, size(vh) + self.longdigit)
    PyLong_BASE = 2**sys.int_info.bits_per_digit
    check(int(PyLong_BASE), size(vh) + 2*self.longdigit)
    check(int(PyLong_BASE**2-1), size(vh) + 2*self.longdigit)
    check(int(PyLong_BASE**2), size(vh) + 3*self.longdigit)
    # memory
    check(memoryview(b''), size(h + 'PP2P2i7P'))
    # module
    check(unittest, size(h + '3P'))
    # None
    check(None, size(h + ''))
    # NotImplementedType
    check(NotImplemented, size(h))
    # object
    check(object(), size(h + ''))
    # property (descriptor object)
    class C(object):
        def getx(self): return self.__x
        def setx(self, value): self.__x = value
        def delx(self): del self.__x
        x = property(getx, setx, delx, "")
        check(x, size(h + '4Pi'))
    # PyCapsule
    # XXX
    # rangeiterator
    check(iter(range(1)), size(h + '4l'))
    # reverse
    check(reversed(''), size(h + 'PP'))
    # range
    check(range(1), size(h + '4P'))
    check(range(66000), size(h + '4P'))
    # set
    # frozenset
    PySet_MINSIZE = 8
    samples = [[], range(10), range(50)]
    s = size(h + '3P2P' + PySet_MINSIZE*'lP' + 'lP')
    for sample in samples:
        minused = len(sample)
        # NOTE(review): `tmp` is never read anywhere in this function.
        if minused == 0: tmp = 1
        # the computation of minused is actually a bit more complicated
        # but this suffices for the sizeof test
        minused = minused*2
        newsize = PySet_MINSIZE
        while newsize <= minused:
            newsize = newsize << 1
        if newsize <= 8:
            check(set(sample), s)
            check(frozenset(sample), s)
        else:
            check(set(sample), s + newsize*struct.calcsize('lP'))
            check(frozenset(sample), s + newsize*struct.calcsize('lP'))
    # setiterator
    check(iter(set()), size(h + 'P3P'))
    # slice
    check(slice(0), size(h + '3P'))
    # super
    check(super(int), size(h + '3P'))
    # tuple
    check((), size(vh))
    check((1,2,3), size(vh) + 3*self.P)
    # type
    # (PyTypeObject + PyNumberMethods + PyMappingMethods +
    #  PySequenceMethods + PyBufferProcs)
    s = size(vh + 'P2P15Pl4PP9PP11PI') + size('16Pi17P 3P 10P 2P 2P')
    # NOTE(review): the bare except also hides failures other than a
    # missing stackless module.
    try:
        import stackless
        # The number of byte entries in the generated 'slp_methodflags'.
        stacklessSize = 71
        # Make it a multiple of two.
        stacklessSize = stacklessSize + stacklessSize % 2
        s += stacklessSize
    except:
        pass
    check(int, s)
    # class
    class newstyleclass(object):
        pass
    check(newstyleclass, s)
    # unicode
    # width of one code unit in the interpreter's internal representation
    usize = len('\0'.encode('unicode-internal'))
    samples = ['', '1'*100]
    # we need to test for both sizes, because we don't know if the string
    # has been cached
    for s in samples:
        basicsize = size(h + 'PPPiP') + usize * (len(s) + 1)
        check(s, basicsize)
    # weakref
    import weakref
    check(weakref.ref(int), size(h + '2Pl2P'))
    # weakproxy
    # XXX
    # weakcallableproxy
    check(weakref.proxy(int), size(h + '2Pl2P'))
def test_charmap_build(self):
    # Build a 256-entry decoding map that swaps case: code points whose
    # byte form is lowercase map to their uppercase character, every other
    # code point maps to its lowercased character.
    swapped = []
    for code in xrange(256):
        char = unichr(code)
        if chr(code).islower():
            swapped.append(char.upper())
        else:
            swapped.append(char.lower())
    decodemap = ''.join(swapped)

    # Derive the matching encoding map from the decoding map.
    encodemap = codecs.charmap_build(decodemap)

    # Both directions are expected to yield the case-swapped text and to
    # report all 11 input characters as consumed.
    text = u'Hello World'
    expected = ('hELLO wORLD', 11)
    decoded = codecs.charmap_decode(text, 'strict', decodemap)
    self.assertEqual(decoded, expected)
    encoded = codecs.charmap_encode(text, 'strict', encodemap)
    self.assertEqual(encoded, expected)
u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL u'\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN ) ### Encoding table encoding_table = codecs.charmap_build(decoding_table)
def test_objecttypes(self):
    # check all types defined in Objects/
    #
    # Every check(obj, expected) call below pairs an object with a byte
    # count built from struct-style layout strings.  size and vsize are the
    # calcobjsize / calcvobjsize helpers from test.support and check is
    # self.check_sizeof; all three are defined outside this method
    # (presumably they add the object header and compare against
    # sys.getsizeof() -- confirm against their definitions).
    size = test.support.calcobjsize
    vsize = test.support.calcvobjsize
    check = self.check_sizeof
    # bool
    check(True, vsize('') + self.longdigit)
    # buffer
    # XXX
    # builtin_function_or_method
    check(len, size('3P')) # XXX check layout
    # bytearray
    samples = [b'', b'u' * 100000]
    for sample in samples:
        x = bytearray(sample)
        # fixed part plus whatever the bytearray reports via __alloc__()
        check(x, vsize('n2Pi') + x.__alloc__())
    # bytearray_iterator
    check(iter(bytearray()), size('nP'))
    # cell
    def get_cell():
        x = 42
        def inner():
            return x
        return inner
    check(get_cell().__closure__[0], size('P'))
    # code
    check(get_cell().__code__, size('5i9Pi3P'))
    check(get_cell.__code__, size('5i9Pi3P'))
    def get_cell2(x):
        def inner():
            return x
        return inner
    # same layout as above, but one extra byte is expected for get_cell2
    check(get_cell2.__code__, size('5i9Pi3P') + 1)
    # complex
    check(complex(0, 1), size('2d'))
    # method_descriptor (descriptor object)
    check(str.lower, size('3PP'))
    # classmethod_descriptor (descriptor object)
    # XXX
    # member_descriptor (descriptor object)
    import datetime
    check(datetime.timedelta.days, size('3PP'))
    # getset_descriptor (descriptor object)
    import collections
    check(collections.defaultdict.default_factory, size('3PP'))
    # wrapper_descriptor (descriptor object)
    check(int.__add__, size('3P2P'))
    # method-wrapper (descriptor object)
    check({}.__iter__, size('2P'))
    # dict
    check({}, size('n2P' + '2nPn' + 8 * 'n2P'))
    longdict = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8}
    # the 8-item dict is expected to carry 16 'n2P' entries
    check(longdict, size('n2P' + '2nPn') + 16 * struct.calcsize('n2P'))
    # dictionary-keyiterator
    check({}.keys(), size('P'))
    # dictionary-valueiterator
    check({}.values(), size('P'))
    # dictionary-itemiterator
    check({}.items(), size('P'))
    # dictionary iterator
    check(iter({}), size('P2nPn'))
    # dictproxy
    class C(object): pass
    check(C.__dict__, size('P'))
    # BaseException
    check(BaseException(), size('5Pb'))
    # UnicodeEncodeError
    check(UnicodeEncodeError("", "", 0, 0, ""), size('5Pb 2P2nP'))
    # UnicodeDecodeError
    check(UnicodeDecodeError("", b"", 0, 0, ""), size('5Pb 2P2nP'))
    # UnicodeTranslateError
    check(UnicodeTranslateError("", 0, 1, ""), size('5Pb 2P2nP'))
    # ellipses
    check(Ellipsis, size(''))
    # EncodingMap
    import codecs, encodings.iso8859_3
    x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
    check(x, size('32B2iB'))
    # enumerate
    check(enumerate([]), size('n3P'))
    # reverse
    check(reversed(''), size('nP'))
    # float
    check(float(0), size('d'))
    # sys.floatinfo
    check(sys.float_info, vsize('') + self.P * len(sys.float_info))
    # frame
    import inspect
    CO_MAXBLOCKS = 20
    # x is the frame of this test method itself, so extras is computed from
    # this method's own stack size and local/cell/free variable counts
    x = inspect.currentframe()
    ncells = len(x.f_code.co_cellvars)
    nfrees = len(x.f_code.co_freevars)
    extras = x.f_code.co_stacksize + x.f_code.co_nlocals +\
        ncells + nfrees - 1
    check(x, vsize('13P3ic' + CO_MAXBLOCKS * '3i' + 'P' + extras * 'P'))
    # function
    def func(): pass
    check(func, size('12P'))
    class c():
        @staticmethod
        def foo():
            pass
        @classmethod
        def bar(cls):
            pass
        # The two checks run inside the class body, where foo and bar are
        # still the staticmethod / classmethod objects themselves.
        # staticmethod
        check(foo, size('PP'))
        # classmethod
        check(bar, size('PP'))
    # generator
    def get_gen(): yield 1
    check(get_gen(), size('Pb2P'))
    # iterator
    check(iter('abc'), size('lP'))
    # callable-iterator
    import re
    check(re.finditer('', ''), size('2P'))
    # list
    samples = [[], [1, 2, 3], ['1', '2', '3']]
    for sample in samples:
        # one pointer-sized slot (self.P) is expected per list element
        check(sample, vsize('Pn') + len(sample) * self.P)
    # sortwrapper (list)
    # XXX
    # cmpwrapper (list)
    # XXX
    # listiterator (list)
    check(iter([]), size('lP'))
    # listreverseiterator (list)
    check(reversed([]), size('nP'))
    # int
    check(0, vsize(''))
    check(1, vsize('') + self.longdigit)
    check(-1, vsize('') + self.longdigit)
    # values at and around powers of the digit base probe the points where
    # the expected number of digits changes (2, 2, then 3 below)
    PyLong_BASE = 2**sys.int_info.bits_per_digit
    check(int(PyLong_BASE), vsize('') + 2 * self.longdigit)
    check(int(PyLong_BASE**2 - 1), vsize('') + 2 * self.longdigit)
    check(int(PyLong_BASE**2), vsize('') + 3 * self.longdigit)
    # memoryview
    check(memoryview(b''), size('Pnin 2P2n2i5P 3cPn'))
    # module
    check(unittest, size('PnPPP'))
    # None
    check(None, size(''))
    # NotImplementedType
    check(NotImplemented, size(''))
    # object
    check(object(), size(''))
    # property (descriptor object)
    class C(object):
        def getx(self): return self.__x
        def setx(self, value): self.__x = value
        def delx(self): del self.__x
        x = property(getx, setx, delx, "")
        # checked inside the class body, where x is the property object
        check(x, size('4Pi'))
    # PyCapsule
    # XXX
    # rangeiterator
    check(iter(range(1)), size('4l'))
    # reverse
    check(reversed(''), size('nP'))
    # range
    check(range(1), size('4P'))
    check(range(66000), size('4P'))
    # set
    # frozenset
    PySet_MINSIZE = 8
    samples = [[], range(10), range(50)]
    s = size('3n2P' + PySet_MINSIZE * 'nP' + 'nP')
    for sample in samples:
        minused = len(sample)
        # NOTE(review): tmp is assigned here but never read in this method
        if minused == 0: tmp = 1
        # the computation of minused is actually a bit more complicated
        # but this suffices for the sizeof test
        minused = minused * 2
        # double the table size until it exceeds minused
        newsize = PySet_MINSIZE
        while newsize <= minused:
            newsize = newsize << 1
        # a table that did not grow is expected to cost only the base size
        # s; a grown table adds newsize 'nP' entries on top of it
        if newsize <= 8:
            check(set(sample), s)
            check(frozenset(sample), s)
        else:
            check(set(sample), s + newsize * struct.calcsize('nP'))
            check(frozenset(sample), s + newsize * struct.calcsize('nP'))
    # setiterator
    check(iter(set()), size('P3n'))
    # slice
    check(slice(0), size('3P'))
    # super
    check(super(int), size('3P'))
    # tuple
    check((), vsize(''))
    check((1, 2, 3), vsize('') + 3 * self.P)
    # type
    # static type: PyTypeObject
    s = vsize('P2n15Pl4Pn9Pn11PIP')
    check(int, s)
    # (PyTypeObject + PyNumberMethods + PyMappingMethods +
    #  PySequenceMethods + PyBufferProcs + 4P)
    s = vsize('P2n15Pl4Pn9Pn11PIP') + struct.calcsize('34P 3P 10P 2P 4P')
    # Separate block for PyDictKeysObject with 4 entries
    s += struct.calcsize("2nPn") + 4 * struct.calcsize("n2P")
    # class
    class newstyleclass(object): pass
    check(newstyleclass, s)
    # dict with shared keys
    check(newstyleclass().__dict__, size('n2P' + '2nPn'))
    # unicode
    # each tuple contains a string and its expected character size
    # don't put any static strings here, as they may contain
    # wchar_t or UTF-8 representations
    samples = [
        '1' * 100, '\xff' * 50,
        '\u0100' * 40, '\uffff' * 100,
        '\U00010000' * 30, '\U0010ffff' * 100
    ]
    asciifields = "nnbP"
    compactfields = asciifields + "nPn"
    # NOTE(review): unicodefields is built but not used below (see the
    # TODO about forcing that layout)
    unicodefields = compactfields + "P"
    # the loop variable s rebinds the name that held the type size above
    for s in samples:
        # the largest code point selects the header layout and the
        # per-character width (1, 1, 2 or 4 bytes) used for the expectation
        maxchar = ord(max(s))
        if maxchar < 128:
            L = size(asciifields) + len(s) + 1
        elif maxchar < 256:
            L = size(compactfields) + len(s) + 1
        elif maxchar < 65536:
            L = size(compactfields) + 2 * (len(s) + 1)
        else:
            L = size(compactfields) + 4 * (len(s) + 1)
        check(s, L)
    # verify that the UTF-8 size is accounted for
    s = chr(0x4000)  # 4 bytes canonical representation
    check(s, size(compactfields) + 4)
    # compile() will trigger the generation of the UTF-8
    # representation as a side effect
    compile(s, "<stdin>", "eval")
    check(s, size(compactfields) + 4 + 4)
    # TODO: add check that forces the presence of wchar_t representation
    # TODO: add check that forces layout of unicodefields
    # weakref
    import weakref
    check(weakref.ref(int), size('2Pn2P'))
    # weakproxy
    # XXX
    # weakcallableproxy
    check(weakref.proxy(int), size('2Pn2P'))
def test_objecttypes(self):
    # check all types defined in Objects/
    #
    # Every check(obj, expected) call below pairs an object with a byte
    # count built from struct-style layout strings.  size and vsize are the
    # calcobjsize / calcvobjsize helpers from test.test_support and check is
    # self.check_sizeof; all three are defined outside this method
    # (presumably they add the object header and compare against
    # sys.getsizeof() -- confirm against their definitions).
    size = test.test_support.calcobjsize
    vsize = test.test_support.calcvobjsize
    check = self.check_sizeof
    # bool
    check(True, size('l'))
    # buffer
    # NOTE(review): buffer() is created under check_py3k_warnings(),
    # presumably because it emits a py3k warning -- confirm
    with test.test_support.check_py3k_warnings():
        check(buffer(''), size('2P2Pil'))
    # builtin_function_or_method
    check(len, size('3P'))
    # bytearray
    samples = ['', 'u' * 100000]
    for sample in samples:
        x = bytearray(sample)
        # fixed part plus whatever the bytearray reports via __alloc__()
        check(x, vsize('iPP') + x.__alloc__())
    # bytearray_iterator
    check(iter(bytearray()), size('PP'))
    # cell
    def get_cell():
        x = 42
        def inner():
            return x
        return inner
    check(get_cell().func_closure[0], size('P'))
    # classobj (old-style class)
    class class_oldstyle():
        def method():
            pass
    check(class_oldstyle, size('7P'))
    # instance (old-style class)
    check(class_oldstyle(), size('3P'))
    # instancemethod (old-style class)
    check(class_oldstyle().method, size('4P'))
    # complex
    check(complex(0, 1), size('2d'))
    # code
    check(get_cell().func_code, size('4i8Pi3P'))
    # BaseException
    check(BaseException(), size('3P'))
    # UnicodeEncodeError
    check(UnicodeEncodeError("", u"", 0, 0, ""), size('5P2PP'))
    # UnicodeDecodeError
    check(UnicodeDecodeError("", "", 0, 0, ""), size('5P2PP'))
    # UnicodeTranslateError
    check(UnicodeTranslateError(u"", 0, 1, ""), size('5P2PP'))
    # method_descriptor (descriptor object)
    check(str.lower, size('2PP'))
    # classmethod_descriptor (descriptor object)
    # XXX
    # member_descriptor (descriptor object)
    import datetime
    check(datetime.timedelta.days, size('2PP'))
    # getset_descriptor (descriptor object)
    import __builtin__
    check(__builtin__.file.closed, size('2PP'))
    # wrapper_descriptor (descriptor object)
    check(int.__add__, size('2P2P'))
    # dictproxy
    class C(object): pass
    check(C.__dict__, size('P'))
    # method-wrapper (descriptor object)
    check({}.__iter__, size('2P'))
    # dict
    check({}, size('3P2P' + 8 * 'P2P'))
    x = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8}
    # the 8-item dict is expected to carry 16 additional 'P2P' entries on
    # top of the base layout
    check(x, size('3P2P' + 8 * 'P2P') + 16 * struct.calcsize('P2P'))
    # dictionary-keyiterator
    check({}.iterkeys(), size('P2PPP'))
    # dictionary-valueiterator
    check({}.itervalues(), size('P2PPP'))
    # dictionary-itemiterator
    check({}.iteritems(), size('P2PPP'))
    # ellipses
    check(Ellipsis, size(''))
    # EncodingMap
    import codecs, encodings.iso8859_3
    x = codecs.charmap_build(encodings.iso8859_3.decoding_table)
    check(x, size('32B2iB'))
    # enumerate
    check(enumerate([]), size('l3P'))
    # file
    # self.file is provided outside this method (presumably by the test
    # fixture's setUp -- confirm)
    check(self.file, size('4P2i4P3i3P3i'))
    # float
    check(float(0), size('d'))
    # sys.floatinfo
    check(sys.float_info, vsize('') + self.P * len(sys.float_info))
    # frame
    import inspect
    CO_MAXBLOCKS = 20
    # x is the frame of this test method itself, so extras is computed from
    # this method's own stack size and local/cell/free variable counts
    x = inspect.currentframe()
    ncells = len(x.f_code.co_cellvars)
    nfrees = len(x.f_code.co_freevars)
    extras = x.f_code.co_stacksize + x.f_code.co_nlocals +\
        ncells + nfrees - 1
    check(x, vsize('12P3i' + CO_MAXBLOCKS * '3i' + 'P' + extras * 'P'))
    # function
    def func(): pass
    check(func, size('9P'))
    class c():
        @staticmethod
        def foo():
            pass
        @classmethod
        def bar(cls):
            pass
        # The two checks run inside the class body, where foo and bar are
        # still the staticmethod / classmethod objects themselves.
        # staticmethod
        check(foo, size('P'))
        # classmethod
        check(bar, size('P'))
    # generator
    def get_gen(): yield 1
    check(get_gen(), size('Pi2P'))
    # integer
    check(1, size('l'))
    check(100, size('l'))
    # iterator
    check(iter('abc'), size('lP'))
    # callable-iterator
    import re
    check(re.finditer('', ''), size('2P'))
    # list
    samples = [[], [1, 2, 3], ['1', '2', '3']]
    for sample in samples:
        # one pointer-sized slot (self.P) is expected per list element
        check(sample, vsize('PP') + len(sample) * self.P)
    # sortwrapper (list)
    # XXX
    # cmpwrapper (list)
    # XXX
    # listiterator (list)
    check(iter([]), size('lP'))
    # listreverseiterator (list)
    check(reversed([]), size('lP'))
    # long
    check(0L, vsize(''))
    check(1L, vsize('') + self.longdigit)
    check(-1L, vsize('') + self.longdigit)
    # values at and around powers of the digit base probe the points where
    # the expected number of digits changes (2, 2, then 3 below)
    PyLong_BASE = 2**sys.long_info.bits_per_digit
    check(long(PyLong_BASE), vsize('') + 2 * self.longdigit)
    check(long(PyLong_BASE**2 - 1), vsize('') + 2 * self.longdigit)
    check(long(PyLong_BASE**2), vsize('') + 3 * self.longdigit)
    # module
    check(unittest, size('P'))
    # None
    check(None, size(''))
    # object
    check(object(), size(''))
    # property (descriptor object)
    class C(object):
        def getx(self): return self.__x
        def setx(self, value): self.__x = value
        def delx(self): del self.__x
        x = property(getx, setx, delx, "")
        # checked inside the class body, where x is the property object
        check(x, size('4Pi'))
    # PyCObject
    # PyCapsule
    # XXX
    # rangeiterator
    check(iter(xrange(1)), size('4l'))
    # reverse
    check(reversed(''), size('PP'))
    # set
    # frozenset
    PySet_MINSIZE = 8
    samples = [[], range(10), range(50)]
    s = size('3P2P' + PySet_MINSIZE * 'lP' + 'lP')
    for sample in samples:
        minused = len(sample)
        # NOTE(review): tmp is assigned here but never read in this method
        if minused == 0: tmp = 1
        # the computation of minused is actually a bit more complicated
        # but this suffices for the sizeof test
        minused = minused * 2
        # double the table size until it exceeds minused
        newsize = PySet_MINSIZE
        while newsize <= minused:
            newsize = newsize << 1
        # a table that did not grow is expected to cost only the base size
        # s; a grown table adds newsize 'lP' entries on top of it
        if newsize <= 8:
            check(set(sample), s)
            check(frozenset(sample), s)
        else:
            check(set(sample), s + newsize * struct.calcsize('lP'))
            check(frozenset(sample), s + newsize * struct.calcsize('lP'))
    # setiterator
    check(iter(set()), size('P3P'))
    # slice
    check(slice(1), size('3P'))
    # str
    # str sizes are computed with struct.calcsize on the raw variable-size
    # header string instead of going through vsize()
    vh = test.test_support._vheader
    check('', struct.calcsize(vh + 'lic'))
    check('abc', struct.calcsize(vh + 'lic') + 3)
    # super
    check(super(int), size('3P'))
    # tuple
    check((), vsize(''))
    check((1, 2, 3), vsize('') + 3 * self.P)
    # tupleiterator
    check(iter(()), size('lP'))
    # type
    # (PyTypeObject + PyNumberMethods + PyMappingMethods +
    #  PySequenceMethods + PyBufferProcs)
    s = vsize('P2P15Pl4PP9PP11PI') + struct.calcsize('41P 10P 3P 6P')
    class newstyleclass(object):
        pass
    check(newstyleclass, s)
    # the same expected size s is used for the built-in types below
    # builtin type
    check(int, s)
    # NotImplementedType
    import types
    check(types.NotImplementedType, s)
    # unicode
    # usize is the number of bytes one character takes in the
    # 'unicode-internal' encoding
    usize = len(u'\0'.encode('unicode-internal'))
    samples = [u'', u'1' * 100]
    # we need to test for both sizes, because we don't know if the string
    # has been cached
    # (the loop variable s rebinds the name that held the type size above)
    for s in samples:
        check(s, size('PPlP') + usize * (len(s) + 1))
    # weakref
    import weakref
    check(weakref.ref(int), size('2Pl2P'))
    # weakproxy
    # XXX
    # weakcallableproxy
    check(weakref.proxy(int), size('2Pl2P'))
    # xrange
    check(xrange(1), size('3l'))
    check(xrange(66000), size('3l'))