def strcoll(space, w_s1, w_s2):
    "string,string -> int. Compares two strings according to the locale."
    # Two bytes objects are compared as C char strings through _strcoll();
    # every other combination is unwrapped as unicode and compared as wide
    # strings through _wcscoll().
    #
    # The temporary C buffers are owned by scoped helpers, so the first
    # buffer is released even when allocating the second one raises, and
    # both are released whatever the comparison call does.
    if (space.isinstance_w(w_s1, space.w_bytes) and
            space.isinstance_w(w_s2, space.w_bytes)):
        # Unwrap both arguments before allocating anything.
        s1, s2 = space.bytes_w(w_s1), space.bytes_w(w_s2)
        with rffi.scoped_str2charp(s1) as s1_c:
            with rffi.scoped_str2charp(s2) as s2_c:
                return space.newint(_strcoll(s1_c, s2_c))

    # Unwrapping may raise for unsupported types; do it before allocating.
    u1, u2 = space.unicode_w(w_s1), space.unicode_w(w_s2)
    with rffi.scoped_unicode2wcharp(u1) as u1_c:
        with rffi.scoped_unicode2wcharp(u2) as u2_c:
            result = _wcscoll(u1_c, u2_c)
    return space.newint(result)
def test_ascii_codec(self, space):
    """Exercise PyUnicode_DecodeASCII / PyUnicode_EncodeASCII on inputs
    that fit in ASCII and on inputs that must be rejected."""
    # Null char* handed over as the last argument of every call below.
    NULL = lltype.nullptr(rffi.CCHARP.TO)

    # Decoding plain ASCII bytes gives back the same text.
    raw = 'abcdefg'
    c_raw = rffi.str2charp(raw)
    w_decoded = PyUnicode_DecodeASCII(space, c_raw, len(raw), NULL)
    assert space.eq_w(w_decoded, space.wrap(u"abcdefg"))
    rffi.free_charp(c_raw)

    # A byte outside the ASCII range must be refused by the decoder.
    raw = 'abcd\xFF'
    c_raw = rffi.str2charp(raw)
    with raises_w(space, UnicodeDecodeError):
        PyUnicode_DecodeASCII(space, c_raw, len(raw), NULL)
    rffi.free_charp(c_raw)

    # Encoding ASCII-only text gives back the same characters.
    text = u'abcdefg'
    c_text = rffi.unicode2wcharp(text)
    w_encoded = PyUnicode_EncodeASCII(space, c_text, len(text), NULL)
    assert space.eq_w(space.wrap("abcdefg"), w_encoded)
    rffi.free_wcharp(c_text)

    # Text with non-ASCII characters must be refused by the encoder.
    text = u'�bcd�fg'
    c_text = rffi.unicode2wcharp(text)
    with raises_w(space, UnicodeEncodeError):
        PyUnicode_EncodeASCII(space, c_text, len(text), NULL)
    rffi.free_wcharp(c_text)
def strcoll(space, w_s1, w_s2):
    "string,string -> int. Compares two strings according to the locale."
    # Two str objects are compared as C char strings through _strcoll();
    # every other combination is unwrapped as unicode and compared as wide
    # strings through _wcscoll().
    #
    # The temporary C buffers are owned by scoped helpers, so the first
    # buffer is released even when allocating the second one raises, and
    # both are released whatever the comparison call does.
    if (space.isinstance_w(w_s1, space.w_str) and
            space.isinstance_w(w_s2, space.w_str)):
        # Unwrap both arguments before allocating anything.
        s1, s2 = space.str_w(w_s1), space.str_w(w_s2)
        with rffi.scoped_str2charp(s1) as s1_c:
            with rffi.scoped_str2charp(s2) as s2_c:
                return space.wrap(_strcoll(s1_c, s2_c))

    # Unwrapping may raise for unsupported types; do it before allocating.
    u1, u2 = space.unicode_w(w_s1), space.unicode_w(w_s2)
    with rffi.scoped_unicode2wcharp(u1) as u1_c:
        with rffi.scoped_unicode2wcharp(u2) as u2_c:
            result = _wcscoll(u1_c, u2_c)
    return space.wrap(result)
def test_ascii_codec(self, space, api):
    """Exercise api.PyUnicode_DecodeASCII / api.PyUnicode_EncodeASCII on
    inputs that fit in ASCII and on inputs that must be rejected.

    The failing encode is checked only through self.raises(); the extra
    direct call that used to precede it (its result was never looked at)
    has been dropped.
    """
    # Null char* handed over as the last argument of every call below.
    null_charp = lltype.nullptr(rffi.CCHARP.TO)

    # Decoding plain ASCII bytes gives back the same text.
    s = 'abcdefg'
    data = rffi.str2charp(s)
    w_u = api.PyUnicode_DecodeASCII(data, len(s), null_charp)
    assert space.eq_w(w_u, space.wrap(u"abcdefg"))
    rffi.free_charp(data)

    # A byte outside the ASCII range must be refused by the decoder.
    s = 'abcd\xFF'
    data = rffi.str2charp(s)
    self.raises(space, api, UnicodeDecodeError, api.PyUnicode_DecodeASCII,
                data, len(s), null_charp)
    rffi.free_charp(data)

    # Encoding ASCII-only text gives back the same characters.
    uni = u'abcdefg'
    data = rffi.unicode2wcharp(uni)
    w_s = api.PyUnicode_EncodeASCII(data, len(uni), null_charp)
    assert space.eq_w(space.wrap("abcdefg"), w_s)
    rffi.free_wcharp(data)

    # Text with non-ASCII characters must be refused by the encoder.
    u = u'äbcdéfg'
    data = rffi.unicode2wcharp(u)
    self.raises(space, api, UnicodeEncodeError, api.PyUnicode_EncodeASCII,
                data, len(u), null_charp)
    rffi.free_wcharp(data)
def strcoll(space, w_s1, w_s2):
    "string,string -> int. Compares two strings according to the locale."
    # Both arguments are unwrapped as unicode and compared as wide strings
    # through _wcscoll().  Unwrapping happens before any allocation so a
    # failure there cannot leave a raw buffer behind.
    s1, s2 = space.unicode_w(w_s1), space.unicode_w(w_s2)

    # The scoped helpers own the temporary wchar_t buffers: the first one
    # is released even when allocating the second one raises, and both are
    # released whatever the comparison call does.
    with rffi.scoped_unicode2wcharp(s1) as s1_c:
        with rffi.scoped_unicode2wcharp(s2) as s2_c:
            result = _wcscoll(s1_c, s2_c)
    return space.wrap(result)
def fill_with_unicode(self, space, w_value):
    """Load a unicode value into this buffer, or clear it.

    A missing value (interp-level None or the app-level None object)
    resets the buffer through self.clear().  Otherwise self.ptr is set to
    a raw wide-char copy of the text, cast to roci.oratext, and self.size
    is set to twice the number of characters.
    """
    is_missing = w_value is None or space.is_w(w_value, space.w_None)
    if is_missing:
        self.clear()
        return

    # XXX ucs2 only probably
    text = space.unicode_w(w_value)
    wide_copy = rffi.unicode2wcharp(text)
    self.ptr = rffi.cast(roci.oratext, wide_copy)
    self.size = len(text) * 2
def PyUnicode_AS_UNICODE(space, ref):
    """Return a pointer to the internal Py_UNICODE buffer of the object.  ref
    has to be a PyUnicodeObject (not checked).

    The buffer is created on first use from the object's unicode value and
    stored in the c_str field; later calls return the stored pointer."""
    py_uni = rffi.cast(PyUnicodeObject, ref)
    if py_uni.c_str:
        # Already materialised by an earlier call.
        return py_uni.c_str

    # Copy unicode buffer
    w_text = from_ref(space, rffi.cast(PyObject, ref))
    text = space.unicode_w(w_text)
    py_uni.c_str = rffi.unicode2wcharp(text)
    return py_uni.c_str
def test_mbcs(self, space, api):
    """Check that PyUnicode_EncodeMBCS returns a str in which the one
    non-ASCII input character comes out as '?'.  Skipped off Windows."""
    running_on_windows = sys.platform == 'win32'
    if not running_on_windows:
        py.test.skip("mcbs encoding only exists on Windows")

    # unfortunately, mbcs is locale-dependent.
    # This tests works at least on a Western Windows.
    text = u"abc" + unichr(12345)
    c_text = rffi.unicode2wcharp(text)
    w_encoded = api.PyUnicode_EncodeMBCS(c_text, 4, None)
    rffi.free_wcharp(c_text)

    assert space.type(w_encoded) is space.w_str
    assert space.str_w(w_encoded) == "abc?"
def get_programname(self):
    """Return the program name as a wide-char pointer, building it once.

    On first use the name is taken from sys.argv[0], or is u"pypy3" when
    argv is empty; the resulting buffer is stored on self.programname and
    marked immortal, and every later call returns that same pointer.
    """
    if self.programname:
        return self.programname

    space = self.space
    w_argv = space.sys.get('argv')
    if space.len_w(w_argv):
        w_first = space.getitem(w_argv, space.newint(0))
        name = space.unicode_w(w_first)
    else:
        name = u"pypy3"

    self.programname = rffi.unicode2wcharp(name)
    lltype.render_immortal(self.programname)
    return self.programname
def stop_error_capture(self, w_done):
    """Finish an error capture and show the captured text.

    Does nothing when w_done is None.  Otherwise w_done is called, its
    result is copied into a raw wide-char buffer, any buffer kept from a
    previous call is freed, the new buffer is stored on self.text_p and
    passed to cffi_errorbox().
    """
    if w_done is None:
        return

    space = self.space
    w_text = space.call_function(w_done)
    text = space.unicode_w(w_text)
    new_text_p = rffi.unicode2wcharp(text, track_allocation=False)

    # Release the buffer kept from the previous call only after the new
    # one has been allocated.
    if self.text_p:
        rffi.free_wcharp(self.text_p, track_allocation=False)
    self.text_p = new_text_p      # keepalive
    cffi_errorbox(new_text_p)
def _readify(space, py_obj, value):
    """Fill in the data/kind/length fields of py_obj from value.

    The largest code point in value selects the representation:
    below 256 -> _1BYTE_KIND, below 65536 -> _2BYTE_KIND, otherwise
    _4BYTE_KIND.  A code point above MAX_UNICODE raises ValueError.
    Finally the object is flagged ready; the function returns 0.
    """
    # Find the largest code point; it decides the storage kind below.
    maxchar = 0
    for c in value:
        if ord(c) > maxchar:
            maxchar = ord(c)
            if maxchar > MAX_UNICODE:
                # NOTE(review): the code point is formatted with %d
                # (decimal) after a "U+" prefix, while the range in the
                # same message is written in hex -- confirm intent.
                raise oefmt(
                    space.w_ValueError,
                    "Character U+%d is not in range [U+0000; U+10ffff]",
                    maxchar)
    if maxchar < 256:
        # One byte per character: latin-1 encode into a raw char buffer.
        ucs1_data = rffi.str2charp(
            unicode_encode_latin_1(value, len(value), errors='strict'))
        set_data(py_obj, cts.cast('void*', ucs1_data))
        set_kind(py_obj, _1BYTE_KIND)
        # The length is taken from the wsize already stored on py_obj.
        set_len(py_obj, get_wsize(py_obj))
        if maxchar < 128:
            # Pure ASCII: the utf8 pointer is set to the data buffer
            # itself rather than to a separate copy.
            set_ascii(py_obj, 1)
            set_utf8(py_obj, cts.cast('char*', get_data(py_obj)))
            set_utf8_len(py_obj, get_wsize(py_obj))
        else:
            # Not ASCII: the utf8 pointer is null and its length 0.
            set_ascii(py_obj, 0)
            set_utf8(py_obj, cts.cast('char *', 0))
            set_utf8_len(py_obj, 0)
    elif maxchar < 65536:
        # XXX: assumes that sizeof(wchar_t) == 4
        # Two bytes per character: utf-16 encode with runicode.BYTEORDER
        # and expose the raw bytes as Py_UCS2.
        ucs2_str = unicode_encode_utf_16_helper(value, len(value),
                                                errors='strict',
                                                byteorder=runicode.BYTEORDER)
        ucs2_data = cts.cast('Py_UCS2 *', rffi.str2charp(ucs2_str))
        set_data(py_obj, cts.cast('void*', ucs2_data))
        set_len(py_obj, get_wsize(py_obj))
        set_kind(py_obj, _2BYTE_KIND)
        set_utf8(py_obj, cts.cast('char *', 0))
        set_utf8_len(py_obj, 0)
    else:
        # XXX: assumes that sizeof(wchar_t) == 4
        # Four bytes per character: the wide-char buffer doubles as the
        # data buffer, and is created here only if py_obj has none yet.
        if not get_wbuffer(py_obj):
            # Copy unicode buffer
            set_wbuffer(py_obj, rffi.unicode2wcharp(value))
            set_wsize(py_obj, len(value))
        ucs4_data = get_wbuffer(py_obj)
        set_data(py_obj, cts.cast('void*', ucs4_data))
        set_len(py_obj, get_wsize(py_obj))
        set_kind(py_obj, _4BYTE_KIND)
        set_utf8(py_obj, cts.cast('char *', 0))
        set_utf8_len(py_obj, 0)
    set_ready(py_obj, 1)
    return 0
def PyUnicode_AsUnicodeAndSize(space, ref, psize):
    """Return the wide-char buffer stored on the unicode object `ref`.

    The buffer is created from the object's unicode value on first use.
    When `psize` is non-null, the stored wsize is written to psize[0].
    Raises TypeError ("expected unicode object") if the type of `ref` is
    not a subtype of unicode."""
    # Don't use PyUnicode_Check, it will realize the object :-(
    w_reftype = from_ref(space, rffi.cast(PyObject, ref.c_ob_type))
    is_unicode = space.issubtype_w(w_reftype, space.w_unicode)
    if not is_unicode:
        raise oefmt(space.w_TypeError, "expected unicode object")

    if not get_wbuffer(ref):
        # Copy unicode buffer
        w_text = from_ref(space, rffi.cast(PyObject, ref))
        text = space.unicode_w(w_text)
        set_wbuffer(ref, rffi.unicode2wcharp(text))
        set_wsize(ref, len(text))

    if psize:
        psize[0] = get_wsize(ref)
    return get_wbuffer(ref)
def test_latin1(self, space, api):
    """Exercise the Latin-1 decode, encode and as-string C-API helpers on
    ASCII-only data; encoded results are compared against bytes."""
    # Null char* handed over as the last argument of decode/encode.
    null_charp = lltype.nullptr(rffi.CCHARP.TO)

    # Decode: char* buffer -> unicode object.
    raw = 'abcdefg'
    c_raw = rffi.str2charp(raw)
    w_decoded = api.PyUnicode_DecodeLatin1(c_raw, len(raw), null_charp)
    assert space.eq_w(w_decoded, space.wrap(u"abcdefg"))
    rffi.free_charp(c_raw)

    # Encode: wchar_t* buffer -> bytes object.
    text = u'abcdefg'
    c_text = rffi.unicode2wcharp(text)
    w_encoded = api.PyUnicode_EncodeLatin1(c_text, len(text), null_charp)
    assert space.eq_w(space.wrapbytes("abcdefg"), w_encoded)
    rffi.free_wcharp(c_text)

    # Encode straight from an existing unicode object.
    plain = "abcdef"
    w_plain = space.wrap(plain.decode("ascii"))
    w_latin1 = api.PyUnicode_AsLatin1String(w_plain)
    assert space.eq_w(space.wrapbytes(plain), w_latin1)
def test_latin1(self, space, api):
    """Exercise the Latin-1 decode, encode and as-string C-API helpers on
    ASCII-only data; encoded results are compared against wrapped strs."""
    # Null char* handed over as the last argument of decode/encode.
    null_charp = lltype.nullptr(rffi.CCHARP.TO)

    # Decode: char* buffer -> unicode object.
    raw = 'abcdefg'
    c_raw = rffi.str2charp(raw)
    w_decoded = api.PyUnicode_DecodeLatin1(c_raw, len(raw), null_charp)
    assert space.eq_w(w_decoded, space.wrap(u"abcdefg"))
    rffi.free_charp(c_raw)

    # Encode: wchar_t* buffer -> string object.
    text = u'abcdefg'
    c_text = rffi.unicode2wcharp(text)
    w_encoded = api.PyUnicode_EncodeLatin1(c_text, len(text), null_charp)
    assert space.eq_w(space.wrap("abcdefg"), w_encoded)
    rffi.free_wcharp(c_text)

    # Encode straight from an existing unicode object.
    plain = "abcdef"
    w_plain = space.wrap(plain.decode("ascii"))
    w_latin1 = api.PyUnicode_AsLatin1String(w_plain)
    assert space.eq_w(space.wrap(plain), w_latin1)
def test_AS(self, space):
    """Cover the PyUnicode_AS_* / PyUnicode_As* accessors: raw buffer
    access, encoding to a string/object, and copying to a wchar_t buffer."""
    # --- raw buffer accessors all expose the same characters
    w_word = space.wrap(u'spam')
    data_view = rffi.cast(rffi.CWCHARP, PyUnicode_AS_DATA(space, w_word))
    unicode_view = PyUnicode_AS_UNICODE(space, w_word)
    as_unicode_view = PyUnicode_AsUnicode(space, w_word)
    for (idx, expected) in enumerate(space.utf8_w(w_word)):
        assert data_view[idx] == expected
        assert unicode_view[idx] == expected
        assert as_unicode_view[idx] == expected
    with raises_w(space, TypeError):
        PyUnicode_AsUnicode(space, space.newbytes('spam'))

    # --- encoding with an explicit codec name
    c_utf8 = rffi.str2charp('utf-8')
    w_encoded = PyUnicode_AsEncodedString(space, space.wrap(u'sp�m'),
                                          c_utf8, None)
    assert space.unwrap(w_encoded) == 'sp\xef\xbf\xbdm'
    w_encoded_obj = PyUnicode_AsEncodedObject(space, space.wrap(u'sp�m'),
                                              c_utf8, None)
    assert space.eq_w(w_encoded, w_encoded_obj)

    # --- arguments that are not unicode objects are rejected
    w_one = space.newint(1)
    with raises_w(space, TypeError):
        PyUnicode_AsEncodedString(
            space, space.newtuple([w_one, w_one, w_one]), None, None)
    with raises_w(space, TypeError):
        PyUnicode_AsEncodedString(space, space.wrap(''), None, None)

    # --- encoding with an error handler
    c_ascii = rffi.str2charp('ascii')
    c_replace = rffi.str2charp('replace')
    w_encoded = PyUnicode_AsEncodedString(space, space.wrap(u'sp�m'),
                                          c_ascii, c_replace)
    assert space.unwrap(w_encoded) == 'sp?m'
    rffi.free_charp(c_utf8)
    rffi.free_charp(c_replace)
    rffi.free_charp(c_ascii)

    # --- copying into a caller-supplied wchar_t buffer of 5 characters
    wide_buf = rffi.unicode2wcharp(u"12345")
    PyUnicode_AsWideChar(space, space.wrap(u'longword'), wide_buf, 5)
    assert rffi.wcharp2unicode(wide_buf) == 'longw'
    PyUnicode_AsWideChar(space, space.wrap(u'a'), wide_buf, 5)
    assert rffi.wcharp2unicode(wide_buf) == 'a'
    rffi.free_wcharp(wide_buf)
def test_AS(self, space, api):
    """Cover the PyUnicode_AS_* / PyUnicode_As* accessors: raw buffer
    access, encoding to a string/object, and copying to a wchar_t buffer."""
    # --- raw buffer accessors all expose the same characters
    w_word = space.wrap(u'spam')
    data_view = rffi.cast(rffi.CWCHARP, api.PyUnicode_AS_DATA(w_word))
    unicode_view = api.PyUnicode_AS_UNICODE(w_word)
    as_unicode_view = api.PyUnicode_AsUnicode(w_word)
    for (idx, expected) in enumerate(space.unwrap(w_word)):
        assert data_view[idx] == expected
        assert unicode_view[idx] == expected
        assert as_unicode_view[idx] == expected
    self.raises(space, api, TypeError, api.PyUnicode_AsUnicode,
                space.wrap('spam'))

    # --- encoding with an explicit codec name
    c_utf8 = rffi.str2charp('utf-8')
    w_encoded = api.PyUnicode_AsEncodedString(space.wrap(u'späm'),
                                              c_utf8, None)
    assert space.unwrap(w_encoded) == 'sp\xc3\xa4m'
    w_encoded_obj = api.PyUnicode_AsEncodedObject(space.wrap(u'späm'),
                                                  c_utf8, None)
    assert space.eq_w(w_encoded, w_encoded_obj)

    # --- arguments that are not unicode objects are rejected
    self.raises(space, api, TypeError, api.PyUnicode_AsEncodedString,
                space.newtuple([1, 2, 3]), None, None)
    self.raises(space, api, TypeError, api.PyUnicode_AsEncodedString,
                space.wrap(''), None, None)

    # --- encoding with an error handler
    c_ascii = rffi.str2charp('ascii')
    c_replace = rffi.str2charp('replace')
    w_encoded = api.PyUnicode_AsEncodedString(space.wrap(u'späm'),
                                              c_ascii, c_replace)
    assert space.unwrap(w_encoded) == 'sp?m'
    rffi.free_charp(c_utf8)
    rffi.free_charp(c_replace)
    rffi.free_charp(c_ascii)

    # --- copying into a caller-supplied wchar_t buffer of 5 characters
    wide_buf = rffi.unicode2wcharp(u"12345")
    api.PyUnicode_AsWideChar(space.wrap(u'longword'), wide_buf, 5)
    assert rffi.wcharp2unicode(wide_buf) == 'longw'
    api.PyUnicode_AsWideChar(space.wrap(u'a'), wide_buf, 5)
    assert rffi.wcharp2unicode(wide_buf) == 'a'
    rffi.free_wcharp(wide_buf)
def convert_to_regdata(space, w_value, typ):
    '''Convert w_value into a raw buffer for registry type `typ`.

    returns CCHARP, int  -- the raw buffer and its size in bytes.

    REG_DWORD accepts None (stored as 0) or an int.  REG_SZ and
    REG_EXPAND_SZ accept None or a unicode string.  REG_MULTI_SZ accepts
    None or a list of unicode strings.  Every other type is treated as
    binary and accepts None or an object exposing a simple buffer.

    Raises TypeError when a binary value has no buffer interface, and
    ValueError when the value cannot be converted for the requested type.
    Exceptions raised while unwrapping or iterating the value propagate.
    '''
    buf = None

    if typ == rwinreg.REG_DWORD:
        # Anything that is neither None nor an int leaves buf as None and
        # ends in the ValueError at the bottom.
        if space.is_none(w_value) or space.isinstance_w(w_value, space.w_int):
            if space.is_none(w_value):
                value = r_uint(0)
            else:
                value = space.c_uint_w(w_value)
            buflen = rffi.sizeof(rwin32.DWORD)
            buf1 = lltype.malloc(rffi.CArray(rwin32.DWORD), 1, flavor='raw')
            buf1[0] = value
            buf = rffi.cast(rffi.CCHARP, buf1)

    elif typ == rwinreg.REG_SZ or typ == rwinreg.REG_EXPAND_SZ:
        if space.is_w(w_value, space.w_None):
            # NOTE(review): None is stored as a single zero byte here,
            # unlike the two-byte units used for real strings -- confirm.
            buflen = 1
            buf = lltype.malloc(rffi.CCHARP.TO, buflen, flavor='raw')
            buf[0] = '\0'
        else:
            text = space.unicode_w(w_value)
            buf = rffi.cast(rffi.CCHARP, rffi.unicode2wcharp(text))
            # Size in bytes at two bytes per character (the same unit the
            # REG_MULTI_SZ branch uses), including the whole terminating
            # NUL character.  The previous "len * 2 + 1" counted only
            # half of the terminator.
            buflen = (len(text) + 1) * 2

    elif typ == rwinreg.REG_MULTI_SZ:
        if space.is_w(w_value, space.w_None):
            buflen = 1
            buf = lltype.malloc(rffi.CCHARP.TO, buflen, flavor='raw')
            buf[0] = '\0'
        elif space.isinstance_w(w_value, space.w_list):
            strings = []
            buflen = 0

            # unwrap strings and compute total size
            w_iter = space.iter(w_value)
            while True:
                try:
                    w_item = space.next(w_iter)
                    item = space.unicode_w(w_item)
                    strings.append(item)
                    buflen += 2 * (len(item) + 1)
                except OperationError as e:
                    if not e.match(space, space.w_StopIteration):
                        raise       # re-raise other app-level exceptions
                    break
            # Room for the extra terminator that closes the whole list.
            buflen += 2
            buf = lltype.malloc(rffi.CCHARP.TO, buflen, flavor='raw')

            # Now copy data; buflen is reused as the write offset.
            buflen = 0
            for string in strings:
                with rffi.scoped_unicode2wcharp(string) as wchr:
                    c_str = rffi.cast(rffi.CCHARP, wchr)
                    for i in range(len(string) * 2):
                        buf[buflen + i] = c_str[i]
                buflen += (len(string) + 1) * 2
                # Two zero bytes terminate this string.
                buf[buflen - 1] = '\0'
                buf[buflen - 2] = '\0'
            # Two more zero bytes terminate the list.
            buflen += 2
            buf[buflen - 1] = '\0'
            buf[buflen - 2] = '\0'

    else:  # REG_BINARY and ALL unknown data types.
        if space.is_w(w_value, space.w_None):
            # Reported size is 0, but a one-byte buffer is still allocated
            # so that a non-None pointer is returned.
            buflen = 0
            buf = lltype.malloc(rffi.CCHARP.TO, 1, flavor='raw')
            buf[0] = '\0'
        else:
            try:
                value = w_value.buffer_w(space, space.BUF_SIMPLE)
            except BufferInterfaceNotFound:
                raise oefmt(
                    space.w_TypeError,
                    "Objects of type '%T' can not be used as binary "
                    "registry values", w_value)
            else:
                value = value.as_str()
            buflen = len(value)
            buf = rffi.str2charp(value)

    if buf is not None:
        return rffi.cast(rffi.CCHARP, buf), buflen

    raise oefmt(space.w_ValueError,
                "Could not convert the data to the specified type")
def test_encode_utf8(self, space, api):
    """PyUnicode_EncodeUTF8 on a wchar_t buffer must match Python's own
    utf-8 encoding of the same text."""
    c_text = rffi.unicode2wcharp(u'späm')
    w_encoded = api.PyUnicode_EncodeUTF8(c_text, 4, None)
    expected = u'späm'.encode('utf-8')
    assert space.unwrap(w_encoded) == expected
    rffi.free_wcharp(c_text)
def handle_unichar_p(self, w_ffitype, w_obj, unicodeval):
    """Pass a unicode value to the call as the address of a raw
    wide-char copy.

    The copy is appended (as VOIDP) to self.w_func.to_free, and its
    address, cast to ULONG, is added to self.argchain.
    """
    wide_copy = rffi.unicode2wcharp(unicodeval)
    pending_frees = self.w_func.to_free
    pending_frees.append(rffi.cast(rffi.VOIDP, wide_copy))
    self.argchain.arg(rffi.cast(rffi.ULONG, wide_copy))
def test_encode_utf8(self, space):
    """PyUnicode_EncodeUTF8 on a wchar_t buffer must match Python's own
    utf-8 encoding of the same text."""
    c_text = rffi.unicode2wcharp(u'sp\x09m')
    w_encoded = PyUnicode_EncodeUTF8(space, c_text, 4, None)
    expected = u'sp\x09m'.encode('utf-8')
    assert space.unicode_w(w_encoded) == expected
    rffi.free_wcharp(c_text)
def as_py_uni(val):
    """Build an empty unicode object of len(val) characters and attach a
    raw wide-char copy of val as its wbuffer."""
    py_uni = new_empty_unicode(space, len(val))
    wide_copy = rffi.unicode2wcharp(val)
    set_wbuffer(py_uni, wide_copy)
    return py_uni
def test_encode_utf8(self, space):
    """PyUnicode_EncodeUTF8 on a wchar_t buffer must return a bytes
    object equal to Python's own utf-8 encoding of the same text."""
    c_text = rffi.unicode2wcharp(u'sp\x09m')
    w_encoded = PyUnicode_EncodeUTF8(space, c_text, 4, None)
    expected = u'sp\x09m'.encode('utf-8')
    assert space.type(w_encoded) is space.w_bytes
    assert space.bytes_w(w_encoded) == expected
    rffi.free_wcharp(c_text)