def strcoll(space, w_s1, w_s2):
    "string,string -> int. Compares two strings according to the locale."
    # Each raw buffer is released by its own ``finally`` clause, so the
    # first buffer is not leaked when allocating the second one fails.
    both_are_str = (space.is_true(space.isinstance(w_s1, space.w_str)) and
                    space.is_true(space.isinstance(w_s2, space.w_str)))
    if both_are_str:
        # Two byte strings: compare them through _strcoll on char* copies.
        s1, s2 = space.str_w(w_s1), space.str_w(w_s2)
        s1_c = rffi.str2charp(s1)
        try:
            s2_c = rffi.str2charp(s2)
            try:
                result = _strcoll(s1_c, s2_c)
            finally:
                rffi.free_charp(s2_c)
        finally:
            rffi.free_charp(s1_c)
        return space.wrap(result)

    # Any other combination of arguments is converted with space.unicode_w
    # and compared through _wcscoll on wchar_t* copies.  (An explicit
    # "strcoll arguments must be strings" ValueError check used to sit here
    # but was disabled.)
    u1, u2 = space.unicode_w(w_s1), space.unicode_w(w_s2)
    u1_c = rffi.unicode2wcharp(u1)
    try:
        u2_c = rffi.unicode2wcharp(u2)
        try:
            result = _wcscoll(u1_c, u2_c)
        finally:
            rffi.free_wcharp(u2_c)
    finally:
        rffi.free_wcharp(u1_c)
    return space.wrap(result)
def test_ascii_codec(self, space, api):
    """Check PyUnicode_DecodeASCII and PyUnicode_EncodeASCII.

    Covers a successful decode, a decode expected to raise
    UnicodeDecodeError, a successful encode and an encode expected to
    raise UnicodeEncodeError.
    """
    # Plain ASCII bytes decode to the equal unicode object.
    raw = 'abcdefg'
    charp = rffi.str2charp(raw)
    w_decoded = api.PyUnicode_DecodeASCII(
        charp, len(raw), lltype.nullptr(rffi.CCHARP.TO))
    assert space.eq_w(w_decoded, space.wrap(u"abcdefg"))
    rffi.free_charp(charp)

    # The trailing \xFF byte is expected to trigger UnicodeDecodeError.
    raw = 'abcd\xFF'
    charp = rffi.str2charp(raw)
    self.raises(space, api, UnicodeDecodeError,
                api.PyUnicode_DecodeASCII,
                charp, len(raw), lltype.nullptr(rffi.CCHARP.TO))
    rffi.free_charp(charp)

    # Plain ASCII text encodes to the equal byte string.
    text = u'abcdefg'
    wcharp = rffi.unicode2wcharp(text)
    w_encoded = api.PyUnicode_EncodeASCII(
        wcharp, len(text), lltype.nullptr(rffi.CCHARP.TO))
    assert space.eq_w(space.wrap("abcdefg"), w_encoded)
    rffi.free_wcharp(wcharp)

    # Accented characters are expected to trigger UnicodeEncodeError.
    text = u'äbcdéfg'
    wcharp = rffi.unicode2wcharp(text)
    # NOTE(review): this unchecked call duplicates the one verified by
    # self.raises() right below and its result is never used -- kept as
    # in the original; confirm whether it is intentional.
    api.PyUnicode_EncodeASCII(
        wcharp, len(text), lltype.nullptr(rffi.CCHARP.TO))
    self.raises(space, api, UnicodeEncodeError,
                api.PyUnicode_EncodeASCII,
                wcharp, len(text), lltype.nullptr(rffi.CCHARP.TO))
    rffi.free_wcharp(wcharp)
def test_AS(self, space, api):
    """Exercise the PyUnicode_AS_* / PyUnicode_As* accessor functions."""
    w_spam = space.wrap(u"spam")
    data_view = rffi.cast(rffi.CWCHARP, api.PyUnicode_AS_DATA(w_spam))
    macro_view = api.PyUnicode_AS_UNICODE(w_spam)
    checked_view = api.PyUnicode_AsUnicode(w_spam)
    # Every buffer must expose the characters of the wrapped text.
    for (index, expected) in enumerate(space.unwrap(w_spam)):
        assert data_view[index] == expected
        assert macro_view[index] == expected
        assert checked_view[index] == expected

    # PyUnicode_AsUnicode is expected to raise TypeError for a byte string.
    self.raises(space, api, TypeError, api.PyUnicode_AsUnicode,
                space.wrap("spam"))

    # Encoding to UTF-8: the string and the object variant must agree.
    utf_8 = rffi.str2charp("utf-8")
    w_bytes = api.PyUnicode_AsEncodedString(
        space.wrap(u"späm"), utf_8, None)
    assert space.unwrap(w_bytes) == "sp\xc3\xa4m"
    w_bytes_obj = api.PyUnicode_AsEncodedObject(
        space.wrap(u"späm"), utf_8, None)
    assert space.eq_w(w_bytes, w_bytes_obj)

    # Neither a tuple nor a byte string is accepted as input.
    self.raises(space, api, TypeError, api.PyUnicode_AsEncodedString,
                space.newtuple([1, 2, 3]), None, None)
    self.raises(space, api, TypeError, api.PyUnicode_AsEncodedString,
                space.wrap(""), None, None)

    # With the "replace" error handler the umlaut becomes "?".
    ascii_name = rffi.str2charp("ascii")
    replace = rffi.str2charp("replace")
    w_bytes = api.PyUnicode_AsEncodedString(
        space.wrap(u"späm"), ascii_name, replace)
    assert space.unwrap(w_bytes) == "sp?m"
    rffi.free_charp(utf_8)
    rffi.free_charp(replace)
    rffi.free_charp(ascii_name)

    # PyUnicode_AsWideChar is called with a size of 5 on a buffer that
    # was created from a five-character string.
    wide_buf = rffi.unicode2wcharp(u"12345")
    api.PyUnicode_AsWideChar(space.wrap(u"longword"), wide_buf, 5)
    assert rffi.wcharp2unicode(wide_buf) == "longw"
    api.PyUnicode_AsWideChar(space.wrap(u"a"), wide_buf, 5)
    assert rffi.wcharp2unicode(wide_buf) == "a"
    rffi.free_wcharp(wide_buf)
def fill_with_unicode(self, space, w_value):
    """Point this object at a wide-character copy of ``w_value``.

    When ``w_value`` is missing (Python ``None`` or the wrapped None)
    the object is cleared instead.  Otherwise ``self.ptr`` receives the
    freshly allocated buffer, cast to ``roci.oratext``, and ``self.size``
    is set to twice the number of characters.
    """
    if w_value is None or space.is_w(w_value, space.w_None):
        self.clear()
        return

    text = space.unicode_w(w_value)
    wide_buf = rffi.unicode2wcharp(text)
    self.ptr = rffi.cast(roci.oratext, wide_buf)
    # XXX ucs2 only probably: the factor 2 presumably stands for the
    # size of one character in bytes -- confirm for wider builds.
    self.size = len(text) * 2
def PyUnicode_AS_UNICODE(space, ref):
    """Return a pointer to the internal Py_UNICODE buffer of the object. ref
    has to be a PyUnicodeObject (not checked)."""
    py_unicode = rffi.cast(PyUnicodeObject, ref)
    if py_unicode.c_buffer:
        # The buffer was already materialized by an earlier call.
        return py_unicode.c_buffer
    # First access: copy the characters of the referenced object into a
    # new wide-character buffer and remember it on the struct.
    text = space.unicode_w(from_ref(space, ref))
    py_unicode.c_buffer = rffi.unicode2wcharp(text)
    return py_unicode.c_buffer
def test_mbcs(self, space, api):
    """Check PyUnicode_EncodeMBCS on text containing code point 12345.

    The test is skipped everywhere except on win32.
    """
    on_windows = sys.platform == 'win32'
    if not on_windows:
        py.test.skip("mcbs encoding only exists on Windows")
    # Unfortunately mbcs is locale-dependent; the expectation below
    # holds at least on a Western Windows.
    text = u"abc" + unichr(12345)
    wide_buf = rffi.unicode2wcharp(text)
    w_encoded = api.PyUnicode_EncodeMBCS(wide_buf, 4, None)
    rffi.free_wcharp(wide_buf)
    # The result must be a byte string in which the fourth character
    # has been replaced by "?".
    assert space.type(w_encoded) is space.w_str
    assert space.str_w(w_encoded) == "abc?"
def test_latin1(self, space, api):
    """Check the Latin-1 decode, encode and as-string API functions."""
    # bytes -> unicode
    raw = "abcdefg"
    charp = rffi.str2charp(raw)
    w_decoded = api.PyUnicode_DecodeLatin1(
        charp, len(raw), lltype.nullptr(rffi.CCHARP.TO))
    assert space.eq_w(w_decoded, space.wrap(u"abcdefg"))
    rffi.free_charp(charp)

    # unicode buffer -> bytes
    text = u"abcdefg"
    wcharp = rffi.unicode2wcharp(text)
    w_encoded = api.PyUnicode_EncodeLatin1(
        wcharp, len(text), lltype.nullptr(rffi.CCHARP.TO))
    assert space.eq_w(space.wrap("abcdefg"), w_encoded)
    rffi.free_wcharp(wcharp)

    # wrapped unicode object -> bytes
    plain = "abcdef"
    w_text = space.wrap(plain.decode("ascii"))
    w_latin1 = api.PyUnicode_AsLatin1String(w_text)
    assert space.eq_w(space.wrap(plain), w_latin1)
def test_latin1(self, space, api):
    """Exercise PyUnicode_DecodeLatin1, PyUnicode_EncodeLatin1 and
    PyUnicode_AsLatin1String on pure-ASCII input."""
    # Decoding: a char* buffer becomes the equal unicode object.
    byte_text = 'abcdefg'
    byte_buf = rffi.str2charp(byte_text)
    w_unicode = api.PyUnicode_DecodeLatin1(
        byte_buf, len(byte_text), lltype.nullptr(rffi.CCHARP.TO))
    assert space.eq_w(w_unicode, space.wrap(u"abcdefg"))
    rffi.free_charp(byte_buf)

    # Encoding: a wchar_t* buffer becomes the equal byte string.
    wide_text = u'abcdefg'
    wide_buf = rffi.unicode2wcharp(wide_text)
    w_bytes = api.PyUnicode_EncodeLatin1(
        wide_buf, len(wide_text), lltype.nullptr(rffi.CCHARP.TO))
    assert space.eq_w(space.wrap("abcdefg"), w_bytes)
    rffi.free_wcharp(wide_buf)

    # Object-level conversion of a wrapped unicode string.
    expected = "abcdef"
    w_source = space.wrap(expected.decode("ascii"))
    w_converted = api.PyUnicode_AsLatin1String(w_source)
    assert space.eq_w(space.wrap(expected), w_converted)
def add_char_p_maybe(self, space, argchain, w_arg, w_argtype):
    """
    Automatic conversion from string to char_p (or unichar_p).

    When the argument type and the type of ``w_arg`` match one of the
    supported combinations, a raw copy of the string is allocated, its
    address is pushed on ``argchain`` and True is returned; otherwise
    nothing happens and False is returned.  Every allocated buffer is
    recorded in ``self.to_free`` so that it can be released after the
    call (the release itself is not done here).
    """
    w_type = jit.promote(space.type(w_arg))

    # str passed where a char* is expected
    if w_argtype.is_char_p() and w_type is space.w_str:
        char_buf = rffi.str2charp(space.str_w(w_arg))
        self.to_free.append(rffi.cast(rffi.VOIDP, char_buf))
        char_addr = rffi.cast(rffi.ULONG, char_buf)
        argchain.arg(char_addr)
        return True

    # str or unicode passed where a wchar_t* is expected
    accepts_wide = w_type is space.w_str or w_type is space.w_unicode
    if w_argtype.is_unichar_p() and accepts_wide:
        wide_buf = rffi.unicode2wcharp(space.unicode_w(w_arg))
        self.to_free.append(rffi.cast(rffi.VOIDP, wide_buf))
        wide_addr = rffi.cast(rffi.ULONG, wide_buf)
        argchain.arg(wide_addr)
        return True

    return False
def test_AS(self, space, api):
    """Check the buffer accessors and the encoding helpers of the
    unicode C API, then PyUnicode_AsWideChar."""
    w_word = space.wrap(u'spam')
    from_data = rffi.cast(rffi.CWCHARP, api.PyUnicode_AS_DATA(w_word))
    from_macro = api.PyUnicode_AS_UNICODE(w_word)
    from_func = api.PyUnicode_AsUnicode(w_word)
    # The three pointers must all read back the original characters.
    for (pos, ch) in enumerate(space.unwrap(w_word)):
        assert from_data[pos] == ch
        assert from_macro[pos] == ch
        assert from_func[pos] == ch

    # A wrapped byte string is expected to be refused with TypeError.
    self.raises(space, api, TypeError, api.PyUnicode_AsUnicode,
                space.wrap('spam'))

    # UTF-8 encoding through both entry points gives equal results.
    utf_8 = rffi.str2charp('utf-8')
    w_utf8 = api.PyUnicode_AsEncodedString(
        space.wrap(u'späm'), utf_8, None)
    assert space.unwrap(w_utf8) == 'sp\xc3\xa4m'
    w_utf8_obj = api.PyUnicode_AsEncodedObject(
        space.wrap(u'späm'), utf_8, None)
    assert space.eq_w(w_utf8, w_utf8_obj)

    # Non-unicode inputs are expected to be refused with TypeError.
    self.raises(space, api, TypeError, api.PyUnicode_AsEncodedString,
                space.newtuple([1, 2, 3]), None, None)
    self.raises(space, api, TypeError, api.PyUnicode_AsEncodedString,
                space.wrap(''), None, None)

    # ASCII with the 'replace' handler turns the umlaut into '?'.
    ascii_codec = rffi.str2charp('ascii')
    replace = rffi.str2charp('replace')
    w_replaced = api.PyUnicode_AsEncodedString(
        space.wrap(u'späm'), ascii_codec, replace)
    assert space.unwrap(w_replaced) == 'sp?m'
    rffi.free_charp(utf_8)
    rffi.free_charp(replace)
    rffi.free_charp(ascii_codec)

    # Copy into a buffer created from five characters, passing size 5.
    target = rffi.unicode2wcharp(u"12345")
    api.PyUnicode_AsWideChar(space.wrap(u'longword'), target, 5)
    assert rffi.wcharp2unicode(target) == 'longw'
    api.PyUnicode_AsWideChar(space.wrap(u'a'), target, 5)
    assert rffi.wcharp2unicode(target) == 'a'
    rffi.free_wcharp(target)
def test_encode_utf8(self, space, api):
    """PyUnicode_EncodeUTF8 must agree with Python's own UTF-8 encoder."""
    wide_buf = rffi.unicode2wcharp(u'späm')
    # 4 is the number of characters passed to the encoder.
    w_encoded = api.PyUnicode_EncodeUTF8(wide_buf, 4, None)
    expected = u'späm'.encode('utf-8')
    assert space.unwrap(w_encoded) == expected
    rffi.free_wcharp(wide_buf)
def test_encode_utf8(self, space, api):
    """Encode a four-character buffer with PyUnicode_EncodeUTF8 and
    compare the unwrapped result with ``unicode.encode("utf-8")``."""
    source_buf = rffi.unicode2wcharp(u"späm")
    w_result = api.PyUnicode_EncodeUTF8(source_buf, 4, None)
    actual = space.unwrap(w_result)
    assert actual == u"späm".encode("utf-8")
    rffi.free_wcharp(source_buf)
def handle_unichar_p(self, w_ffitype, w_obj, unicodeval):
    """Pass ``unicodeval`` to the call as the address of a wide buffer.

    A raw wide-character copy of ``unicodeval`` is allocated, recorded in
    ``self.w_func.to_free`` and its address is appended to
    ``self.argchain``.  ``w_ffitype`` and ``w_obj`` are not used here.
    """
    wide_buf = rffi.unicode2wcharp(unicodeval)
    # Register the buffer first so that its owner can release it later.
    as_voidp = rffi.cast(rffi.VOIDP, wide_buf)
    self.w_func.to_free.append(as_voidp)
    # The argument chain receives the buffer address as an unsigned long.
    address = rffi.cast(rffi.ULONG, wide_buf)
    self.argchain.arg(address)