def descr__new__(space, w_unicodetype, w_string='', w_encoding=None, w_errors=None):
    # Build an instance of w_unicodetype out of w_string, optionally
    # decoding it with w_encoding / w_errors.
    #
    # NB. the '' default of w_string really reaches this function as a
    #     *wrapped* empty string: there is gateway magic at work
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    w_source = w_string
    w_source_type = space.type(w_source)
    # True when the caller supplied neither an encoding nor an error mode.
    plain_conversion = (space.is_w(w_encoding, space.w_None) and
                        space.is_w(w_errors, space.w_None))

    if space.is_w(w_source_type, space.w_unicode):
        # The argument is exactly a unicode: decoding makes no sense.
        if not plain_conversion:
            raise OperationError(space.w_TypeError,
                                 space.wrap('decoding Unicode is not supported'))
        if space.is_w(w_unicodetype, space.w_unicode):
            # unicode(u'...') hands back the very same object.
            return w_source
        w_value = w_source
    elif not plain_conversion:
        w_value = unicode_from_encoded_object(space, w_source,
                                              w_encoding, w_errors)
    elif space.is_true(space.isinstance(w_source, space.w_str)):
        w_value = unicode_from_string(space, w_source)
    elif space.is_true(space.isinstance(w_source, space.w_unicode)):
        # instance of a unicode subclass: its value is used as is
        w_value = w_source
    else:
        w_value = unicode_from_object(space, w_source)

    # help the annotator! also the ._value depends on W_UnicodeObject layout
    assert isinstance(w_value, W_UnicodeObject)
    w_result = space.allocate_instance(W_UnicodeObject, w_unicodetype)
    W_UnicodeObject.__init__(w_result, w_value._value)
    return w_result
def descr_new_(space, w_unicodetype, w_string, w_encoding=None, w_errors=None):
    # Build an instance of w_unicodetype out of w_string, optionally
    # decoding it with the given encoding / error mode.
    #
    # NB. w_string is expected to arrive already wrapped; presumably any
    #     default value is supplied by the gateway, not by this signature.
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    w_source = w_string

    encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
    plain_conversion = encoding is None and errors is None

    # convoluted logic for the case when a unicode subclass has a
    # __unicode__ method: that method has to be called, so such an object
    # must not be treated as a ready-made unicode value
    exact_unicode = space.is_w(space.type(w_source), space.w_unicode)
    if exact_unicode:
        use_value_directly = True
    else:
        use_value_directly = (
            space.isinstance_w(w_source, space.w_unicode) and
            space.findattr(w_source, space.wrap('__unicode__')) is None)

    if use_value_directly:
        if not plain_conversion:
            raise OperationError(space.w_TypeError,
                                 space.wrap('decoding Unicode is not supported'))
        if exact_unicode and space.is_w(w_unicodetype, space.w_unicode):
            # unicode(u'...') hands back the very same object.
            return w_source
        w_value = w_source
    else:
        if plain_conversion:
            w_value = unicode_from_object(space, w_source)
        else:
            w_value = unicode_from_encoded_object(space, w_source,
                                                  encoding, errors)
        if space.is_w(w_unicodetype, space.w_unicode):
            # The conversion result is returned without copying.
            return w_value

    # A subclass was requested (or a subclass instance must be copied into
    # a plain unicode): allocate the target type and copy the value over.
    assert isinstance(w_value, W_UnicodeObject)
    w_result = space.allocate_instance(W_UnicodeObject, w_unicodetype)
    W_UnicodeObject.__init__(w_result, w_value._value)
    return w_result
def descr__new__(space, w_unicodetype, w_string='', w_encoding=None, w_errors=None):
    # Build an instance of w_unicodetype out of w_string, optionally
    # decoding it; supports both the plain and the rope representation.
    #
    # NB. the '' default of w_string really reaches this function as a
    #     *wrapped* empty string: there is gateway magic at work
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    from pypy.objspace.std.ropeunicodeobject import W_RopeUnicodeObject
    w_source = w_string
    w_source_type = space.type(w_source)
    encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
    plain_conversion = encoding is None and errors is None

    if space.is_w(w_source_type, space.w_unicode):
        # The argument is exactly a unicode: decoding makes no sense.
        if not plain_conversion:
            raise OperationError(space.w_TypeError,
                                 space.wrap('decoding Unicode is not supported'))
        if space.is_w(w_unicodetype, space.w_unicode):
            # unicode(u'...') hands back the very same object.
            return w_source
        w_value = w_source
    elif not plain_conversion:
        w_value = unicode_from_encoded_object(space, w_source,
                                              encoding, errors)
    elif space.is_true(space.isinstance(w_source, space.w_str)):
        w_value = unicode_from_string(space, w_source)
    elif space.is_true(space.isinstance(w_source, space.w_unicode)):
        # instance of a unicode subclass: its value is used as is
        w_value = w_source
    else:
        w_value = unicode_from_object(space, w_source)

    if space.config.objspace.std.withropeunicode:
        # Rope build: the payload lives in ._node
        assert isinstance(w_value, W_RopeUnicodeObject)
        w_rope = space.allocate_instance(W_RopeUnicodeObject, w_unicodetype)
        W_RopeUnicodeObject.__init__(w_rope, w_value._node)
        return w_rope

    # Default build: the payload lives in ._value
    assert isinstance(w_value, W_UnicodeObject)
    w_result = space.allocate_instance(W_UnicodeObject, w_unicodetype)
    W_UnicodeObject.__init__(w_result, w_value._value)
    return w_result
def descr_new_(space, w_unicodetype, w_string='', w_encoding=None, w_errors=None):
    # Build an instance of w_unicodetype out of w_string, optionally
    # decoding it; supports both the plain and the rope representation.
    #
    # Raises a wrapped TypeError when an encoding or error mode is given
    # together with an argument that is already unicode.
    #
    # NB. the default value of w_string is really a *wrapped* empty string:
    #     there is gateway magic at work
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    from pypy.objspace.std.ropeunicodeobject import W_RopeUnicodeObject
    w_obj = w_string

    encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
    if space.is_true(space.isinstance(w_obj, space.w_unicode)):
        if encoding is not None or errors is not None:
            raise OperationError(space.w_TypeError,
                                 space.wrap('decoding Unicode is not supported'))
        w_value = w_obj
        # The argument itself may only be handed back when it is *exactly*
        # a unicode.  An instance of a unicode subclass has to be copied
        # below, otherwise unicode(subclass_instance) would return an
        # object whose type is the subclass instead of unicode.
        can_return_directly = space.is_w(space.type(w_obj), space.w_unicode)
    else:
        if encoding is None and errors is None:
            if space.is_true(space.isinstance(w_obj, space.w_str)):
                w_value = unicode_from_string(space, w_obj)
            else:
                w_value = unicode_from_object(space, w_obj)
        else:
            w_value = unicode_from_encoded_object(space, w_obj,
                                                  encoding, errors)
        # Freshly converted value: no need to copy it again.
        can_return_directly = True

    if can_return_directly and space.is_w(w_unicodetype, space.w_unicode):
        return w_value

    if space.config.objspace.std.withropeunicode:
        # Rope build: the payload lives in ._node
        assert isinstance(w_value, W_RopeUnicodeObject)
        w_newobj = space.allocate_instance(W_RopeUnicodeObject, w_unicodetype)
        W_RopeUnicodeObject.__init__(w_newobj, w_value._node)
        return w_newobj

    # Default build: the payload lives in ._value
    assert isinstance(w_value, W_UnicodeObject)
    w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype)
    W_UnicodeObject.__init__(w_newobj, w_value._value)
    return w_newobj
def descr_append_slice(self, space, w_s, start, end):
    # Append the [start, end) slice of w_s to the builder.  start and end
    # are positions as understood by W_UnicodeObject._index_to_byte; they
    # are translated to byte offsets into the UTF-8 data before appending.
    w_text = W_UnicodeObject.convert_arg_to_w_unicode(space, w_s)
    # Reject anything outside 0 <= start <= end <= length.
    if start < 0 or end < start or end > w_text._len():
        raise oefmt(space.w_ValueError, "bad start/stop")
    first_byte = w_text._index_to_byte(start)
    last_byte = w_text._index_to_byte(end)
    self.builder.append_slice(w_text._utf8, first_byte, last_byte)
def descr_append(self, space, w_s):
    # Append w_s to the builder.
    if not isinstance(w_s, W_UnicodeObject):
        # Generic path: convert the argument to a unicode object first,
        # then append its UTF-8 data.
        w_converted = W_UnicodeObject.convert_arg_to_w_unicode(space, w_s)
        utf8 = space.utf8_w(w_converted)
        self.builder.append(utf8)
        return
    # Fast path: the UTF-8 data and its length are already at hand.
    self.builder.append_utf8(w_s._utf8, w_s._len())
def unicode_from_string(space, w_str):
    # Convert a wrapped byte string into a unicode object using the
    # default encoding.
    # this is a performance and bootstrapping hack
    default_encoding = getdefaultencoding(space)
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    if default_encoding == 'ascii':
        raw = space.str_w(w_str)
        try:
            # Fast path: decode at interpreter level.
            return W_UnicodeObject(raw.decode("ascii"))
        except UnicodeDecodeError:
            # raising UnicodeDecodeError is messy, "please crash for me":
            # redo the decoding through the generic path so that it
            # reports the failure
            return unicode_from_encoded_object(space, w_str,
                                               "ascii", "strict")
    # Any other default encoding goes through the generic path.
    return unicode_from_encoded_object(space, w_str,
                                       default_encoding, "strict")
def descr_next(self, space):
    # Return the next item of the iterated unicode object as a new
    # one-item unicode object; raise a wrapped StopIteration when done.
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    w_seq = self.w_seq
    if w_seq is None:
        # Already exhausted by an earlier call.
        raise OperationError(space.w_StopIteration, space.w_None)
    assert isinstance(w_seq, W_UnicodeObject)
    position = self.index
    if position == w_seq._length:
        # Reached the end: drop the reference to the sequence.
        self.w_seq = None
        raise OperationError(space.w_StopIteration, space.w_None)
    # Slice out the bytes between the current byte offset and the start
    # of the following codepoint.
    byte_start = self.byteindex
    byte_stop = w_seq.next_codepoint_pos_dont_look_inside(byte_start)
    w_item = W_UnicodeObject(w_seq._utf8[byte_start:byte_stop], 1)
    self.byteindex = byte_stop
    self.index += 1
    return w_item
def newunicode(self, uni):
    # Wrap the interpreter-level unicode string `uni` in a W_UnicodeObject.
    # Both asserts are kept separate: None and non-unicode values are
    # rejected individually.
    assert uni is not None
    assert isinstance(uni, unicode)
    w_result = W_UnicodeObject(uni)
    return w_result
def wrapunicode(space, uni):
    # Wrap the interpreter-level unicode string `uni`, picking the
    # representation selected by the object space configuration.
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    from pypy.objspace.std.ropeunicodeobject import (
        wrapunicode as rope_wrapunicode)
    if not space.config.objspace.std.withropeunicode:
        return W_UnicodeObject(uni)
    # Rope build: delegate to the rope module's own wrapper.
    return rope_wrapunicode(space, uni)
def newutf8(self, utf8s, length):
    # Wrap the byte string `utf8s` together with `length` in a
    # W_UnicodeObject.  Presumably `length` is the number of codepoints
    # encoded in `utf8s` -- not checked here.
    assert utf8s is not None
    assert isinstance(utf8s, str)
    w_result = W_UnicodeObject(utf8s, length)
    return w_result
def wrapunicode(space, uni):
    # Wrap the interpreter-level unicode string `uni` in a W_UnicodeObject.
    # `space` is accepted for interface compatibility but not used here.
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    w_result = W_UnicodeObject(uni)
    return w_result
def _wrap_object(self, space, obj):
    # Turn the low-level value `obj` into a one-item W_UnicodeObject.
    # `space` is not used here.
    # First cast to this converter's own C type, then to a UniChar.
    c_value = rffi.cast(self.c_type, obj)
    unichar = rffi.cast(lltype.UniChar, c_value)
    # Encode the single character as UTF-8; the 1 is passed as the length.
    utf8 = unichar.encode('utf8')
    return W_UnicodeObject(utf8, 1)