Beispiel #1
0
def descr__new__(space, w_unicodetype, w_string='', w_encoding=None, w_errors=None):
    # Produces an object of type w_unicodetype carrying the unicode value
    # obtained from w_string, optionally decoded with w_encoding/w_errors.
    # NB. the default value of w_string is really a *wrapped* empty string:
    #     there is gateway magic at work
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    is_exact_unicode = space.is_w(space.type(w_string), space.w_unicode)
    # True only when both w_encoding and w_errors are the wrapped None.
    no_codec_args = (space.is_w(w_encoding, space.w_None) and
                     space.is_w(w_errors, space.w_None))

    if is_exact_unicode:
        # An exact unicode object cannot be decoded a second time.
        if not no_codec_args:
            raise OperationError(space.w_TypeError,
                                 space.wrap('decoding Unicode is not supported'))
        # Exact unicode requested as exact unicode: hand the argument back.
        if space.is_w(w_unicodetype, space.w_unicode):
            return w_string
        w_source = w_string
    elif not no_codec_args:
        w_source = unicode_from_encoded_object(space, w_string,
                                               w_encoding, w_errors)
    elif space.is_true(space.isinstance(w_string, space.w_str)):
        w_source = unicode_from_string(space, w_string)
    elif space.is_true(space.isinstance(w_string, space.w_unicode)):
        # instance of a unicode subclass: its value is copied below
        w_source = w_string
    else:
        w_source = unicode_from_object(space, w_string)

    # help the annotator! also the ._value depends on W_UnicodeObject layout
    assert isinstance(w_source, W_UnicodeObject)
    w_result = space.allocate_instance(W_UnicodeObject, w_unicodetype)
    W_UnicodeObject.__init__(w_result, w_source._value)
    return w_result
Beispiel #2
0
def descr_new_(space, w_unicodetype, w_string, w_encoding=None, w_errors=None):
    # Produces an object of type w_unicodetype carrying the unicode value
    # obtained from w_string, optionally decoded with the given
    # encoding/errors.
    # NOTE(review): w_string has no default in this signature; presumably a
    # *wrapped* empty string default is supplied by the gateway spec outside
    # this view -- confirm against the decorator/typedef.
    from pypy.objspace.std.unicodeobject import W_UnicodeObject

    encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
    decoding_requested = encoding is not None or errors is not None

    # convoluted logic for the case when unicode subclass has a __unicode__
    # method, we need to call this method
    exact_unicode = space.is_w(space.type(w_string), space.w_unicode)
    use_as_is = exact_unicode
    if not use_as_is:
        # A subclass instance is used directly only when it defines no
        # __unicode__ attribute.
        use_as_is = (
            space.isinstance_w(w_string, space.w_unicode) and
            space.findattr(w_string, space.wrap('__unicode__')) is None)

    if use_as_is:
        if decoding_requested:
            raise OperationError(space.w_TypeError,
                                 space.wrap('decoding Unicode is not supported'))
        w_source = w_string
        # Only an exact unicode object may be returned unchanged.
        if exact_unicode and space.is_w(w_unicodetype, space.w_unicode):
            return w_source
    else:
        if decoding_requested:
            w_source = unicode_from_encoded_object(space, w_string,
                                                   encoding, errors)
        else:
            w_source = unicode_from_object(space, w_string)
        # The conversion result is returned directly when the exact unicode
        # type was requested.
        if space.is_w(w_unicodetype, space.w_unicode):
            return w_source

    # Copy the value into a freshly allocated instance of w_unicodetype.
    assert isinstance(w_source, W_UnicodeObject)
    w_result = space.allocate_instance(W_UnicodeObject, w_unicodetype)
    W_UnicodeObject.__init__(w_result, w_source._value)
    return w_result
Beispiel #3
0
def descr__new__(space, w_unicodetype, w_string='', w_encoding=None, w_errors=None):
    # Produces an object of type w_unicodetype carrying the unicode value
    # obtained from w_string; the storage class used depends on the
    # withropeunicode configuration option.
    # NB. the default value of w_string is really a *wrapped* empty string:
    #     there is gateway magic at work
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    from pypy.objspace.std.ropeunicodeobject import W_RopeUnicodeObject
    w_source_type = space.type(w_string)

    encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
    decoding_requested = encoding is not None or errors is not None

    if space.is_w(w_source_type, space.w_unicode):
        # An exact unicode object cannot be decoded a second time.
        if decoding_requested:
            raise OperationError(space.w_TypeError,
                                 space.wrap('decoding Unicode is not supported'))
        # Exact unicode requested as exact unicode: hand the argument back.
        if space.is_w(w_unicodetype, space.w_unicode):
            return w_string
        w_source = w_string
    elif decoding_requested:
        w_source = unicode_from_encoded_object(space, w_string,
                                               encoding, errors)
    elif space.is_true(space.isinstance(w_string, space.w_str)):
        w_source = unicode_from_string(space, w_string)
    elif space.is_true(space.isinstance(w_string, space.w_unicode)):
        # instance of a unicode subclass: its content is copied below
        w_source = w_string
    else:
        w_source = unicode_from_object(space, w_string)

    # Copy the content into a freshly allocated instance of w_unicodetype.
    if space.config.objspace.std.withropeunicode:
        assert isinstance(w_source, W_RopeUnicodeObject)
        w_result = space.allocate_instance(W_RopeUnicodeObject, w_unicodetype)
        W_RopeUnicodeObject.__init__(w_result, w_source._node)
    else:
        assert isinstance(w_source, W_UnicodeObject)
        w_result = space.allocate_instance(W_UnicodeObject, w_unicodetype)
        W_UnicodeObject.__init__(w_result, w_source._value)
    return w_result
Beispiel #4
0
def descr_new_(space, w_unicodetype, w_string='', w_encoding=None, w_errors=None):
    # Produces an object of type w_unicodetype carrying the unicode value
    # obtained from w_string; the storage class used depends on the
    # withropeunicode configuration option.
    # NB. the default value of w_string is really a *wrapped* empty string:
    #     there is gateway magic at work
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    from pypy.objspace.std.ropeunicodeobject import W_RopeUnicodeObject

    encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
    decoding_requested = encoding is not None or errors is not None

    if not space.is_true(space.isinstance(w_string, space.w_unicode)):
        # Not unicode yet: convert, decoding explicitly if asked to.
        if decoding_requested:
            w_source = unicode_from_encoded_object(space, w_string,
                                                   encoding, errors)
        elif space.is_true(space.isinstance(w_string, space.w_str)):
            w_source = unicode_from_string(space, w_string)
        else:
            w_source = unicode_from_object(space, w_string)
        # The conversion result is returned directly when the exact unicode
        # type was requested.
        if space.is_w(w_unicodetype, space.w_unicode):
            return w_source
    elif decoding_requested:
        # A unicode object (or subclass instance) cannot be decoded again.
        raise OperationError(space.w_TypeError,
                             space.wrap('decoding Unicode is not supported'))
    else:
        w_source = w_string

    # Copy the content into a freshly allocated instance of w_unicodetype.
    if space.config.objspace.std.withropeunicode:
        assert isinstance(w_source, W_RopeUnicodeObject)
        w_result = space.allocate_instance(W_RopeUnicodeObject, w_unicodetype)
        W_RopeUnicodeObject.__init__(w_result, w_source._node)
    else:
        assert isinstance(w_source, W_UnicodeObject)
        w_result = space.allocate_instance(W_UnicodeObject, w_unicodetype)
        W_UnicodeObject.__init__(w_result, w_source._value)
    return w_result
Beispiel #5
0
 def descr_append_slice(self, space, w_s, start, end):
     # Appends the part of w_s between the indices start and end to the
     # builder. start/end are translated to byte offsets through
     # _index_to_byte before slicing the _utf8 string.
     w_text = W_UnicodeObject.convert_arg_to_w_unicode(space, w_s)
     # Reject anything that is not 0 <= start <= end <= length.
     if start < 0 or start > end or end > w_text._len():
         raise oefmt(space.w_ValueError, "bad start/stop")
     first_byte = w_text._index_to_byte(start)
     last_byte = w_text._index_to_byte(end)
     self.builder.append_slice(w_text._utf8, first_byte, last_byte)
Beispiel #6
0
 def descr_append(self, space, w_s):
     # Appends w_s to the builder.
     if not isinstance(w_s, W_UnicodeObject):
         # Anything else is converted to a W_UnicodeObject first and its
         # utf8 string is appended.
         w_converted = W_UnicodeObject.convert_arg_to_w_unicode(space, w_s)
         self.builder.append(space.utf8_w(w_converted))
         return
     # Already a W_UnicodeObject: pass its utf8 string and length directly.
     self.builder.append_utf8(w_s._utf8, w_s._len())
Beispiel #7
0
def unicode_from_string(space, w_str):
    # this is a performance and bootstrapping hack: with an 'ascii' default
    # encoding the string is decoded directly instead of going through
    # unicode_from_encoded_object
    default_encoding = getdefaultencoding(space)
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    if default_encoding == 'ascii':
        raw = space.str_w(w_str)
        try:
            return W_UnicodeObject(raw.decode("ascii"))
        except UnicodeDecodeError:
            # raising UnicodeDecodeError is messy, "please crash for me":
            # redo the conversion through the generic path instead
            return unicode_from_encoded_object(space, w_str, "ascii", "strict")
    return unicode_from_encoded_object(space, w_str, default_encoding, "strict")
Beispiel #8
0
 def descr_next(self, space):
     # Returns the next item of the iterated W_UnicodeObject as a new
     # W_UnicodeObject of length 1, or raises StopIteration.
     from pypy.objspace.std.unicodeobject import W_UnicodeObject
     w_source = self.w_seq
     if w_source is None:
         # the sequence reference was already dropped by an earlier call
         raise OperationError(space.w_StopIteration, space.w_None)
     assert isinstance(w_source, W_UnicodeObject)
     if self.index == w_source._length:
         # End reached: drop the reference to the sequence.
         self.w_seq = None
         raise OperationError(space.w_StopIteration, space.w_None)
     # self.byteindex is the offset into _utf8 of the current item; the next
     # item starts where this one ends.
     byte_start = self.byteindex
     byte_stop = w_source.next_codepoint_pos_dont_look_inside(byte_start)
     w_char = W_UnicodeObject(w_source._utf8[byte_start:byte_stop], 1)
     self.byteindex = byte_stop
     self.index += 1
     return w_char
Beispiel #9
0
 def newunicode(self, uni):
     # Wraps the unicode string uni in a new W_UnicodeObject.
     # The two asserts are kept separate on purpose.
     assert uni is not None
     assert isinstance(uni, unicode)
     w_wrapped = W_UnicodeObject(uni)
     return w_wrapped
Beispiel #10
0
def wrapunicode(space, uni):
    # Wraps uni, delegating to the rope implementation's wrapunicode when
    # the withropeunicode option is enabled.
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    # Aliased so that it does not shadow this function's own name.
    from pypy.objspace.std.ropeunicodeobject import wrapunicode as wrap_rope
    if not space.config.objspace.std.withropeunicode:
        return W_UnicodeObject(uni)
    return wrap_rope(space, uni)
Beispiel #11
0
 def newutf8(self, utf8s, length):
     # Wraps the str utf8s together with length in a new W_UnicodeObject.
     # The two asserts are kept separate on purpose.
     assert utf8s is not None
     assert isinstance(utf8s, str)
     w_wrapped = W_UnicodeObject(utf8s, length)
     return w_wrapped
Beispiel #12
0
def wrapunicode(space, uni):
    # Wraps uni in a new W_UnicodeObject; space is not used here.
    from pypy.objspace.std.unicodeobject import W_UnicodeObject
    w_wrapped = W_UnicodeObject(uni)
    return w_wrapped
Beispiel #13
0
 def _wrap_object(self, space, obj):
     # Casts obj to self.c_type and then to a UniChar, and wraps its utf8
     # encoding as a W_UnicodeObject of length 1.
     c_value = rffi.cast(self.c_type, obj)
     unichar = rffi.cast(lltype.UniChar, c_value)
     utf8 = unichar.encode('utf8')
     return W_UnicodeObject(utf8, 1)