Esempio n. 1
0
def charmap_decode(space, s, errors="strict", w_mapping=None):
    """Decode the string `s` through the character map `w_mapping`.

    Returns a wrapped 2-tuple ``(decoded unicode, len(s))``.  When
    `w_mapping` is the interp-level None object the work is delegated to
    latin_1_decode().  Characters that cannot be mapped are passed to the
    error handler selected by `errors`.
    """
    total = len(s)
    if space.is_true(space.is_(w_mapping, space.w_None)):
        # No mapping supplied: default to Latin-1.
        return latin_1_decode(space, s, errors, space.w_False)
    if total == 0:
        return space.newtuple([space.wrap(u''), space.wrap(0)])

    # Fast path for the tables of the encodings module: a tuple mapping
    # is unpacked once instead of being indexed through the object space.
    mapping_w = None
    if space.is_true(space.isinstance(w_mapping, space.w_tuple)):
        mapping_w = space.fixedview(w_mapping)

    out = UnicodeBuilder(total)
    pos = 0
    while pos < total:
        # Mapping lookup: char ordinal -> integer, Unicode char or None.
        w_item = _extract_from_mapping(space, mapping_w, w_mapping, s[pos])
        if w_item is None or not _append_unicode(space, out, w_item):
            # Undefined mapping: the error handler supplies the replacement
            # text and the position at which decoding resumes.
            codec_state = space.fromcache(CodecState)
            replacement, pos = codec_state.decode_error_handler(
                errors, "charmap", "character maps to <undefined>",
                s, pos, pos + 1)
            out.append(replacement)
        else:
            pos += 1
    return space.newtuple([space.wrap(out.build()), space.wrap(total)])
Esempio n. 2
0
class W_UnicodeBuilder(Wrappable):
    """Wrapper object around a UnicodeBuilder.

    Once descr_build() has returned, the instance is flagged as done and
    every further operation raises a wrapped ValueError.
    """

    def __init__(self, space, size):
        # A negative size means "no size hint": use the builder's default.
        if size >= 0:
            self.builder = UnicodeBuilder(size)
        else:
            self.builder = UnicodeBuilder()
        self.done = False

    def _check_done(self, space):
        """Raise ValueError if the builder has already been built."""
        if not self.done:
            return
        raise OperationError(space.w_ValueError,
                             space.wrap("Can't operate on a done builder"))

    @unwrap_spec(size=int)
    def descr__new__(space, w_subtype, size=-1):
        # Takes `space` as first argument instead of `self`.
        return W_UnicodeBuilder(space, size)

    @unwrap_spec(s=unicode)
    def descr_append(self, space, s):
        """Append the unicode string `s`."""
        self._check_done(space)
        self.builder.append(s)

    @unwrap_spec(s=unicode, start=int, end=int)
    def descr_append_slice(self, space, s, start, end):
        """Append ``s[start:end]``; the bounds must lie inside `s`."""
        self._check_done(space)
        if start < 0 or end < start or end > len(s):
            raise OperationError(space.w_ValueError,
                                 space.wrap("bad start/stop"))
        self.builder.append_slice(s, start, end)

    def descr_build(self, space):
        """Return the wrapped result and mark the builder as done."""
        self._check_done(space)
        w_result = space.wrap(self.builder.build())
        # Only flag as done once the result was produced successfully.
        self.done = True
        return w_result
Esempio n. 3
0
def charmap_decode(space, s, errors="strict", w_mapping=None):
    """Decode the string `s` using the character map `w_mapping`.

    The result is a wrapped 2-tuple ``(decoded unicode, len(s))``.  If
    `w_mapping` is the interp-level None object, latin_1_decode() is used
    instead.  Unmappable characters go through the `errors` handler.
    """
    length = len(s)

    # Default to Latin-1 when no mapping is given.
    no_mapping = space.is_true(space.is_(w_mapping, space.w_None))
    if no_mapping:
        return latin_1_decode(space, s, errors, space.w_False)

    if length == 0:
        return space.newtuple([space.wrap(u''), space.wrap(0)])

    # Fast path for all the stuff in the encodings module: tuple mappings
    # are turned into a fixed view up front.
    is_tuple = space.is_true(space.isinstance(w_mapping, space.w_tuple))
    if is_tuple:
        mapping_w = space.fixedview(w_mapping)
    else:
        mapping_w = None

    result = UnicodeBuilder(length)
    index = 0
    while index < length:
        # Look up the mapping (char ordinal -> integer, Unicode char or None).
        char = s[index]
        w_mapped = _extract_from_mapping(space, mapping_w, w_mapping, char)
        mapped_ok = (w_mapped is not None and
                     _append_unicode(space, result, w_mapped))
        if mapped_ok:
            index += 1
        else:
            # The error handler returns the replacement text together with
            # the position where decoding should continue.
            handler_state = space.fromcache(CodecState)
            substitute, index = handler_state.decode_error_handler(
                errors, "charmap", "character maps to <undefined>", s, index,
                index + 1)
            result.append(substitute)
    decoded = result.build()
    return space.newtuple([space.wrap(decoded), space.wrap(length)])
Esempio n. 4
0
def unicode_swapcase__Unicode(space, w_self):
    """Return a new W_UnicodeObject with the case of every character swapped.

    Lowercase characters are uppercased, uppercase characters are
    lowercased, and all other characters are copied unchanged.
    """
    value = w_self._value
    out = UnicodeBuilder(len(value))
    for char in value:
        code = ord(char)
        if unicodedb.islower(code):
            swapped = unichr(unicodedb.toupper(code))
        elif unicodedb.isupper(code):
            swapped = unichr(unicodedb.tolower(code))
        else:
            swapped = char
        out.append(swapped)
    return W_UnicodeObject(out.build())
Esempio n. 5
0
def unicode_capitalize__Unicode(space, w_self):
    """Return a W_UnicodeObject with the first character uppercased and
    every following character lowercased.

    An empty value yields the shared W_UnicodeObject.EMPTY instance.
    """
    value = w_self._value
    length = len(value)
    if length == 0:
        return W_UnicodeObject.EMPTY
    out = UnicodeBuilder(length)
    # The first character is uppercased ...
    first_code = ord(value[0])
    out.append(unichr(unicodedb.toupper(first_code)))
    # ... and all remaining characters are lowercased.
    pos = 1
    while pos < length:
        out.append(unichr(unicodedb.tolower(ord(value[pos]))))
        pos += 1
    return W_UnicodeObject(out.build())
Esempio n. 6
0
def xmlcharrefreplace_errors(space, w_exc):
    """Error callback replacing each offending character by ``&#<ord>;``.

    `w_exc` must be a UnicodeEncodeError instance; any other exception
    type results in a TypeError.  Returns a wrapped 2-tuple made of the
    replacement text and the exception's ``end`` attribute.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        typename = space.type(w_exc).getname(space)
        raise operationerrfmt(space.w_TypeError,
            "don't know how to handle %s in error callback", typename)

    text = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
    first = space.int_w(space.getattr(w_exc, space.wrap('start')))
    w_end = space.getattr(w_exc, space.wrap('end'))
    stop = space.int_w(w_end)

    out = UnicodeBuilder()
    for index in range(first, stop):
        char = text[index]
        # Numeric character reference in decimal, e.g. u"&#8364;".
        out.append(u"&#")
        out.append(unicode(str(ord(char))))
        out.append(u";")
    return space.newtuple([space.wrap(out.build()), w_end])
Esempio n. 7
0
        def format(self):
            """Expand the format string `self.fmt` and return the built result.

            Literal runs of `self.fmt` are copied straight into the builder;
            every '%' directive is parsed with parse_fmt() and dispatched to
            the matching ``fmt_<char>`` method.  The builder is also stored
            on `self.result`.
            """
            # Size hint for the builder: the format length plus 4 per value
            # plus 10 (presumably a heuristic guess -- TODO confirm).
            lgt = len(self.fmt) + 4 * len(self.values_w) + 10
            # 'do_unicode' comes from an enclosing scope not visible here; it
            # selects which builder type is used.
            if do_unicode:
                result = UnicodeBuilder(lgt)
            else:
                result = StringBuilder(lgt)
            self.result = result
            while True:
                # fast path: consume as many characters as possible
                fmt = self.fmt
                i = i0 = self.fmtpos
                # Scan forward to the next '%'; the 'else' branch runs only
                # when the scan reached the end of 'fmt' without a 'break'.
                while i < len(fmt):
                    if fmt[i] == '%':
                        break
                    i += 1
                else:
                    result.append_slice(fmt, i0, len(fmt))
                    break  # end of 'fmt' string
                # Copy the literal text preceding the '%' and skip over it.
                result.append_slice(fmt, i0, i)
                self.fmtpos = i + 1

                # interpret the next formatter
                w_value = self.parse_fmt()
                c = self.peekchr()
                self.forward()
                if c == '%':
                    # '%%' directive -- presumably std_wp() writes the
                    # literal percent sign to the result; verify.
                    self.std_wp(const('%'))
                    continue
                # parse_fmt() returned no value: take the next input value.
                if w_value is None:
                    w_value = self.nextinputvalue()

                # dispatch on the formatter
                # (this turns into a switch after translation)
                for c1 in FORMATTER_CHARS:
                    if c == c1:
                        # 'c1' is an annotation constant here,
                        # so this getattr() is ok
                        do_fmt = getattr(self, 'fmt_' + c1)
                        do_fmt(w_value)
                        break
                else:
                    # No entry of FORMATTER_CHARS matched 'c'.
                    self.unknown_fmtchar()

            self.checkconsumed()
            return result.build()
Esempio n. 8
0
def _unicode_join_many_items(space, w_self, list_w, size):
    """Concatenate the first `size` items of `list_w` into a UnicodeBuilder,
    inserting the separator `w_self._value` between consecutive items.

    Raises a TypeError naming the offending index when an item cannot be
    unwrapped with space.unicode_w().

    NOTE(review): the visible code never returns the built string -- this
    snippet appears to be truncated; confirm against the full source.
    """
    # 'self' is the interp-level separator string, not an instance.
    self = w_self._value
    sb = UnicodeBuilder()
    for i in range(size):
        # The separator goes before every item except the first one, and
        # only if it is non-empty.
        if self and i != 0:
            sb.append(self)
        w_s = list_w[i]
        if isinstance(w_s, W_UnicodeObject):
            # shortcut for performance
            sb.append(w_s._value)
        else:
            try:
                sb.append(space.unicode_w(w_s))
            except OperationError, e:
                # Only a TypeError is rewritten into the more specific
                # message below; anything else propagates unchanged.
                if not e.match(space, space.w_TypeError):
                    raise
                raise operationerrfmt(space.w_TypeError,
                    "sequence item %d: expected string or Unicode", i)
Esempio n. 9
0
def unicode_title__Unicode(space, w_self):
    """Return a titlecased W_UnicodeObject.

    A character following a cased character is lowercased; every other
    character is converted with unicodedb.totitle().  An empty value
    returns `w_self` itself.
    """
    value = w_self._value
    length = len(value)
    if length == 0:
        return w_self
    out = UnicodeBuilder(length)

    after_cased = False
    for char in value:
        code = ord(char)
        if after_cased:
            converted = unicodedb.tolower(code)
        else:
            converted = unicodedb.totitle(code)
        out.append(unichr(converted))
        # The next character's treatment depends on the original character.
        after_cased = unicodedb.iscased(code)
    return W_UnicodeObject(out.build())
Esempio n. 10
0
def xmlcharrefreplace_errors(space, w_exc):
    """Error callback that substitutes ``&#<ord>;`` for each character in
    the failing range of a UnicodeEncodeError.

    Returns a wrapped 2-tuple ``(replacement, end)``; a TypeError is raised
    for any other exception type.
    """
    check_exception(space, w_exc)
    handles_exc = space.isinstance_w(w_exc, space.w_UnicodeEncodeError)
    if not handles_exc:
        typename = space.type(w_exc).getname(space)
        raise operationerrfmt(space.w_TypeError, "don't know how to handle %s in error callback", typename)

    w_object = space.getattr(w_exc, space.wrap("object"))
    source = space.realunicode_w(w_object)
    w_start = space.getattr(w_exc, space.wrap("start"))
    begin = space.int_w(w_start)
    w_end = space.getattr(w_exc, space.wrap("end"))
    finish = space.int_w(w_end)

    refs = UnicodeBuilder()
    for position in range(begin, finish):
        codepoint = ord(source[position])
        # Decimal numeric character reference for this code point.
        refs.append(u"&#")
        refs.append(unicode(str(codepoint)))
        refs.append(u";")
    return space.newtuple([space.wrap(refs.build()), w_end])
Esempio n. 11
0
def _unicode_join_many_items(space, w_self, list_w, size):
    """Append the first `size` items of `list_w` to a UnicodeBuilder, with
    the separator `w_self._value` placed between consecutive items.

    An item that space.unicode_w() rejects with a TypeError is reported
    through a TypeError that names its index.

    NOTE(review): no return statement is visible here -- the snippet looks
    truncated; confirm against the full source.
    """
    # 'self' holds the interp-level separator string.
    self = w_self._value
    sb = UnicodeBuilder()
    for i in range(size):
        # Skip the separator before the first item and when it is empty.
        if self and i != 0:
            sb.append(self)
        w_s = list_w[i]
        if isinstance(w_s, W_UnicodeObject):
            # shortcut for performance
            sb.append(w_s._value)
        else:
            try:
                sb.append(space.unicode_w(w_s))
            except OperationError, e:
                # Errors other than TypeError are re-raised untouched.
                if not e.match(space, space.w_TypeError):
                    raise
                raise operationerrfmt(space.w_TypeError, "sequence item %d: expected string or Unicode", i)
Esempio n. 12
0
        def format(self):
            """Build and return the formatted string described by `self.fmt`.

            Text between directives is appended unchanged; each '%' directive
            is parsed by parse_fmt() and handled by the ``fmt_<char>`` method
            matching its conversion character.  The builder in use is kept
            on `self.result`.
            """
            # Initial capacity: format length + 4 per value + 10 (looks like
            # a heuristic estimate -- TODO confirm).
            lgt = len(self.fmt) + 4 * len(self.values_w) + 10
            # 'do_unicode' is defined outside this method (not visible here).
            if do_unicode:
                result = UnicodeBuilder(lgt)
            else:
                result = StringBuilder(lgt)
            self.result = result
            while True:
                # fast path: consume as many characters as possible
                fmt = self.fmt
                i = i0 = self.fmtpos
                # The 'else' of this loop is taken only when no '%' was
                # found before the end of 'fmt'.
                while i < len(fmt):
                    if fmt[i] == '%':
                        break
                    i += 1
                else:
                    result.append_slice(fmt, i0, len(fmt))
                    break     # end of 'fmt' string
                # Flush the literal text up to the '%' and step past it.
                result.append_slice(fmt, i0, i)
                self.fmtpos = i + 1

                # interpret the next formatter
                w_value = self.parse_fmt()
                c = self.peekchr()
                self.forward()
                if c == '%':
                    # '%%' directive -- presumably emits a literal percent
                    # sign through std_wp(); verify.
                    self.std_wp(const('%'))
                    continue
                # No value came out of parse_fmt(): fetch the next input.
                if w_value is None:
                    w_value = self.nextinputvalue()

                # dispatch on the formatter
                # (this turns into a switch after translation)
                for c1 in FORMATTER_CHARS:
                    if c == c1:
                        # 'c1' is an annotation constant here,
                        # so this getattr() is ok
                        do_fmt = getattr(self, 'fmt_' + c1)
                        do_fmt(w_value)
                        break
                else:
                    # 'c' is not one of FORMATTER_CHARS.
                    self.unknown_fmtchar()

            self.checkconsumed()
            return result.build()
Esempio n. 13
0
 def func():
     """Exercise append, append_slice and append_multiple_char, then
     return the built string."""
     builder = UnicodeBuilder()
     builder.append(u'a')
     builder.append(u'abc')
     builder.append(u'abcdef')
     builder.append_slice(u'abc', 1, 2)
     builder.append_multiple_char(u'u', 4)
     built = builder.build()
     return built
Esempio n. 14
0
 def func(i):
     """Pass g() either a fresh UnicodeBuilder (truthy `i`) or None."""
     builder = UnicodeBuilder() if i else None
     return g(builder)
Esempio n. 15
0
def unicode_upper__Unicode(space, w_self):
    """Return a new W_UnicodeObject with every character converted through
    unicodedb.toupper()."""
    value = w_self._value
    out = UnicodeBuilder(len(value))
    for char in value:
        upper_code = unicodedb.toupper(ord(char))
        out.append(unichr(upper_code))
    return W_UnicodeObject(out.build())
Esempio n. 16
0
 def func():
     """Append two strings and return the builder's current length."""
     builder = UnicodeBuilder()
     builder.append(u"a")
     builder.append(u"abc")
     length = builder.getlength()
     return length
Esempio n. 17
0
def backslashreplace_errors(space, w_exc):
    """Error callback replacing each offending character by a backslash
    escape: ``\\xNN``, ``\\uNNNN`` or ``\\UNNNNNNNN`` depending on its ordinal.

    `w_exc` must be a UnicodeEncodeError instance, otherwise a TypeError
    is raised.  Returns a wrapped 2-tuple made of the replacement text and
    the exception's ``end`` attribute.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        typename = space.type(w_exc).getname(space)
        raise operationerrfmt(space.w_TypeError,
            "don't know how to handle %s in error callback", typename)

    text = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
    first = space.int_w(space.getattr(w_exc, space.wrap('start')))
    w_end = space.getattr(w_exc, space.wrap('end'))
    stop = space.int_w(w_end)

    out = UnicodeBuilder()
    for index in range(first, stop):
        code = ord(text[index])
        hexed = hex(code)  # starts with '0x'
        # Pick the escape prefix and its fixed number of hex digits.
        if code < 0x100:
            out.append(u"\\x")
            width = 2
        elif code < 0x10000:
            out.append(u"\\u")
            width = 4
        else:
            out.append(u"\\U")
            width = 8
        hexed_len = len(hexed)
        # Left-pad with zeros up to 'width' digits; the 2 accounts for '0x'.
        padding = width + 2 - hexed_len
        if padding > 0:
            out.append_multiple_char(u'0', padding)
        out.append_slice(unicode(hexed), 2, hexed_len)
    return space.newtuple([space.wrap(out.build()), w_end])
Esempio n. 18
0
    return _unicode_join_many_items(space, w_self, list_w, size)

@jit.look_inside_iff(lambda space, w_self, list_w, size:
                     jit.loop_unrolling_heuristic(list_w, size))
def _unicode_join_many_items(space, w_self, list_w, size):
    self = w_self._value
    prealloc_size = len(self) * (size - 1)
    for i in range(size):
        try:
            prealloc_size += len(space.unicode_w(list_w[i]))
        except OperationError, e:
            if not e.match(space, space.w_TypeError):
                raise
            raise operationerrfmt(space.w_TypeError,
                        "sequence item %d: expected string or Unicode", i)
    sb = UnicodeBuilder(prealloc_size)
    for i in range(size):
        if self and i != 0:
            sb.append(self)
        w_s = list_w[i]
        sb.append(space.unicode_w(w_s))
    return space.wrap(sb.build())

def hash__Unicode(space, w_uni):
    s = w_uni._value
    if space.config.objspace.std.withrope:
        # be compatible with the special ropes hash
        # XXX no caching
        if len(s) == 0:
            return space.wrap(0)
        x = 0
Esempio n. 19
0
def test_unicode_builder():
    """Check UnicodeBuilder's append variants, getlength() and build()."""
    builder = UnicodeBuilder()
    for piece in (u'a', u'abc'):
        builder.append(piece)
    builder.append_slice(u'abcdef', 1, 2)
    expected_so_far = 'aabcb'
    assert builder.getlength() == len(expected_so_far)
    builder.append_multiple_char(u'd', 4)
    assert builder.build() == 'aabcbdddd'
    # build() is called a second time on purpose, as in the original test.
    assert isinstance(builder.build(), unicode)
Esempio n. 20
0
 def func():
     """Fill a UnicodeBuilder through each of its append methods and
     return the resulting string."""
     ub = UnicodeBuilder()
     ub.append(u'a')
     ub.append(u'abc')
     ub.append(u'abcdef')
     ub.append_slice(u'abc', 1, 2)
     ub.append_multiple_char(u'u', 4)
     result = ub.build()
     return result
Esempio n. 21
0
 def __init__(self, space, size):
     """Create the underlying UnicodeBuilder; a negative `size` means no
     size hint is passed to it."""
     if size >= 0:
         self.builder = UnicodeBuilder(size)
     else:
         self.builder = UnicodeBuilder()
     # Flag consulted elsewhere on the instance; starts out False.
     self.done = False
Esempio n. 22
0
def backslashreplace_errors(space, w_exc):
    """Error callback that turns each character of the failing range of a
    UnicodeEncodeError into ``\\xNN``, ``\\uNNNN`` or ``\\UNNNNNNNN``.

    Returns a wrapped 2-tuple ``(replacement, end)``; any other exception
    type raises a TypeError.
    """
    check_exception(space, w_exc)
    handles_exc = space.isinstance_w(w_exc, space.w_UnicodeEncodeError)
    if not handles_exc:
        typename = space.type(w_exc).getname(space, '?')
        raise operationerrfmt(space.w_TypeError,
            "don't know how to handle %s in error callback", typename)

    w_object = space.getattr(w_exc, space.wrap('object'))
    source = space.realunicode_w(w_object)
    w_start = space.getattr(w_exc, space.wrap('start'))
    begin = space.int_w(w_start)
    w_end = space.getattr(w_exc, space.wrap('end'))
    finish = space.int_w(w_end)

    escaped = UnicodeBuilder()
    for position in range(begin, finish):
        codepoint = ord(source[position])
        hex_text = hex(codepoint)  # starts with '0x'
        # The escape form depends on how large the code point is.
        if codepoint >= 0x10000:
            escaped.append(u"\\U")
            digits = 8
        elif codepoint >= 0x100:
            escaped.append(u"\\u")
            digits = 4
        else:
            escaped.append(u"\\x")
            digits = 2
        hex_len = len(hex_text)
        # Zero-fill up to 'digits'; hex_len includes the 2-char '0x' prefix.
        missing = digits + 2 - hex_len
        if missing > 0:
            escaped.append_multiple_char(u'0', missing)
        escaped.append_slice(unicode(hex_text), 2, hex_len)
    return space.newtuple([space.wrap(escaped.build()), w_end])
Esempio n. 23
0
def test_unicode_builder():
    """Verify length tracking and the final content of a UnicodeBuilder."""
    ub = UnicodeBuilder()
    ub.append(u'a')
    ub.append(u'abc')
    ub.append_slice(u'abcdef', 1, 2)
    current_length = ub.getlength()
    assert current_length == len('aabcb')
    ub.append_multiple_char(u'd', 4)
    assert ub.build() == 'aabcbdddd'
    # A second build() call must still hand back a unicode object.
    assert isinstance(ub.build(), unicode)
Esempio n. 24
0
 def func():
     """Return the length reported by a builder after two appends."""
     ub = UnicodeBuilder()
     ub.append(u"a")
     ub.append(u"abc")
     return ub.getlength()
Esempio n. 25
0
 def fn():
     """Return compute_unique_id() of a str, a unicode, a list, a dict,
     a StringBuilder and a UnicodeBuilder, as a 6-tuple in that order."""
     id_str = compute_unique_id("foo")
     id_unicode = compute_unique_id(u"bar")
     id_list = compute_unique_id([1])
     id_dict = compute_unique_id({"foo": 3})
     id_sbuilder = compute_unique_id(StringBuilder())
     id_ubuilder = compute_unique_id(UnicodeBuilder())
     return (id_str, id_unicode, id_list, id_dict, id_sbuilder, id_ubuilder)