Esempio n. 1
0
    def utf8_encode_mbcs(s, errors, errorhandler, force_replace=True):
        # TODO: do the encoding without decoding utf8 -> unicode
        uni = s.decode('utf8')
        lgt = len(uni)
        if not force_replace and errors not in ('strict', 'replace'):
            msg = "mbcs encoding does not support errors='%s'" % errors
            errorhandler('strict', 'mbcs', msg, s, 0, 0)

        if lgt == 0:
            return ''

        if force_replace or errors == 'replace':
            flags = 0
            used_default_p = lltype.nullptr(BOOLP.TO)
        else:
            # strict
            flags = rwin32.WC_NO_BEST_FIT_CHARS
            used_default_p = lltype.malloc(BOOLP.TO, 1, flavor='raw')
            used_default_p[0] = rffi.cast(rwin32.BOOL, False)

        try:
            with rffi.scoped_nonmoving_unicodebuffer(uni) as dataptr:
                # first get the size of the result
                mbcssize = WideCharToMultiByte(CP_ACP, flags, dataptr, lgt,
                                               None, 0, None, used_default_p)
                if mbcssize == 0:
                    raise rwin32.lastSavedWindowsError()
                # If we used a default char, then we failed!
                if (used_default_p
                        and rffi.cast(lltype.Bool, used_default_p[0])):
                    errorhandler('strict', 'mbcs', "invalid character", s, 0,
                                 0)

                with rffi.scoped_alloc_buffer(mbcssize) as buf:
                    # do the conversion
                    if WideCharToMultiByte(CP_ACP, flags, dataptr, lgt,
                                           buf.raw, mbcssize, None,
                                           used_default_p) == 0:
                        raise rwin32.lastSavedWindowsError()
                    if (used_default_p
                            and rffi.cast(lltype.Bool, used_default_p[0])):
                        errorhandler('strict', 'mbcs', "invalid character", s,
                                     0, 0)
                    result = buf.str(mbcssize)
                    assert result is not None
                    return result
        finally:
            if used_default_p:
                lltype.free(used_default_p, flavor='raw')
Esempio n. 2
0
def utf8_encode_code_page(cp, s, errors, errorhandler):
    """Encode a utf8 string s using code page cp and the given
    errors/errorhandler.
    Returns a encoded byte string
    """

    name = _code_page_name(cp)
    lgt = len(s)

    if lgt == 0:
        return ''
    flags = _encode_code_page_flags(cp, errors)
    if cp in (rwin32.CP_UTF8, rwin32.CP_UTF7):
        used_default_p = lltype.nullptr(BOOLP.TO)
    else:
        used_default_p = lltype.malloc(BOOLP.TO, 1, flavor='raw')
    # Encode one codpoint at a time to allow the errorhandlers to do
    # their thing
    chars = lltype.malloc(rffi.CWCHARP.TO, 2, flavor='raw')
    res = StringBuilder(lgt)
    try:
        with rffi.scoped_alloc_buffer(4) as buf:
            pos = 0
            # TODO: update s if obj != s is returned from an errorhandler
            for uni in Utf8StringIterator(s):
                if used_default_p:
                    used_default_p[0] = rffi.cast(rwin32.BOOL, False)
                if uni < 0x10000:
                    chars[0] = rffi.cast(lltype.UniChar, uni)
                    charsize = 1
                else:
                    chars[0] = Py_UNICODE_HIGH_SURROGATE(uni)
                    chars[1] = Py_UNICODE_LOW_SURROGATE(uni)
                    charsize = 2
                    # first get the size of the result
                outsize = WideCharToMultiByte(cp, flags, chars, charsize,
                                              buf.raw, 4, None, used_default_p)

                if outsize > 0:
                    if not (used_default_p and used_default_p[0]):
                        r = buf.str(outsize)
                        assert r is not None
                        res.append(r)
                        pos += 1
                        continue
                elif rwin32.GetLastError_saved(
                ) != rwin32.ERROR_NO_UNICODE_TRANSLATION:
                    raise rwin32.lastSavedWindowsError()
                # If we used a default char, then we failed!
                r, pos, retype, obj = errorhandler(errors, name,
                                                   "invalid character", s, pos,
                                                   pos + 1)
                res.append(r)
                pos += 1
    finally:
        lltype.free(chars, flavor='raw')
        if used_default_p:
            lltype.free(used_default_p, flavor='raw')
    return res.build()
Esempio n. 3
0
def _unibuf_to_utf8(dataptr, insize):
    """Encode the widechar unicode buffer u to utf8
    Should never error, since the buffer comes from a call to
    MultiByteToWideChar
    """
    flags = 0
    cp = rwin32.CP_UTF8
    used_default_p = lltype.nullptr(BOOLP.TO)
    # first get the size of the result
    outsize = WideCharToMultiByte(cp, flags, dataptr, insize, None, 0, None,
                                  used_default_p)
    if outsize == 0:
        raise rwin32.lastSavedWindowsError()
    with rffi.scoped_alloc_buffer(outsize) as buf:
        # do the conversion
        if WideCharToMultiByte(cp, flags, dataptr, insize, buf.raw, outsize,
                               None, used_default_p) == 0:
            raise rwin32.lastSavedWindowsError()
        result = buf.str(outsize)
        assert result is not None
        return result