def fsencode(space, w_uni):
    """Encode the wrapped unicode object ``w_uni`` into a wrapped bytes object.

    The encoding strategy is picked in this order:

    * ``_WIN32``: the mbcs encoder with the 'strict' error mode.
    * ``_MACOSX``: the UTF-8 encoder with the 'surrogateescape' error
      mode and ``allow_surrogates=False``.
    * bootstrap (``space.sys.filesystemencoding`` is None, or the codec
      state reports ``codec_need_encodings``): the locale codec with
      surrogateescape; raises an app-level ValueError if the text
      contains an embedded null character.
    * otherwise: calls ``w_uni.encode(<filesystem encoding>,
      'surrogateescape')`` at application level and returns its result.
    """
    from pypy.module._codecs import interp_codecs
    codec_state = space.fromcache(interp_codecs.CodecState)

    if _WIN32:
        text = space.unicode_w(w_uni)
        encoded = unicode_encode_mbcs(
            text, len(text), 'strict',
            errorhandler=encode_error_handler(space),
            force_replace=False)
        return space.newbytes(encoded)

    if _MACOSX:
        text = space.unicode_w(w_uni)
        encoded = runicode.unicode_encode_utf_8_impl(
            text, len(text), 'surrogateescape',
            errorhandler=codec_state.encode_error_handler,
            allow_surrogates=False)
        return space.newbytes(encoded)

    codecs_usable = (space.sys.filesystemencoding is not None
                     and not codec_state.codec_need_encodings)
    if codecs_usable:
        # Normal path: delegate to the app-level encode() method using
        # the configured filesystem encoding.
        from pypy.module.sys.interp_encoding import getfilesystemencoding
        w_encoding = getfilesystemencoding(space)
        w_errors = space.newtext('surrogateescape')
        return space.call_method(w_uni, 'encode', w_encoding, w_errors)

    # Bootstrap path: the filesystemencoding isn't initialized yet, or
    # the filesystem codec is implemented in Python, so it cannot be
    # used before the codecs are ready.  Fall back to the locale codec.
    from pypy.module._codecs.locale import (
        unicode_encode_locale_surrogateescape)
    text = space.unicode_w(w_uni)
    if u'\x00' in text:
        raise oefmt(space.w_ValueError, "embedded null character")
    encoded = unicode_encode_locale_surrogateescape(
        text, errorhandler=encode_error_handler(space))
    return space.newbytes(encoded)
def utf_8_encode(space, uni, errors="strict"):
    """Encode ``uni`` as UTF-8 and return a wrapped ``(bytes, length)`` tuple.

    ``errors`` names the error mode handed to the encoder; ``None`` is
    treated the same as 'strict'.  The second tuple item is ``len(uni)``
    as a wrapped int.
    """
    if errors is None:
        errors = 'strict'
    codec_state = space.fromcache(CodecState)
    # NB. unicode_encode_utf_8() itself can't be called here because it
    # is an @elidable function nowadays; the _impl() variant is needed
    # instead.  (The problem is the errorhandler, which calls arbitrary
    # Python.)
    encoded = runicode.unicode_encode_utf_8_impl(
        uni, len(uni), errors, codec_state.encode_error_handler,
        allow_surrogates=True)
    w_encoded = space.newbytes(encoded)
    w_length = space.newint(len(uni))
    return space.newtuple([w_encoded, w_length])