def filename_to_unicode(opsysstring):
    """Converts a str representing a filename from GLib to unicode.

    :param str opsysstring: a string in the (GLib) encoding for filenames
    :returns: the converted filename, or None if `opsysstring` is None
    :rtype: unicode
    :raises TypeError: if `opsysstring` is not a str (checked on
        non-Windows platforms only; Windows decodes before the check)
    :raises UnicodeDecodeError: if GLib returns None for the conversion

    >>> from gi.repository import GLib
    >>> filename_to_unicode('/ascii/only/path')
    u'/ascii/only/path'
    >>> filename_to_unicode(None) is None
    True

    This is just a more Pythonic wrapper around g_filename_to_utf8() for
    now. If there are compatibility reasons to change it, fallbacks
    involving sys.getfilesystemencoding exist.

    """
    if opsysstring is None:
        return None
    # On Windows, they're always UTF-8 regardless.
    if sys.platform == "win32":
        return opsysstring.decode("utf-8")
    # Other systems are dependent in opaque ways on the environment.
    if not isinstance(opsysstring, str):
        raise TypeError("Argument must be bytes")
    ustring = GLib.filename_to_utf8(opsysstring, -1, 0, 0)
    if ustring is None:
        # UnicodeDecodeError takes exactly five arguments
        # (encoding, object, start, end, reason). Passing only a message
        # makes the raise itself fail with TypeError, so spell them out.
        # The first argument is a descriptive label, not a real codec:
        # the actual source encoding is whatever GLib decided to use.
        reason = (
            "GLib failed to convert %r to a UTF-8 string. "
            "Consider setting G_FILENAME_ENCODING if your file system's "
            "filename encoding scheme is not UTF-8."
            % (opsysstring,)
        )
        raise UnicodeDecodeError(
            "glib-filename", opsysstring, 0, len(opsysstring), reason
        )
    return ustring.decode("utf-8")
def filename_to_unicode(opsysstring):
    """Converts a str representing a filename from GLib to unicode.

    :param str opsysstring: a string in the (GLib) encoding for filenames
    :returns: the converted filename, or None if `opsysstring` is None
    :rtype: unicode
    :raises TypeError: if `opsysstring` is not a str (checked on
        non-Windows platforms only; Windows decodes before the check)
    :raises UnicodeDecodeError: if GLib returns None for the conversion

    >>> from gi.repository import GLib
    >>> filename_to_unicode('/ascii/only/path')
    u'/ascii/only/path'
    >>> filename_to_unicode(None) is None
    True

    This is just a more Pythonic wrapper around g_filename_to_utf8() for
    now. If there are compatibility reasons to change it, fallbacks
    involving sys.getfilesystemencoding exist.

    """
    if opsysstring is None:
        return None

    # On Windows, they're always UTF-8 regardless.
    if sys.platform == "win32":
        return opsysstring.decode("utf-8")

    # Other systems are dependent in opaque ways on the environment.
    if not isinstance(opsysstring, str):
        raise TypeError("Argument must be bytes")

    ustring = GLib.filename_to_utf8(opsysstring, -1, 0, 0)
    if ustring is not None:
        return ustring.decode("utf-8")

    # Conversion failed. UnicodeDecodeError requires all five of
    # (encoding, object, start, end, reason); constructing it from a lone
    # message string raises TypeError instead of the intended error.
    # "glib-filename" is only a label for the unknown source encoding.
    raise UnicodeDecodeError(
        "glib-filename",
        opsysstring,
        0,
        len(opsysstring),
        "GLib failed to convert %r to a UTF-8 string. "
        "Consider setting G_FILENAME_ENCODING if your file system's "
        "filename encoding scheme is not UTF-8." % (opsysstring,)
    )
def filename_to_unicode(opsysstring):
    """Converts a str representing a filename from GLib to unicode.

    :param bytes opsysstring: a string in the (GLib) encoding for filenames
    :returns: the converted filename, or None if `opsysstring` is None
    :rtype: str
    :raises TypeError: if `opsysstring` is not bytes (checked on
        non-Windows platforms only; Windows decodes before the check)
    :raises UnicodeDecodeError: if GLib returns None for the conversion

    >>> filename_to_unicode(b'/ascii/only/path') == u'/ascii/only/path'
    True
    >>> filename_to_unicode(None) is None
    True

    This is just a more Pythonic wrapper around g_filename_to_utf8() for
    now. If there are compatibility reasons to change it, fallbacks
    involving sys.getfilesystemencoding exist.

    """
    if opsysstring is None:
        return None
    # Let's assume that if the string is already unicode under Python 3,
    # then it's already correct.
    if PY3 and isinstance(opsysstring, str):
        return opsysstring
    # On Windows, they're always UTF-8 regardless.
    if sys.platform == "win32":
        return opsysstring.decode("utf-8")
    # Other systems are dependent in opaque ways on the environment.
    if not isinstance(opsysstring, bytes):
        raise TypeError("Argument must be bytes")
    # This function's annotation seems to vary quite a bit.
    # See https://github.com/mypaint/mypaint/issues/634
    try:
        # Form returning a (string, bytes_read, bytes_written) triple.
        ustring, _, _ = GLib.filename_to_utf8(opsysstring, -1)
    except TypeError:
        # Fall back to the form that takes two extra positional arguments.
        ustring = GLib.filename_to_utf8(opsysstring, -1, 0, 0)
    if ustring is None:
        # UnicodeDecodeError takes exactly five arguments
        # (encoding, object, start, end, reason). A single message string
        # makes the raise itself fail with TypeError. The first argument
        # is a descriptive label for the unknown source encoding.
        reason = (
            "GLib failed to convert %r to a UTF-8 string. "
            "Consider setting G_FILENAME_ENCODING if your file system's "
            "filename encoding scheme is not UTF-8."
            % (opsysstring,)
        )
        raise UnicodeDecodeError(
            "glib-filename", opsysstring, 0, len(opsysstring), reason
        )
    if PY2:
        ustring = ustring.decode("utf-8")
    return ustring
def filename_to_unicode(opsysstring):
    """Converts a str representing a filename from GLib to unicode.

    :param bytes opsysstring: a string in the (GLib) encoding for filenames
    :returns: the converted filename, or None if `opsysstring` is None
    :rtype: unicode/str
    :raises TypeError: if `opsysstring` is not bytes (checked on
        non-Windows platforms only; Windows decodes before the check)
    :raises UnicodeDecodeError: if every known calling convention for
        GLib.filename_to_utf8() fails to produce a result

    >>> filename_to_unicode(b'/ascii/only/path') == u'/ascii/only/path'
    True
    >>> filename_to_unicode(None) is None
    True

    This is just a more Pythonic wrapper around g_filename_to_utf8() for
    now, which works around a ton of weird bugs and corner cases with
    the typelib annotations for it. It is intended for cleaning up the
    output of certain GLib functions.

    Currently, if you're using Python 3 and the input is already unicode
    then this function assumes that GLib+GI have already done the work,
    and that the unicode string was correct. You get the same string
    back.

    For Python 2, this accepts only "bytes" string input. If we find a
    corner case where GLib functions return degenerate unicode, we can
    adapt it for that case (those funcs need their own wrappers though).

    """
    if opsysstring is None:
        return None

    # Let's assume that if the string is already unicode under Python 3,
    # then it's already correct.
    if PY3 and isinstance(opsysstring, unicode):
        return opsysstring

    # On Windows, they're always UTF-8 regardless.
    # That's what the docs say.
    if sys.platform == "win32":
        return opsysstring.decode("utf-8")

    # Other systems are dependent in opaque ways on the environment.
    if not isinstance(opsysstring, bytes):
        raise TypeError("Argument must be bytes")

    # latin_1 maps every byte to the code point of the same value, so
    # this is a lossless "bytes dressed up as unicode" form of the input.
    opsysstring_degenerate_unicode = opsysstring.decode("latin_1")
    candidates = [opsysstring, opsysstring_degenerate_unicode]

    # This function's annotation seems to vary quite a bit.
    # See https://github.com/mypaint/mypaint/issues/634
    ustring = None

    # The sensible, modern case! Byte strings in, unicode strings
    # out hopefully, and the C func's arguments are correctly
    # [out]-annotated. It works like this as of...
    #
    # - Python 2.7.14 OR Python 3.6.4
    # - gobject-introspection 1.54.1
    # - glib2 2.54.3
    # - Debian buster/sid amd64 OR MSYS2 MINGW64 on Windows 7 64-bit.
    for s in candidates:
        try:
            ustring, _bytes_read, _bytes_written \
                = GLib.filename_to_utf8(s, -1)
            break
        except TypeError:
            pass

    # Try an older, bad typelib's form.
    # This is the case for Ubuntu 14.04 LTS "trusty" (which is ancient,
    # but that's what our current Travis CI solution uses). For the
    # record, this weirdness is applicable to the following combination:
    #
    # - Python 2.7.6 OR Python 3.4.3
    # - gobject-introspection 1.40.0
    # - glib2 2.40.2
    # - Ubuntu 14.04.5 LTS amd64.
    #
    # Of note: the Py3 wrappings are weird in Trusty. Other GLib funcs
    # return bytes, but GLib.filename_to_utf8() expects those degenerate
    # unicode strings. Byte strings will not do. Unusual tastes.
    if ustring is None:
        for s in candidates:
            try:
                ustring = GLib.filename_to_utf8(s, -1, 0, 0)
                break
            except TypeError:
                pass

    # Congratulations! You found a new bug.
    if ustring is None:
        # UnicodeDecodeError takes exactly five arguments
        # (encoding, object, start, end, reason). A single message string
        # makes the raise itself fail with TypeError. The first argument
        # is a descriptive label for the unknown source encoding.
        reason = (
            "New or unknown bugs in g_filename_to_utf8()'s typelib. "
            "Failed to convert %r. Please tell the developers about this."
            % (opsysstring,)
        )
        raise UnicodeDecodeError(
            "glib-filename", opsysstring, 0, len(opsysstring), reason
        )

    # Python2's wrappers tended to do this.
    # I suspect it's reasonable to convert for all, now that we're
    # reasonably sure that the data would be utf-8.
    if isinstance(ustring, bytes):
        ustring = ustring.decode("utf-8")

    return ustring
from gi.repository import GLib +logger = logging.getLogger(__name__) + ## File path getter functions @@ -35,7 +36,6 @@ def filename_to_unicode(opsysstring): :returns: the converted filename :rtype: unicode - >>> from gi.repository import GLib >>> filename_to_unicode('/ascii/only/path') u'/ascii/only/path' >>> filename_to_unicode(None) is None @@ -54,7 +54,12 @@ def filename_to_unicode(opsysstring): # Other systems are dependent in opaque ways on the environment. if not isinstance(opsysstring, str): raise TypeError("Argument must be bytes") - ustring = GLib.filename_to_utf8(opsysstring, -1, 0, 0) + # This function's annotation seems to vary quite a bit. + # See https://github.com/mypaint/mypaint/issues/634 + try: + ustring, _, _ = GLib.filename_to_utf8(opsysstring, -1) + except TypeError: + ustring = GLib.filename_to_utf8(opsysstring, -1, 0, 0) if ustring is None: raise UnicodeDecodeError( "GLib failed to convert %r to a UTF-8 string. "
def filename_to_unicode(opsysstring):
    """Converts a str representing a filename from GLib to unicode.

    :param bytes opsysstring: a string in the (GLib) encoding for filenames
    :returns: the converted filename, or None if `opsysstring` is None
    :rtype: unicode/str
    :raises TypeError: if `opsysstring` is not bytes (checked on
        non-Windows platforms only; Windows decodes before the check)
    :raises UnicodeDecodeError: if every known calling convention for
        GLib.filename_to_utf8() fails to produce a result

    >>> filename_to_unicode(b'/ascii/only/path') == u'/ascii/only/path'
    True
    >>> filename_to_unicode(None) is None
    True

    This is just a more Pythonic wrapper around g_filename_to_utf8() for
    now, which works around a ton of weird bugs and corner cases with
    the typelib annotations for it. It is intended for cleaning up the
    output of certain GLib functions.

    Currently, if you're using Python 3 and the input is already unicode
    then this function assumes that GLib+GI have already done the work,
    and that the unicode string was correct. You get the same string
    back.

    For Python 2, this accepts only "bytes" string input. If we find a
    corner case where GLib functions return degenerate unicode, we can
    adapt it for that case (those funcs need their own wrappers though).

    """
    if opsysstring is None:
        return None

    # Let's assume that if the string is already unicode under Python 3,
    # then it's already correct.
    if PY3 and isinstance(opsysstring, unicode):
        return opsysstring

    # On Windows, they're always UTF-8 regardless.
    # That's what the docs say.
    if sys.platform == "win32":
        return opsysstring.decode("utf-8")

    # Other systems are dependent in opaque ways on the environment.
    if not isinstance(opsysstring, bytes):
        raise TypeError("Argument must be bytes")

    # Each byte becomes the code point of the same value under latin_1,
    # giving a lossless unicode stand-in for the raw bytes.
    opsysstring_degenerate_unicode = opsysstring.decode("latin_1")
    inputs_to_try = [opsysstring, opsysstring_degenerate_unicode]

    # This function's annotation seems to vary quite a bit.
    # See https://github.com/mypaint/mypaint/issues/634
    ustring = None

    # The sensible, modern case! Byte strings in, unicode strings
    # out hopefully, and the C func's arguments are correctly
    # [out]-annotated. It works like this as of...
    #
    # - Python 2.7.14 OR Python 3.6.4
    # - gobject-introspection 1.54.1
    # - glib2 2.54.3
    # - Debian buster/sid amd64 OR MSYS2 MINGW64 on Windows 7 64-bit.
    for s in inputs_to_try:
        try:
            ustring, _bytes_read, _bytes_written \
                = GLib.filename_to_utf8(s, -1)
            break
        except TypeError:
            pass

    # Try an older, bad typelib's form.
    # This is the case for Ubuntu 14.04 LTS "trusty" (which is ancient,
    # but that's what our current Travis CI solution uses). For the
    # record, this weirdness is applicable to the following combination:
    #
    # - Python 2.7.6 OR Python 3.4.3
    # - gobject-introspection 1.40.0
    # - glib2 2.40.2
    # - Ubuntu 14.04.5 LTS amd64.
    #
    # Of note: the Py3 wrappings are weird in Trusty. Other GLib funcs
    # return bytes, but GLib.filename_to_utf8() expects those degenerate
    # unicode strings. Byte strings will not do. Unusual tastes.
    if ustring is None:
        for s in inputs_to_try:
            try:
                ustring = GLib.filename_to_utf8(s, -1, 0, 0)
                break
            except TypeError:
                pass

    # Congratulations! You found a new bug.
    if ustring is None:
        # The exception must be built with all five of
        # (encoding, object, start, end, reason); a lone message string
        # would turn this raise into a TypeError. "glib-filename" merely
        # labels the unknown source encoding.
        raise UnicodeDecodeError(
            "glib-filename",
            opsysstring,
            0,
            len(opsysstring),
            "New or unknown bugs in g_filename_to_utf8()'s typelib. "
            "Failed to convert %r. Please tell the developers about this."
            % (opsysstring,)
        )

    # Python2's wrappers tended to do this.
    # I suspect it's reasonable to convert for all, now that we're
    # reasonably sure that the data would be utf-8.
    if isinstance(ustring, bytes):
        ustring = ustring.decode("utf-8")

    return ustring