Example #1
0
def charmap():
    """Return a dict mapping each Unicode category to its codepoint intervals.

    Every value is a tuple of inclusive ``(first, last)`` 2-tuples covering
    the codepoints that ``unicodedata.category`` assigns to that category.
    The table is computed at most once per cache file: it is stored as a
    gzipped pickle at ``charmap_file()`` and memoised in the module-level
    ``_charmap``.
    """
    global _charmap
    if _charmap is None:
        f = charmap_file()
        if not os.path.exists(f):
            # Build into a local so that a failure while writing the cache
            # cannot leave a half-converted table (lists instead of tuples)
            # in the module-level ``_charmap`` for later callers to see.
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                # NOTE(review): hunichr is presumably a chr/unichr
                # compatibility shim -- confirm against its definition.
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                if rs and rs[-1][-1] == i - 1:
                    # i directly follows the last interval: extend it.
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])
            data = sorted(
                (k, tuple((map(tuple, v)))) for k, v in tmp_charmap.items())

            # Write the Unicode table atomically: fill a temporary file and
            # then rename it into place.
            fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
            os.close(fd)
            # We explicitly set the mtime to an arbitrary value so as to get
            # a stable format for our charmap.
            with GzipFile(tmpfile, 'wb', mtime=1) as o:
                o.write(pickle.dumps(data, pickle.HIGHEST_PROTOCOL))
            try:
                os.rename(tmpfile, f)
            except FileExistsError:
                # On Windows os.rename refuses to overwrite an existing
                # file.  That only happens when another process wrote the
                # cache first, so use theirs and drop our temporary copy.
                os.remove(tmpfile)

        # NOTE: the cache is unpickled, so it must only ever be a file this
        # code wrote itself.
        with GzipFile(f, 'rb') as i:
            _charmap = dict(pickle.loads(i.read()))
    assert _charmap is not None
    return _charmap
Example #2
0
def charmap():
    """Return the Unicode category table, building the on-disk cache if needed.

    The result is a dict keyed by Unicode category; each value is a tuple of
    inclusive ``(first, last)`` codepoint 2-tuples for that category.  The
    table is cached as a gzipped pickle at ``charmap_file()`` and kept in the
    module-level ``_charmap`` after the first call.
    """
    global _charmap
    if _charmap is None:
        f = charmap_file()
        if not os.path.exists(f):
            # Accumulate in a local dict: if writing the cache below raises,
            # the global must stay ``None`` rather than hold this
            # intermediate list-based table.
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                # NOTE(review): hunichr looks like a chr/unichr shim --
                # confirm against its definition.
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                if rs and rs[-1][-1] == i - 1:
                    # Contiguous with the previous interval: grow it by one.
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])
            data = sorted(
                (k, tuple((map(tuple, v))))
                for k, v in tmp_charmap.items())

            # Write the Unicode table atomically (temporary file + rename).
            fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
            os.close(fd)
            # We explicitly set the mtime to an arbitrary value so as to get
            # a stable format for our charmap.
            with GzipFile(tmpfile, 'wb', mtime=1) as o:
                o.write(pickle.dumps(data, pickle.HIGHEST_PROTOCOL))
            try:
                os.rename(tmpfile, f)
            except FileExistsError:
                # Raised on Windows when the destination already exists,
                # i.e. a concurrent process won the race to write the cache.
                # Their file is used; ours is discarded.
                os.remove(tmpfile)

        # NOTE: this unpickles the cache file, so the file must be trusted.
        with GzipFile(f, 'rb') as i:
            _charmap = dict(pickle.loads(i.read()))
    assert _charmap is not None
    return _charmap
Example #3
0
def charmap():
    """Return the cached mapping of Unicode category to codepoint intervals.

    On first use the gzipped pickle at ``charmap_file()`` is created if it
    does not exist yet, then loaded into the module-level ``_charmap``.
    Each value of the returned dict is a tuple of inclusive ``(first, last)``
    2-tuples.
    """
    global _charmap
    if _charmap is None:
        cache_path = charmap_file()
        if not os.path.exists(cache_path):
            runs_by_category = {}
            for codepoint in range(0, sys.maxunicode + 1):
                category = unicodedata.category(hunichr(codepoint))
                runs = runs_by_category.setdefault(category, [])
                if not runs or runs[-1][-1] != codepoint - 1:
                    # Gap since the previous run (or no run yet): start one.
                    runs.append([codepoint, codepoint])
                else:
                    runs[-1][-1] += 1
            # Category names are unique, so ordering by name alone gives the
            # same list as sorting the (name, intervals) pairs.
            data = [
                (category, tuple(tuple(run) for run in runs_by_category[category]))
                for category in sorted(runs_by_category)
            ]

            # Write the Unicode table atomically
            fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
            os.close(fd)
            # The mtime is pinned to an arbitrary value so that the gzip
            # output has a stable format.
            with GzipFile(tmpfile, "wb", mtime=1) as sink:
                payload = pickle.dumps(data, pickle.HIGHEST_PROTOCOL)
                sink.write(payload)
            try:
                os.rename(tmpfile, cache_path)
            except FileExistsError:  # pragma: no cover
                # This exception is only raised on Windows, and coverage is
                # measured on Linux.
                pass
        with GzipFile(cache_path, "rb") as source:
            raw = source.read()
            _charmap = dict(pickle.loads(raw))
    assert _charmap is not None
    return _charmap
Example #4
0
def charmap():
    """Return a dict that maps a Unicode category, to a tuple of 2-tuples
    covering the codepoint intervals for characters in that category.

    The table is cached as gzipped JSON at ``charmap_file()`` and memoised in
    the module-level ``_charmap``; if the cache cannot be read it is
    recomputed, and a failure to write it back is ignored.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))
    """
    global _charmap
    # Best-effort caching in the face of missing files and/or unwritable
    # filesystems is fairly simple: check if loaded, else try loading,
    # else calculate and try writing the cache.
    if _charmap is None:
        f = charmap_file()
        try:
            with gzip.GzipFile(f, "rb") as i:
                # json.load reads from the file object.  json.loads only
                # accepts str/bytes, so passing the GzipFile to it raised a
                # TypeError that the handler below swallowed, and the cache
                # was never actually used.
                tmp_charmap = dict(json.load(i))

        except Exception:
            # Missing, unreadable or malformed cache: rebuild from scratch.
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                if rs and rs[-1][-1] == i - 1:
                    # i directly follows the last interval: extend it.
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])

            try:
                # Write the Unicode table atomically
                fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
                os.close(fd)
                # Explicitly set the mtime to get reproducible output
                with gzip.GzipFile(tmpfile, "wb", mtime=1) as o:
                    result = json.dumps(sorted(tmp_charmap.items()))
                    o.write(result.encode())

                os.rename(tmpfile, f)
            except Exception:
                # Deliberately best-effort: an unwritable cache location
                # must not prevent returning the freshly computed table.
                pass

        # convert between lists and tuples
        converted = {
            k: tuple(tuple(pair) for pair in pairs)
            for k, pairs in tmp_charmap.items()
        }
        # Sanity-check that each value is a tuple of 2-tuples (that is,
        # tuples of length 2), that both elements of each 2-tuple are
        # integers, and that the intervals are in ascending order.  This
        # runs before the global is assigned so that a table which fails
        # the checks (now possible, since the cache is really read) is never
        # memoised.
        for vs in converted.values():
            ints = list(sum(vs, ()))
            assert all(isinstance(x, int) for x in ints)
            assert ints == sorted(ints)
            assert all(len(tup) == 2 for tup in vs)
        _charmap = converted

    assert _charmap is not None
    return _charmap
def charmap():
    """Return a dict that maps a Unicode category, to a tuple of 2-tuples
    covering the codepoint intervals for characters in that category.

    The table is loaded from the gzipped JSON cache at ``charmap_file()``
    when that is readable; otherwise it is recomputed and written back on a
    best-effort basis.  The result is memoised in the module-level
    ``_charmap``.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))
    """
    global _charmap
    # Best-effort caching in the face of missing files and/or unwritable
    # filesystems is fairly simple: check if loaded, else try loading,
    # else calculate and try writing the cache.
    if _charmap is None:
        f = charmap_file()
        try:
            with gzip.GzipFile(f, "rb") as i:
                # Use json.load (file object), not json.loads (str/bytes):
                # the latter raised TypeError on the GzipFile, which was
                # silently caught below and forced a full recomputation on
                # every fresh process.
                tmp_charmap = dict(json.load(i))

        except Exception:
            # No usable cache: compute the table from unicodedata.
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                if rs and rs[-1][-1] == i - 1:
                    # Contiguous with the previous interval: grow it by one.
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])

            try:
                # Write the Unicode table atomically
                fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
                os.close(fd)
                # Explicitly set the mtime to get reproducible output
                with gzip.GzipFile(tmpfile, "wb", mtime=1) as o:
                    result = json.dumps(sorted(tmp_charmap.items()))
                    o.write(result.encode())

                os.rename(tmpfile, f)
            except Exception:
                # Best-effort only: failing to persist the cache is not an
                # error for the caller.
                pass

        # convert between lists and tuples
        converted = {
            k: tuple(tuple(pair) for pair in pairs) for k, pairs in tmp_charmap.items()
        }
        # Check that each value is a tuple of 2-tuples (that is, tuples of
        # length 2) whose elements are integers in ascending order.  The
        # global is only assigned once these checks pass, so a table that
        # fails them is never memoised.
        for vs in converted.values():
            ints = list(sum(vs, ()))
            assert all(isinstance(x, int) for x in ints)
            assert ints == sorted(ints)
            assert all(len(tup) == 2 for tup in vs)
        _charmap = converted

    assert _charmap is not None
    return _charmap
Example #6
0
def charmap():
    """Map each Unicode category to a tuple of inclusive codepoint 2-tuples.

    The table is read from the gzipped pickle at ``charmap_file()`` when
    possible; otherwise it is computed from ``unicodedata`` and written back
    on a best-effort basis.  The result is memoised in ``_charmap``.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))

    """
    global _charmap
    # Strategy: already loaded? -> else try the cache file -> else compute
    # and attempt to store the cache, ignoring any failure to write.
    if _charmap is None:
        cache_path = charmap_file()
        try:
            with gzip.GzipFile(cache_path, 'rb') as cached:
                _charmap = dict(pickle.load(cached))

        except Exception:
            runs_by_category = {}
            for codepoint in range(0, sys.maxunicode + 1):
                category = unicodedata.category(hunichr(codepoint))
                runs = runs_by_category.setdefault(category, [])
                if not runs or runs[-1][-1] != codepoint - 1:
                    # Not adjacent to the previous run: open a new one.
                    runs.append([codepoint, codepoint])
                else:
                    runs[-1][-1] += 1
            # Freeze the mutable [first, last] lists into tuples.
            _charmap = {
                category: tuple(map(tuple, runs))
                for category, runs in runs_by_category.items()
            }

            try:
                # Write the Unicode table atomically
                fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
                os.close(fd)
                # A fixed mtime keeps the gzip output reproducible
                with gzip.GzipFile(tmpfile, 'wb', mtime=1) as out:
                    entries = sorted(_charmap.items())
                    pickle.dump(entries, out, pickle.HIGHEST_PROTOCOL)
                os.rename(tmpfile, cache_path)
            except Exception:  # pragma: no cover
                pass
    assert _charmap is not None
    return _charmap
Example #7
0
def charmap():
    """Return the Unicode category table as a dict of interval tuples.

    Keys are Unicode categories; each value is a tuple of inclusive
    ``(first, last)`` codepoint 2-tuples.  The gzipped pickle at
    ``charmap_file()`` is used as a cache when it can be read, and is
    (re)written on a best-effort basis when it cannot.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))

    """
    global _charmap
    # Best-effort caching: use the in-memory table if present, otherwise try
    # the cache file, otherwise compute the table and try to save it.
    if _charmap is None:
        path = charmap_file()
        try:
            with gzip.GzipFile(path, 'rb') as src:
                _charmap = dict(pickle.load(src))

        except Exception:
            spans_by_cat = {}
            for point in range(0, sys.maxunicode + 1):
                name = unicodedata.category(hunichr(point))
                spans = spans_by_cat.setdefault(name, [])
                adjacent = bool(spans) and spans[-1][-1] == point - 1
                if adjacent:
                    spans[-1][-1] += 1
                else:
                    spans.append([point, point])
            _charmap = {
                name: tuple(tuple(span) for span in spans)
                for name, spans in spans_by_cat.items()
            }

            try:
                # Write the Unicode table atomically
                fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
                os.close(fd)
                # Pin the mtime so the written file is reproducible
                with gzip.GzipFile(tmpfile, 'wb', mtime=1) as dst:
                    pickle.dump(
                        sorted(_charmap.items()), dst, pickle.HIGHEST_PROTOCOL
                    )
                os.rename(tmpfile, path)
            except Exception:  # pragma: no cover
                pass
    assert _charmap is not None
    return _charmap
def charmap():
    """Map every Unicode category to the codepoint ranges it contains.

    The result is a dict whose values are tuples of inclusive
    ``(first, last)`` 2-tuples.  It is loaded from the gzipped pickle at
    ``charmap_file()``, which is generated first if it does not exist, and
    is then kept in the module-level ``_charmap``.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))

    """
    global _charmap
    if _charmap is not None:
        return _charmap

    path = charmap_file()
    if not os.path.exists(path):
        ranges = {}
        for point in range(0, sys.maxunicode + 1):
            name = unicodedata.category(hunichr(point))
            spans = ranges.setdefault(name, [])
            if not spans or spans[-1][-1] != point - 1:
                # No span yet, or a gap since the last one: start afresh.
                spans.append([point, point])
            else:
                spans[-1][-1] += 1
        # Names are unique keys, so sorting them orders the pairs exactly as
        # sorting the (name, spans) tuples would.
        data = [
            (name, tuple(tuple(span) for span in ranges[name]))
            for name in sorted(ranges)
        ]

        # Write the Unicode table atomically
        fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
        os.close(fd)
        # The mtime is set to an arbitrary fixed value so that the charmap
        # file has a stable format.
        with GzipFile(tmpfile, 'wb', mtime=1) as out:
            out.write(pickle.dumps(data, pickle.HIGHEST_PROTOCOL))
        try:
            os.rename(tmpfile, path)
        except FileExistsError:  # pragma: no cover
            # This exception is only raised on Windows, and coverage is
            # measured on Linux.
            pass

    with GzipFile(path, 'rb') as cached:
        _charmap = dict(pickle.loads(cached.read()))
    assert _charmap is not None
    return _charmap