def charmap():
    """Return a dict mapping each Unicode category to a tuple of 2-tuples
    giving the inclusive codepoint intervals of characters in that category.

    The result is memoised in the module-level ``_charmap``. If the file
    named by ``charmap_file()`` does not exist, the table is computed and
    written there as a gzipped pickle; the table is then always loaded back
    from that file.
    """
    global _charmap
    if _charmap is None:
        f = charmap_file()
        if not os.path.exists(f):
            # Accumulate into a local dict: if anything below raises, the
            # module-level cache must stay None rather than be left holding
            # a partially built table of mutable lists.
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                # NOTE(review): hunichr is presumably a chr()/unichr() shim
                # defined elsewhere -- confirm.
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                # Extend the last interval when i directly follows it,
                # otherwise start a new [start, end] interval.
                if rs and rs[-1][-1] == i - 1:
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])
            data = sorted(
                (k, tuple(map(tuple, v))) for k, v in tmp_charmap.items())

            # Write the Unicode table atomically
            fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
            os.close(fd)
            # We explicitly set the mtime to an arbitrary value so as to get
            # a stable format for our charmap.
            with GzipFile(tmpfile, 'wb', mtime=1) as o:
                o.write(pickle.dumps(data, pickle.HIGHEST_PROTOCOL))
            try:
                os.rename(tmpfile, f)
            except OSError:
                # Some platforms refuse to rename onto an existing file. If
                # the destination now exists, someone else wrote it first and
                # we can simply use theirs; otherwise the failure is real.
                if not os.path.exists(f):
                    raise
                try:
                    os.remove(tmpfile)
                except OSError:
                    pass
        # NOTE(review): unpickling is only safe if the cache file is trusted;
        # it is expected to be a file this function wrote -- confirm that the
        # cache directory is not writable by untrusted parties.
        with GzipFile(f, 'rb') as cached:
            _charmap = dict(pickle.loads(cached.read()))
    assert _charmap is not None
    return _charmap
def charmap():
    """Return a dict mapping each Unicode category to a tuple of 2-tuples
    giving the inclusive codepoint intervals of characters in that category.

    The result is memoised in the module-level ``_charmap``. If the file
    named by ``charmap_file()`` does not exist, the table is computed and
    written there as a gzipped pickle; the table is then always loaded back
    from that file.
    """
    global _charmap
    if _charmap is None:
        f = charmap_file()
        if not os.path.exists(f):
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                # NOTE(review): hunichr is presumably a chr()/unichr() shim
                # defined elsewhere -- confirm.
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                # Extend the last interval when i directly follows it,
                # otherwise start a new [start, end] interval.
                if rs and rs[-1][-1] == i - 1:
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])
            data = sorted((k, tuple(map(tuple, v))) for k, v in tmp_charmap.items())

            # Write the Unicode table atomically
            fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
            os.close(fd)
            # We explicitly set the mtime to an arbitrary value so as to get
            # a stable format for our charmap.
            with GzipFile(tmpfile, "wb", mtime=1) as o:
                o.write(pickle.dumps(data, pickle.HIGHEST_PROTOCOL))
            try:
                os.rename(tmpfile, f)
            except FileExistsError:  # pragma: no cover
                # This exception is only raised on Windows, and coverage is
                # measured on Linux. The destination already exists, so our
                # copy is redundant: delete it instead of leaving it behind.
                try:
                    os.remove(tmpfile)
                except OSError:
                    pass
        # NOTE(review): unpickling is only safe if the cache file is trusted;
        # it is expected to be a file this function wrote -- confirm that the
        # cache directory is not writable by untrusted parties.
        with GzipFile(f, "rb") as cached:
            _charmap = dict(pickle.loads(cached.read()))
    assert _charmap is not None
    return _charmap
def charmap():
    """Return a dict that maps a Unicode category, to a tuple of 2-tuples
    covering the codepoint intervals for characters in that category.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))
    """
    global _charmap
    # Best-effort caching in the face of missing files and/or unwritable
    # filesystems is fairly simple: check if loaded, else try loading,
    # else calculate and try writing the cache.
    if _charmap is None:
        f = charmap_file()
        try:
            with gzip.GzipFile(f, "rb") as cache:
                # json.loads needs the file's contents, not the file object;
                # decode() mirrors the encode() used when writing below.
                tmp_charmap = dict(json.loads(cache.read().decode()))
        except Exception:
            # Missing, unreadable or malformed cache: rebuild from scratch.
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                # NOTE(review): hunichr is presumably a chr()/unichr() shim
                # defined elsewhere -- confirm.
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                # Extend the last interval when i directly follows it,
                # otherwise start a new [start, end] interval.
                if rs and rs[-1][-1] == i - 1:
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])

            tmpfile = None
            try:
                # Write the Unicode table atomically
                fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
                os.close(fd)
                # Explicitly set the mtime to get reproducible output
                with gzip.GzipFile(tmpfile, "wb", mtime=1) as o:
                    result = json.dumps(sorted(tmp_charmap.items()))
                    o.write(result.encode())
                os.rename(tmpfile, f)
            except Exception:
                # Writing the cache is optional, but do not leave the
                # temporary file behind when it could not be moved into place.
                if tmpfile is not None:
                    try:
                        os.remove(tmpfile)
                    except OSError:
                        pass

        # convert between lists and tuples
        table = {
            k: tuple(tuple(pair) for pair in pairs)
            for k, pairs in tmp_charmap.items()
        }
        # Check that each value is a tuple of 2-tuples (that is, tuples of
        # length 2), that both elements of each tuple are integers, and that
        # the endpoints are in ascending order. This runs before the table is
        # published so a bad cache file cannot end up in the global.
        for vs in table.values():
            ints = [x for pair in vs for x in pair]
            assert all(isinstance(x, int) for x in ints)
            assert ints == sorted(ints)
            assert all(len(tup) == 2 for tup in vs)
        _charmap = table

    assert _charmap is not None
    return _charmap
def charmap():
    """Return a dict that maps a Unicode category, to a tuple of 2-tuples
    covering the codepoint intervals for characters in that category.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))
    """
    global _charmap
    # Best-effort caching in the face of missing files and/or unwritable
    # filesystems is fairly simple: check if loaded, else try loading,
    # else calculate and try writing the cache.
    if _charmap is None:
        f = charmap_file()
        try:
            # NOTE(review): unpickling is only safe if the cache file is
            # trusted; it is expected to be a file this function wrote --
            # confirm the cache directory is not writable by untrusted
            # parties.
            with gzip.GzipFile(f, 'rb') as cache:
                _charmap = dict(pickle.load(cache))
        except Exception:
            # Missing, unreadable or malformed cache: rebuild from scratch.
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                # NOTE(review): hunichr is presumably a chr()/unichr() shim
                # defined elsewhere -- confirm.
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                # Extend the last interval when i directly follows it,
                # otherwise start a new [start, end] interval.
                if rs and rs[-1][-1] == i - 1:
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])
            _charmap = {
                k: tuple(tuple(pair) for pair in pairs)
                for k, pairs in tmp_charmap.items()
            }

            tmpfile = None
            try:
                # Write the Unicode table atomically
                fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
                os.close(fd)
                # Explicitly set the mtime to get reproducible output
                with gzip.GzipFile(tmpfile, 'wb', mtime=1) as o:
                    pickle.dump(sorted(_charmap.items()), o,
                                pickle.HIGHEST_PROTOCOL)
                os.rename(tmpfile, f)
            except Exception:  # pragma: no cover
                # Writing the cache is optional, but do not leave the
                # temporary file behind when it could not be moved into place.
                if tmpfile is not None:
                    try:
                        os.remove(tmpfile)
                    except OSError:
                        pass
    assert _charmap is not None
    return _charmap
def charmap():
    """Return a dict that maps a Unicode category, to a tuple of 2-tuples
    covering the codepoint intervals for characters in that category.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))
    """
    global _charmap
    # Best-effort caching in the face of missing files and/or unwritable
    # filesystems is fairly simple: check if loaded, else try loading,
    # else calculate and try writing the cache.
    if _charmap is None:
        f = charmap_file()
        try:
            # NOTE(review): unpickling is only safe if the cache file is
            # trusted; it is expected to be a file this function wrote --
            # confirm the cache directory is not writable by untrusted
            # parties.
            with gzip.GzipFile(f, 'rb') as cache:
                _charmap = dict(pickle.load(cache))
        except Exception:
            # Missing, unreadable or malformed cache: rebuild from scratch.
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                # NOTE(review): hunichr is presumably a chr()/unichr() shim
                # defined elsewhere -- confirm.
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                # Extend the last interval when i directly follows it,
                # otherwise start a new [start, end] interval.
                if rs and rs[-1][-1] == i - 1:
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])
            # Freeze the mutable [start, end] lists into tuples.
            _charmap = {k: tuple(map(tuple, v)) for k, v in tmp_charmap.items()}

            tmpfile = None
            try:
                # Write the Unicode table atomically
                fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
                os.close(fd)
                # Explicitly set the mtime to get reproducible output
                with gzip.GzipFile(tmpfile, 'wb', mtime=1) as o:
                    pickle.dump(sorted(_charmap.items()), o,
                                pickle.HIGHEST_PROTOCOL)
                os.rename(tmpfile, f)
            except Exception:  # pragma: no cover
                # Writing the cache is optional, but do not leave the
                # temporary file behind when it could not be moved into place.
                if tmpfile is not None:
                    try:
                        os.remove(tmpfile)
                    except OSError:
                        pass
    assert _charmap is not None
    return _charmap
def charmap():
    """Return a dict that maps a Unicode category, to a tuple of 2-tuples
    covering the codepoint intervals for characters in that category.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))
    """
    global _charmap
    if _charmap is None:
        f = charmap_file()
        if not os.path.exists(f):
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                # NOTE(review): hunichr is presumably a chr()/unichr() shim
                # defined elsewhere -- confirm.
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                # Extend the last interval when i directly follows it,
                # otherwise start a new [start, end] interval.
                if rs and rs[-1][-1] == i - 1:
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])
            data = sorted(
                (k, tuple(map(tuple, v))) for k, v in tmp_charmap.items())

            # Write the Unicode table atomically
            fd, tmpfile = tempfile.mkstemp(dir=tmpdir())
            os.close(fd)
            # We explicitly set the mtime to an arbitrary value so as to get
            # a stable format for our charmap.
            with GzipFile(tmpfile, 'wb', mtime=1) as o:
                o.write(pickle.dumps(data, pickle.HIGHEST_PROTOCOL))
            try:
                os.rename(tmpfile, f)
            except FileExistsError:  # pragma: no cover
                # This exception is only raised on Windows, and coverage is
                # measured on Linux. The destination already exists, so our
                # copy is redundant: delete it instead of leaving it behind.
                try:
                    os.remove(tmpfile)
                except OSError:
                    pass
        # NOTE(review): unpickling is only safe if the cache file is trusted;
        # it is expected to be a file this function wrote -- confirm that the
        # cache directory is not writable by untrusted parties.
        with GzipFile(f, 'rb') as cached:
            _charmap = dict(pickle.loads(cached.read()))
    assert _charmap is not None
    return _charmap