def charmap():
    """Return a dict that maps a Unicode category, to a tuple of 2-tuples
    covering the codepoint intervals for characters in that category.

    The table is built at most once per process and stored in the
    module-level ``_charmap``.  On first use it is loaded from the gzipped
    JSON file named by ``charmap_file()``; if that fails for any reason the
    table is recomputed from ``unicodedata`` and a best-effort attempt is
    made to write it back to that file.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))
    """
    global _charmap
    # Best-effort caching in the face of missing files and/or unwritable
    # filesystems is fairly simple: check if loaded, else try loading,
    # else calculate and try writing the cache.
    if _charmap is None:
        f = charmap_file()
        try:
            with gzip.GzipFile(f, "rb") as cache:
                # json.loads() needs the decompressed text, not the file
                # object: passing the GzipFile itself raises TypeError, which
                # the handler below would swallow, so the cache would never
                # actually be used.
                tmp_charmap = dict(json.loads(cache.read().decode()))
        except Exception:
            # Any failure to load (missing, unreadable or corrupt file)
            # falls back to computing the table from scratch.
            tmp_charmap = {}
            for i in range(0, sys.maxunicode + 1):
                cat = unicodedata.category(hunichr(i))
                rs = tmp_charmap.setdefault(cat, [])
                if rs and rs[-1][-1] == i - 1:
                    # Codepoint continues the last interval of its category.
                    rs[-1][-1] += 1
                else:
                    rs.append([i, i])

            try:
                # Write the Unicode table atomically
                tmpdir = storage_directory("tmp")
                mkdir_p(tmpdir)
                fd, tmpfile = tempfile.mkstemp(dir=tmpdir)
                os.close(fd)
                # Explicitly set the mtime to get reproducible output
                with gzip.GzipFile(tmpfile, "wb", mtime=1) as o:
                    result = json.dumps(sorted(tmp_charmap.items()))
                    o.write(result.encode())

                os.renames(tmpfile, f)
            except Exception:
                # Deliberately ignored: an unwritable cache only costs time
                # on the next run, it must not break the caller.
                pass

        # convert between lists and tuples
        _charmap = {
            k: tuple(tuple(pair) for pair in pairs)
            for k, pairs in tmp_charmap.items()
        }
        # Sanity-check the table: each value is a tuple of 2-tuples (that is,
        # tuples of length 2), both elements of each 2-tuple are integers,
        # and the intervals are in ascending order.
        for vs in _charmap.values():
            ints = list(sum(vs, ()))
            assert all(isinstance(x, int) for x in ints)
            assert ints == sorted(ints)
            assert all(len(tup) == 2 for tup in vs)

    assert _charmap is not None
    return _charmap
def save(self, key: bytes, value: bytes) -> None:
    """Write ``value`` to the file that represents it under ``key``.

    Nothing is written when the destination file already exists.  Otherwise
    the bytes go to a sibling file with a random hex suffix, which is then
    renamed onto the destination; if the rename fails the temporary file is
    removed instead.
    """
    # Note: the directory for this key is created right here. Permissions
    # were already checked, but other things can still go wrong at this
    # point, e.g. the disk is full.
    mkdir_p(self._key_path(key))

    target = self._value_path(key, value)
    if os.path.exists(target):
        return

    token = binascii.hexlify(os.urandom(16)).decode("ascii")
    scratch = target + "." + token
    with open(scratch, "wb") as handle:
        handle.write(value)

    try:
        os.rename(scratch, target)
    except OSError:  # pragma: no cover
        os.unlink(scratch)

    # Whichever branch ran, the temporary name must be gone by now.
    assert not os.path.exists(scratch)
def charmap():
    """Return a dict that maps a Unicode category, to a tuple of 2-tuples
    covering the codepoint intervals for characters in that category.

    The result is kept in the module-level ``_charmap`` after the first
    call.  It is loaded from the gzipped JSON file named by
    ``charmap_file()`` when possible, and otherwise computed from
    ``unicodedata`` and written back on a best-effort basis.

    >>> charmap()['Co']
    ((57344, 63743), (983040, 1048573), (1048576, 1114109))
    """
    global _charmap
    # Best-effort caching despite missing files and/or unwritable
    # filesystems: already loaded -> done; otherwise try the cache file;
    # otherwise compute the table and try to write the cache.
    if _charmap is not None:
        return _charmap

    cache_path = charmap_file()
    try:
        with gzip.GzipFile(cache_path, "rb") as compressed:
            # When the minimum Python 3 version becomes 3.6, this can be
            # simplified to `json.load(...)` without needing to decode first.
            raw_json = compressed.read().decode()
        table = dict(json.loads(raw_json))
    except Exception:
        # The scan below sticks to local variables for performance;
        # indexing and updating containers is a ~3x slowdown. This doesn't fix
        # https://github.com/HypothesisWorks/hypothesis/issues/2108 but it helps.
        category = unicodedata.category  # Local variable -> ~20% speedup!
        table = {}
        run_cat = category(chr(0))
        run_start = 0
        for codepoint in range(1, sys.maxunicode + 1):
            current = category(chr(codepoint))
            if current == run_cat:
                continue
            # Category changed: close the run that ended one codepoint ago.
            table.setdefault(run_cat, []).append([run_start, codepoint - 1])
            run_cat = current
            run_start = codepoint
        # Close the final run, which extends to the last codepoint.
        table.setdefault(run_cat, []).append([run_start, sys.maxunicode])

        try:
            # Write the Unicode table atomically
            scratch_dir = storage_directory("tmp")
            mkdir_p(scratch_dir)
            handle, scratch_path = tempfile.mkstemp(dir=scratch_dir)
            os.close(handle)
            # Explicitly set the mtime to get reproducible output
            with gzip.GzipFile(scratch_path, "wb", mtime=1) as out:
                serialised = json.dumps(sorted(table.items()))
                out.write(serialised.encode())

            os.renames(scratch_path, cache_path)
        except Exception:
            pass

    # JSON (and the scan above) give lists; freeze them into tuples.
    _charmap = {
        name: tuple(tuple(bounds) for bounds in intervals)
        for name, intervals in table.items()
    }
    # Sanity checks: every value is a tuple of 2-tuples (tuples of length 2),
    # both elements of each 2-tuple are integers, and the flattened
    # endpoints are in ascending order.
    for intervals in _charmap.values():
        endpoints = [point for bounds in intervals for point in bounds]
        assert all(isinstance(point, int) for point in endpoints)
        assert endpoints == sorted(endpoints)
        assert all(len(bounds) == 2 for bounds in intervals)

    assert _charmap is not None
    return _charmap