def test_parallelism(ParallelType: Type[Parallel]) -> None:
    if tmp_root.exists():
        shutil.rmtree(tmp_root)
    tmp_root.mkdir(parents=True)
    square.group = MemoizedGroup(
        obj_store=DirObjStore(temp_path()),
        fine_grain_persistence=True,
        temporary=True,
    )
    calls, unique_calls = make_overlapping_calls(N_PROCS, N_OVERLAP)
    procs = [
        ParallelType(
            target=square_all,
            args=(call,),
        )
        for call in calls
    ]
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()
    recomputed = [int(log.read_text()) for log in tmp_root.iterdir()]
    # Note that two parallel workers *can* sometimes compute redundant function
    # values, because neither knows the other is in progress. However, it would
    # be improbable for *every* worker to *always* compute redundant values.
    assert len(recomputed) < N_OVERLAP * N_PROCS
    assert set(recomputed) == unique_calls
    assert all(square.would_hit(x) for x in unique_calls)
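# A sketch of the fixtures the parallelism/dask tests rely on (an assumption,
# since their definitions are not shown in this excerpt): `square` is memoized
# and logs each *genuine* computation as a file under `tmp_root`, `square_all`
# maps it over one worker's batch, and `make_overlapping_calls` builds N_PROCS
# argument lists whose neighbors overlap, so redundant recomputation across
# workers is detectable by counting log files.
import uuid
from typing import FrozenSet, List, Tuple

@memoize()
def square(x: int) -> int:
    # One log file per real (non-cached) evaluation; the tests count these.
    (tmp_root / str(uuid.uuid4())).write_text(str(x))
    return x ** 2

def square_all(xs: List[int]) -> List[int]:
    return [square(x) for x in xs]

def make_overlapping_calls(n_procs: int, n_overlap: int) -> Tuple[List[List[int]], FrozenSet[int]]:
    # Consecutive workers share all but one argument, so a cold cache with no
    # sharing would recompute many values redundantly.
    calls = [list(range(i, i + n_overlap)) for i in range(n_procs)]
    unique_calls = frozenset(x for call in calls for x in call)
    return calls, unique_calls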
class Class:
    @staticmethod
    @memoize(
        group=MemoizedGroup(obj_store=DirObjStore(temp_path()), temporary=True),
    )
    def method(z: int) -> int:
        return z + 1
class Class:
    def __init__(self, y: int) -> None:
        self.y = y

    @memoize(
        group=MemoizedGroup(obj_store=DirObjStore(temp_path()), temporary=True),
    )
    def method(self, z: int) -> int:
        return self.y + z
class Class:
    x = 3

    @classmethod
    @memoize(
        group=MemoizedGroup(obj_store=DirObjStore(temp_path()), temporary=True),
    )
    def method(cls: Type[Any], z: int) -> int:
        return cls.x + z
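# Usage sketch (assumed, not from the source): the classmethod variant above
# computes cls.x + z, and a repeated call with the same argument should be
# served from the cache rather than recomputed.
assert Class.method(3) == 6  # 3 (cls.x) + 3 (z)
assert Class.method(3) == 6  # same argument: expected to be a cache hit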
def test_cloudpickle() -> None:
    if tmp_root.exists():
        shutil.rmtree(tmp_root)
    tmp_root.mkdir(parents=True)
    # Memoize must be compatible with cloudpickle so that it can be
    # parallelized with dask.
    square.group = MemoizedGroup(
        obj_store=DirObjStore(temp_path()),
        fine_grain_persistence=True,
        temporary=True,
    )
    square(2)
    square2 = cloudpickle.loads(cloudpickle.dumps(square))
    assert square2.would_hit(2)
def test_dask_delayed() -> None:
    if tmp_root.exists():
        shutil.rmtree(tmp_root)
    tmp_root.mkdir(parents=True)
    square.group = MemoizedGroup(
        obj_store=DirObjStore(temp_path()),
        fine_grain_persistence=True,
        temporary=True,
    )
    calls, unique_calls = make_overlapping_calls(N_PROCS, N_OVERLAP)
    square2 = dask.delayed(square)  # type: ignore
    results = dask.compute(*[square2(x) for call in calls for x in call])  # type: ignore
    recomputed = [int(log.read_text()) for log in tmp_root.iterdir()]
    assert len(recomputed) < N_OVERLAP * N_PROCS
    assert set(recomputed) == unique_calls
    assert all(square.would_hit(x) for x in unique_calls)
    assert results == tuple(x**2 for call in calls for x in call)
def test_dask_bag() -> None:
    if tmp_root.exists():
        shutil.rmtree(tmp_root)
    tmp_root.mkdir(parents=True)
    square.group = MemoizedGroup(
        obj_store=DirObjStore(temp_path()),
        fine_grain_persistence=True,
        temporary=True,
    )
    calls, unique_calls = make_overlapping_calls(N_PROCS, N_OVERLAP)
    dask.bag.from_sequence(  # type: ignore
        itertools.chain.from_iterable(calls), npartitions=N_PROCS
    ).map(square).compute()
    recomputed = [int(log.read_text()) for log in tmp_root.iterdir()]
    assert len(recomputed) < N_OVERLAP * N_PROCS
    assert set(recomputed) == unique_calls
    subprocess.run(["sync", "--file-system", "."], check=True)
    calls_would_hit = [square.would_hit(x) for x in unique_calls]
    assert all(calls_would_hit)
def test_filecontents() -> None:
    path = temp_path()
    double.group = MemoizedGroup(obj_store=DirObjStore(path), temporary=True)
    file1 = cast(str, FileContents(path / "file1"))
    Path(file1).write_text("hello")
    outfile1 = double(file1)
    assert Path(outfile1).read_text() == "hello hello", "reading and writing are transparent"
    assert double.would_hit(file1), "cache version is the same since the file didn't change"
    Path(outfile1).write_text("blah blah")
    outfile1 = double(file1)
    assert Path(outfile1).read_text() == "hello hello", "recall from storage works"
    Path(file1).write_text("world")
    assert not double.would_hit(file1), "cache version changes since the file changed"
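# A sketch of the `double` fixture (an assumption; the real definition is not
# shown in this excerpt): it reads the FileContents-wrapped input, writes the
# doubled text to a sibling file, and returns that path wrapped in
# FileContents so the output file's contents are captured by the cache and
# restored on a hit, which is what the "recall from storage works" assertion
# above exercises.
@memoize()
def double(filename: str) -> str:
    contents = Path(filename).read_text()
    outfile = Path(filename).with_name(Path(filename).name + "_doubled")
    outfile.write_text(contents + " " + contents)
    return cast(str, FileContents(outfile))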
import os
import sys
import time
from pathlib import Path

import matplotlib.pyplot as plt

root = Path(os.environ["eht_root"]) if "eht_root" in os.environ else Path()
if str(root) not in sys.path:
    sys.path.insert(0, str(root))

import ehtim as eh
from ehtim.calibrating import self_cal as sc

# Begin
memoization = not bool(int(os.environ.get("CHARMONIUM_CACHE_DISABLE", "0")))
print(f"{memoization=}", file=sys.stderr)
if memoization:
    from charmonium.cache import memoize, FileContents, MemoizedGroup
    group = MemoizedGroup(size="10Mb")
else:
    # No-op stand-ins, so the call sites below work unchanged when
    # memoization is disabled.
    memoize = lambda **kwargs: (lambda x: x)
    FileContents = lambda x: None
    group = None
# End

# from ehtim.plotting import self_cal as sc

plt.close('all')
ttype = 'direct'

@memoize(group=group)
def get_data():
    # Load the image and the array
import os
from pathlib import Path

import numpy as np
from astropy.wcs import WCS
from astropy.io import fits
import astropy.units as u
from astropy.constants.si import c, G, M_sun, R_sun
from astropy.units import imperial
from astropy.time import Time
from astropy.table import Column, Table
from astropy.io import ascii

from charmonium.determ_hash import determ_hash
import charmonium.freeze
from charmonium.cache import memoize, MemoizedGroup, FileContents

# Keep charmonium.freeze from descending into astropy's mutable
# module-level caches when hashing function state.
charmonium.freeze.config.ignore_globals.add(
    ("astropy.utils.data", "_tempfilestodel"))
charmonium.freeze.config.ignore_globals.add(
    ("astropy.units.core", "_unit_registries"))
charmonium.freeze.config.recursion_limit = 50

group = MemoizedGroup(size="1GiB")

if "OUTPUT_LOG" in os.environ:
    output_file = open(os.environ["OUTPUT_LOG"], "w+")
else:
    output_file = None

repeat_factor = 20
import re
import urllib.parse
import warnings
from pathlib import Path
from typing import Any, Hashable, List, Mapping, Optional, Set, Tuple, cast

import github
import requests
from ascl_net_scraper import DetailedCodeRecord, scrape_index  # type: ignore
from charmonium.cache import MemoizedGroup, memoize

# from rich.progress import track as tqdm
from tqdm import tqdm

from .ratelimit_check import wait_for_limit

group = MemoizedGroup(size="100MiB", fine_grain_persistence=False)

github_regex = re.compile(
    r"https?://github.com/(?P<author>[a-zA-Z0-9\.\-]*)/(?P<repo>[a-zA-Z0-9\.\-]*)"
)


def parse_github_url(github_url: str) -> Tuple[str, Optional[str]]:
    github_url_parsed = urllib.parse.urlparse(github_url)
    if github_url_parsed.netloc != "github.com":
        raise ValueError(f"{github_url} is not a github.com url.")
    path = Path(github_url_parsed.path).parts
    user = path[1]
    repo: Optional[str]
    if len(path) > 2:
        repo = path[2]
    else:
        repo = None
    return user, repo
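# Illustrative usage (follows directly from the parsing logic above):
assert parse_github_url("https://github.com/octocat/Hello-World") == ("octocat", "Hello-World")
assert parse_github_url("https://github.com/octocat") == ("octocat", None)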
def test_ttl() -> None:
    get_now.group = MemoizedGroup(obj_store=DirObjStore(temp_path()), temporary=True)
    assert datetime.datetime.now() - get_now() < dt
    time.sleep(dt.total_seconds())
    # The TTL has lapsed, so get_now must recompute a fresh value for this to hold.
    assert datetime.datetime.now() - get_now() < dt
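# A minimal sketch of a TTL'd `get_now` (an assumption: the real fixture is
# not shown, and since the test assigns `get_now.group`, the project's
# `get_now` is itself the Memoized object, perhaps via a library hook). The
# idea shown here is to fold the current dt-sized time bucket into the
# memoized function's arguments, so entries "expire" when the clock crosses
# into the next bucket.
import datetime

dt = datetime.timedelta(seconds=0.5)  # hypothetical TTL used by the test

@memoize()
def _get_now_in_bucket(time_bucket: int) -> datetime.datetime:
    # A new `time_bucket` value is a cache miss, forcing recomputation.
    return datetime.datetime.now()

def get_now() -> datetime.datetime:
    bucket = int(datetime.datetime.now().timestamp() / dt.total_seconds())
    return _get_now_in_bucket(bucket)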
def test_filecontents_empty() -> None:
    path = temp_path()
    double.group = MemoizedGroup(obj_store=DirObjStore(path), temporary=True)
    file2 = cast(str, FileContents(path / "file2"))
    double(file2)