def test_parallelism(ParallelType: Type[Parallel]) -> None:
    """Concurrent workers sharing one store should mostly reuse each other's results."""
    if tmp_root.exists():
        shutil.rmtree(tmp_root)
    tmp_root.mkdir(parents=True)
    square.group = MemoizedGroup(
        obj_store=DirObjStore(temp_path()),
        fine_grain_persistence=True,
        temporary=True,
    )
    calls, unique_calls = make_overlapping_calls(N_PROCS, N_OVERLAP)
    workers = [
        ParallelType(target=square_all, args=(call,))
        for call in calls
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    recomputed = [int(log.read_text()) for log in tmp_root.iterdir()]
    # Two parallel workers *can* occasionally duplicate work, since neither knows
    # the other's computation is in flight.  It would be wildly improbable,
    # though, for *every* worker to *always* duplicate — hence strict '<'.
    assert len(recomputed) < N_OVERLAP * N_PROCS
    assert set(recomputed) == unique_calls
    assert all(square.would_hit(x) for x in unique_calls)
class Class:
    """Fixture: a memoized @staticmethod (no instance or class state involved)."""

    @staticmethod
    @memoize(
        group=MemoizedGroup(obj_store=DirObjStore(temp_path()), temporary=True),
    )
    def method(z: int) -> int:
        # Pure function of its argument; trivially memoizable.
        return z + 1
class Class:
    """Fixture: a memoized instance method whose result depends on `self.y`."""

    def __init__(self, y: int) -> None:
        # Instance state that participates in the memoization key via `self`.
        self.y = y

    @memoize(
        group=MemoizedGroup(obj_store=DirObjStore(temp_path()), temporary=True),
    )
    def method(self, z: int) -> int:
        # Depends on both the instance and the argument.
        return self.y + z
class Class:
    """Fixture: a memoized @classmethod whose result depends on class state."""

    # Class-level state read by the memoized classmethod.
    x = 3

    @classmethod
    @memoize(
        group=MemoizedGroup(obj_store=DirObjStore(temp_path()), temporary=True),
    )
    def method(cls: Type[Any], z: int) -> int:
        # Depends on both the class attribute and the argument.
        return cls.x + z
def test_cloudpickle() -> None:
    """A memoized function must survive a cloudpickle round-trip.

    Memoize needs cloudpickle compatibility so it can be shipped to dask workers.
    """
    if tmp_root.exists():
        shutil.rmtree(tmp_root)
    tmp_root.mkdir(parents=True)
    square.group = MemoizedGroup(
        obj_store=DirObjStore(temp_path()),
        fine_grain_persistence=True,
        temporary=True,
    )
    square(2)
    # Serialize and revive; the clone should still see the cached entry for 2.
    revived = cloudpickle.loads(cloudpickle.dumps(square))
    assert revived.would_hit(2)
def test_dask_delayed() -> None:
    """Memoization should deduplicate work across dask.delayed tasks."""
    if tmp_root.exists():
        shutil.rmtree(tmp_root)
    tmp_root.mkdir(parents=True)
    square.group = MemoizedGroup(
        obj_store=DirObjStore(temp_path()),
        fine_grain_persistence=True,
        temporary=True,
    )
    calls, unique_calls = make_overlapping_calls(N_PROCS, N_OVERLAP)
    delayed_square = dask.delayed(square)  # type: ignore
    tasks = [delayed_square(x) for call in calls for x in call]
    results = dask.compute(*tasks)  # type: ignore
    recomputed = [int(log.read_text()) for log in tmp_root.iterdir()]
    # Occasional duplicate computation is tolerated; total duplication is not.
    assert len(recomputed) < N_OVERLAP * N_PROCS
    assert set(recomputed) == unique_calls
    assert all(square.would_hit(x) for x in unique_calls)
    # dask.compute returns a tuple of results in task order.
    assert results == tuple([x**2 for call in calls for x in call])
def test_dask_bag() -> None:
    """Memoization should deduplicate work across a dask.bag pipeline."""
    if tmp_root.exists():
        shutil.rmtree(tmp_root)
    tmp_root.mkdir(parents=True)
    square.group = MemoizedGroup(
        obj_store=DirObjStore(temp_path()),
        fine_grain_persistence=True,
        temporary=True,
    )
    calls, unique_calls = make_overlapping_calls(N_PROCS, N_OVERLAP)
    flattened = itertools.chain.from_iterable(calls)
    dask.bag.from_sequence(flattened, npartitions=N_PROCS).map(square).compute()  # type: ignore
    recomputed = [int(log.read_text()) for log in tmp_root.iterdir()]
    assert len(recomputed) < N_OVERLAP * N_PROCS
    assert set(recomputed) == unique_calls
    # Flush the filesystem before probing the on-disk cache for hits.
    subprocess.run(["sync", "--file-system", "."], check=True)
    hits = [square.would_hit(x) for x in unique_calls]
    assert all(hits)
def test_filecontents() -> None:
    """FileContents keys the cache on a file's content, not merely its path."""
    path = temp_path()
    double.group = MemoizedGroup(obj_store=DirObjStore(path), temporary=True)
    file1 = cast(str, FileContents(path / "file1"))
    Path(file1).write_text("hello")
    outfile1 = double(file1)
    assert (
        Path(outfile1).read_text() == "hello hello"
    ), "reading and writing are transparent"
    assert double.would_hit(file1), "cache ver is same since file didn't change"
    # Clobber the output; a cache hit must restore it from storage.
    Path(outfile1).write_text("blah blah")
    outfile1 = double(file1)
    assert (
        Path(outfile1).read_text() == "hello hello"
    ), "recall from storage works"
    # Changing the input's content must invalidate the cached entry.
    Path(file1).write_text("world")
    assert not double.would_hit(file1), "cache ver changes since file changed"
def test_ttl() -> None:
    """After the TTL elapses, the entry must be recomputed, keeping results fresh."""
    get_now.group = MemoizedGroup(obj_store=DirObjStore(temp_path()), temporary=True)
    assert datetime.datetime.now() - get_now() < dt
    # Let the entry expire; the next call can only satisfy the bound by recomputing.
    time.sleep(dt.total_seconds())
    assert datetime.datetime.now() - get_now() < dt
def test_filecontents_empty() -> None:
    """Calling through FileContents on a nonexistent file must not raise."""
    path = temp_path()
    double.group = MemoizedGroup(obj_store=DirObjStore(path), temporary=True)
    missing = cast(str, FileContents(path / "file2"))
    double(missing)