def test_checkpoint():
    """Checkpoint a Tuple together with a nested dict/list holding another Tuple.

    Computing the checkpoint must yield ``None``, and the counter must read 3
    afterwards -- one per ``counter.f`` task across both graphs (presumably
    ``NodeCounter.n`` tallies calls to ``f``; it is defined elsewhere).
    """
    counter = NodeCounter()
    flat_graph = {("a", h1): (counter.f, 1), ("a", h2): (counter.f, 2)}
    nested_graph = {"b": (counter.f, 2)}

    flat = Tuple(flat_graph, list(flat_graph))
    nested = Tuple(nested_graph, list(nested_graph))
    cp = checkpoint(flat, {"x": [nested]})

    assert cp.compute(scheduler="sync") is None
    assert counter.n == 3
def test_wait_on_many():
    """Call ``wait_on`` with several arguments, one of them a nested container.

    The outputs must mirror the structure of the inputs, compute to the same
    values as the inputs would, and leave the counter at 3 (one per
    ``counter.f`` task).
    """
    counter = NodeCounter()
    flat_graph = {("a", h1): (counter.f, 1), ("a", h2): (counter.f, 2)}
    nested_graph = {"b": (counter.f, 3)}

    flat = Tuple(flat_graph, list(flat_graph))
    nested = Tuple(nested_graph, list(nested_graph))
    waited = wait_on(flat, {"x": [nested]})

    expected = ((1, 2), {"x": [(3,)]})
    assert dask.compute(*waited, scheduler="sync") == expected
    assert counter.n == 3
def test_bind(layers):
    """Exercise ``bind`` on a small chain a-1 -> b-1 -> c-1 blocked by d-1/e.

    Parameters
    ----------
    layers : bool
        When truthy the graphs are wrapped in ``HighLevelGraph`` objects and
        ``bind`` is called with ``assume_layers=True``; otherwise flat dicts
        are used. Presumably supplied by a parametrize decorator or fixture
        that is not visible in this part of the file.
    """
    dsk1 = {("a-1", h1): 1, ("a-1", h2): 2}
    dsk2 = {"b-1": (add, ("a-1", h1), ("a-1", h2))}
    dsk3 = {"c-1": "b-1"}
    cnt = NodeCounter()
    dsk4 = {("d-1", h1): (cnt.f, 1), ("d-1", h2): (cnt.f, 2)}
    dsk4b = {"e": (cnt.f, 3)}

    if layers:
        dsk1 = HighLevelGraph.from_collections("a-1", dsk1)
        dsk2 = HighLevelGraph(
            {"a-1": dsk1, "b-1": dsk2},
            dependencies={"a-1": set(), "b-1": {"a-1"}},
        )
        dsk3 = HighLevelGraph(
            {"a-1": dsk1, "b-1": dsk2, "c-1": dsk3},
            dependencies={"a-1": set(), "b-1": {"a-1"}, "c-1": {"b-1"}},
        )
        # Every layer needs an entry in the dependencies mapping, even when it
        # depends on nothing; an empty mapping leaves the graph inconsistent.
        dsk4 = HighLevelGraph(
            {"d-1": dsk4, "e": dsk4b},
            {"d-1": set(), "e": set()},
        )
    else:
        dsk2.update(dsk1)
        dsk3.update(dsk2)
        dsk4.update(dsk4b)

    # No collection is built directly on dsk1; it only feeds dsk2 and dsk3.
    t2 = Tuple(dsk2, ["b-1"])
    t3 = Tuple(dsk3, ["c-1"])
    t4 = Tuple(dsk4, [("d-1", h1), ("d-1", h2), "e"])  # Multiple names

    # Without omit, the clone reaches down to the "a-1" keys, whose tasks
    # must then be headed by chunks.bind.
    bound1 = bind(t3, t4, seed=1, assume_layers=layers)
    cloned_a_name = clone_key("a-1", seed=1)
    assert bound1.__dask_graph__()[cloned_a_name, h1][0] is chunks.bind
    assert bound1.__dask_graph__()[cloned_a_name, h2][0] is chunks.bind
    assert bound1.compute() == (3,)
    assert cnt.n == 3

    # With omit=t2, the chunks.bind task is expected at the cloned "c-1" key.
    bound2 = bind(t3, t4, omit=t2, seed=1, assume_layers=layers)
    cloned_c_name = clone_key("c-1", seed=1)
    assert bound2.__dask_graph__()[cloned_c_name][0] is chunks.bind
    assert bound2.compute() == (3,)
    assert cnt.n == 6
def demo_tuples(layers: bool) -> "tuple[Tuple, Tuple, NodeCounter]":
    """Build two multi-name ``Tuple`` collections that share one counter.

    Parameters
    ----------
    layers : bool
        If true, the collections are backed by ``HighLevelGraph`` objects;
        otherwise each graph is flattened with ``to_dict()`` first.

    Returns
    -------
    tuple
        ``(t1, t2, cnt)``: ``t1`` covers keys ``("a", h1)``, ``("a", h2)`` and
        ``"b"``; ``t2`` covers ``"c"`` and ``"d"``; ``cnt`` is the
        ``NodeCounter`` whose ``f`` is used by every task in both graphs.
    """
    cnt = NodeCounter()
    # Collections have multiple names.
    # Each layer is independent, so it gets an explicit empty dependency set;
    # an empty dependencies mapping would leave the graph inconsistent.
    dsk1 = HighLevelGraph(
        {
            "a": {("a", h1): (cnt.f, 1), ("a", h2): (cnt.f, 2)},
            "b": {"b": (cnt.f, 3)},
        },
        {"a": set(), "b": set()},
    )
    dsk2 = HighLevelGraph(
        {"c": {"c": (cnt.f, 4)}, "d": {"d": (cnt.f, 5)}},
        {"c": set(), "d": set()},
    )
    if not layers:
        dsk1 = dsk1.to_dict()
        dsk2 = dsk2.to_dict()

    return Tuple(dsk1, list(dsk1)), Tuple(dsk2, list(dsk2)), cnt
def test_split_every(split_every, nkeys):
    """Check graph sizes produced by ``checkpoint``/``wait_on``/``bind``.

    Parameters
    ----------
    split_every
        Forwarded unchanged to each function under test.
    nkeys : int
        Expected number of keys in the checkpoint graph for that
        ``split_every``. Both arguments presumably come from a parametrize
        decorator or fixtures not visible in this part of the file.
    """
    n_inputs = 100
    source_graph = {("a", i): i for i in range(n_inputs)}
    t1 = Tuple(source_graph, list(source_graph))

    cp = checkpoint(t1, split_every=split_every)
    assert len(cp.__dask_graph__()) == nkeys
    assert cp.compute(scheduler="sync") is None

    # wait_on's graph is expected to hold one extra key per input.
    waited = wait_on(t1, split_every=split_every)
    assert len(waited.__dask_graph__()) == nkeys + n_inputs
    assert waited.compute(scheduler="sync") == tuple(range(n_inputs))

    # bind's graph is expected to hold the checkpoint keys plus 2 more.
    small_graph = {"b": 1, "c": 2}
    t3 = Tuple(small_graph, list(small_graph))
    bound = bind(t3, t1, split_every=split_every, assume_layers=False)
    assert len(bound.__dask_graph__()) == nkeys + 2
    assert bound.compute(scheduler="sync") == (1, 2)
def test_clone(layers):
    """Exercise ``clone`` with fixed seeds, random seeds, ``omit`` and nesting.

    Parameters
    ----------
    layers : bool
        When truthy the graphs are ``HighLevelGraph`` objects and ``clone`` is
        called with ``assume_layers=True``; otherwise flat dicts are used.
        Presumably supplied by a parametrize decorator or fixture that is not
        visible in this part of the file.
    """
    graph_a = {("a", h1): 1, ("a", h2): 2}
    graph_b = {"b": (add, ("a", h1), ("a", h2))}
    graph_cd = {"c": 1, "d": 1}  # Multiple names

    if not layers:
        graph_b.update(graph_a)
    else:
        graph_a = HighLevelGraph.from_collections("a", graph_a)
        graph_b = HighLevelGraph(
            {"a": graph_a, "b": graph_b},
            dependencies={"a": set(), "b": {"a"}},
        )
        graph_cd = HighLevelGraph.from_collections("c", graph_cd)

    t1 = Tuple(graph_a, [("a", h1), ("a", h2)])
    t2 = Tuple(graph_b, ["b"])
    t3 = Tuple(graph_cd, ["c"])

    seeded = clone(t2, seed=1, assume_layers=layers)
    seeded_again = clone(t2, seed=1, assume_layers=layers)
    other_seed = clone(t2, seed=2, assume_layers=layers)
    # Clone of a clone has different keys
    clone_of_clone = clone(seeded, seed=1, assume_layers=layers)
    random_a = clone(t2, assume_layers=layers)  # Random seed
    random_b = clone(t2, assume_layers=layers)  # Random seed
    with_omit = clone(t2, omit=t1, seed=1, assume_layers=layers)

    # Same input and same seed must give an identical graph.
    assert seeded.__dask_graph__() == seeded_again.__dask_graph__()
    assert_no_common_keys(seeded, t2, layers=layers)
    assert_no_common_keys(seeded, other_seed, layers=layers)
    assert_no_common_keys(seeded, clone_of_clone, layers=layers)
    assert_no_common_keys(seeded, random_a, layers=layers)
    assert_no_common_keys(random_a, random_b, layers=layers)
    assert_no_common_keys(with_omit, t2, omit=t1, layers=layers)

    every_variant = (
        t2,
        seeded,
        seeded_again,
        other_seed,
        clone_of_clone,
        random_a,
        random_b,
        with_omit,
    )
    assert dask.compute(*every_variant) == ((3,),) * 8

    # Clone nested; some of the collections in omit are unrelated
    nested = clone({"x": [t2]}, omit={"y": [t1, t3]}, assume_layers=layers)
    assert dask.compute(nested) == ({"x": [(3,)]},)
    inner = nested["x"][0]
    assert_no_common_keys(inner, t2, omit=t1, layers=layers)
    assert_no_common_keys(inner, t3, layers=layers)
def test_split_every_invalid():
    """``checkpoint`` must reject unusable ``split_every`` values.

    1, 1.9, 0 and -2 are expected to raise ``ValueError``; a dict is expected
    to raise ``TypeError``.
    """
    t = Tuple({"a": 1, "b": 2}, ["a", "b"])

    # 0 is in the list on purpose: not to be confused with False or None.
    for bad_value in (1, 1.9, 0, -2):
        with pytest.raises(ValueError):
            checkpoint(t, split_every=bad_value)

    # A dict is legal for dask.array but not here.
    with pytest.raises(TypeError):
        checkpoint(t, split_every={0: 2})
def test_wait_on_one():
    """Call ``wait_on`` with a single collection.

    The result must compute to the same values as the input, and the counter
    must read 2 afterwards (one per ``counter.f`` task).
    """
    counter = NodeCounter()
    graph = {("a", h1): (counter.f, 1), ("a", h2): (counter.f, 2)}
    source = Tuple(graph, list(graph))

    waited = wait_on(source)

    assert waited.compute(scheduler="sync") == (1, 2)
    assert counter.n == 2