Example #1
0
def test_checkpoint():
    """checkpoint() over a collection plus a nested collection computes to None.

    Three tasks route through ``NodeCounter.f``; the counter is expected to
    read 3 once the checkpoint has been computed.
    """
    counter = NodeCounter()
    first_graph = {("a", h1): (counter.f, 1), ("a", h2): (counter.f, 2)}
    second_graph = {"b": (counter.f, 2)}
    first = Tuple(first_graph, list(first_graph))
    second = Tuple(second_graph, list(second_graph))

    cp = checkpoint(first, {"x": [second]})

    assert cp.compute(scheduler="sync") is None
    # Two tasks in the first graph plus one in the second.
    assert counter.n == 3
Example #2
0
def test_wait_on_many():
    """wait_on() given several inputs (one nested) keeps their structure.

    The outputs are computed together with the synchronous scheduler and
    compared against the plain values; the counter is expected to read 3.
    """
    counter = NodeCounter()
    graph_a = {("a", h1): (counter.f, 1), ("a", h2): (counter.f, 2)}
    graph_b = {"b": (counter.f, 3)}
    coll_a = Tuple(graph_a, list(graph_a))
    coll_b = Tuple(graph_b, list(graph_b))

    waited = wait_on(coll_a, {"x": [coll_b]})

    expected = ((1, 2), {"x": [(3,)]})
    assert dask.compute(*waited, scheduler="sync") == expected
    assert counter.n == 3
def test_bind(layers):
    """Exercise bind() on the chain a-1 -> b-1 -> c-1 with a counted parent.

    ``layers`` selects HighLevelGraph inputs (truthy) or flat dict graphs
    (falsy) and is forwarded to ``bind`` as ``assume_layers``. It is
    presumably supplied by a parametrize decorator that is not visible here.
    """
    counter = NodeCounter()
    graph_a = {("a-1", h1): 1, ("a-1", h2): 2}
    graph_b = {"b-1": (add, ("a-1", h1), ("a-1", h2))}
    graph_c = {"c-1": "b-1"}
    graph_d = {("d-1", h1): (counter.f, 1), ("d-1", h2): (counter.f, 2)}
    graph_e = {"e": (counter.f, 3)}

    if not layers:
        # Flat graphs: each graph absorbs the tasks it depends on.
        graph_b.update(graph_a)
        graph_c.update(graph_b)
        graph_d.update(graph_e)
    else:
        graph_a = HighLevelGraph.from_collections("a-1", graph_a)
        graph_b = HighLevelGraph(
            {"a-1": graph_a, "b-1": graph_b},
            dependencies={"a-1": set(), "b-1": {"a-1"}},
        )
        # The "b-1" layer here is the HighLevelGraph built just above.
        graph_c = HighLevelGraph(
            {"a-1": graph_a, "b-1": graph_b, "c-1": graph_c},
            dependencies={"a-1": set(), "b-1": {"a-1"}, "c-1": {"b-1"}},
        )
        graph_d = HighLevelGraph({"d-1": graph_d, "e": graph_e}, {})

    omitted = Tuple(graph_b, ["b-1"])
    child = Tuple(graph_c, ["c-1"])
    # Multiple names
    parent = Tuple(graph_d, [("d-1", h1), ("d-1", h2), "e"])

    # Without omit, the cloned "a-1" keys are the ones wrapped by chunks.bind.
    bound_all = bind(child, parent, seed=1, assume_layers=layers)
    cloned_a = clone_key("a-1", seed=1)
    for suffix in (h1, h2):
        assert bound_all.__dask_graph__()[cloned_a, suffix][0] is chunks.bind
    assert bound_all.compute() == (3,)
    assert counter.n == 3

    # With the "b-1" collection omitted, the cloned "c-1" key is wrapped instead.
    bound_omit = bind(child, parent, omit=omitted, seed=1, assume_layers=layers)
    cloned_c = clone_key("c-1", seed=1)
    assert bound_omit.__dask_graph__()[cloned_c][0] is chunks.bind
    assert bound_omit.compute() == (3,)
    # Running total across both computes.
    assert counter.n == 6
def demo_tuples(layers: bool) -> "tuple[Tuple, Tuple, NodeCounter]":
    """Build two multi-name Tuple collections sharing one NodeCounter.

    Every task in both graphs calls ``NodeCounter.f``. When ``layers`` is
    falsy, both HighLevelGraphs are converted with ``to_dict()`` before being
    wrapped.

    Returns the two collections followed by the counter.
    """
    counter = NodeCounter()
    # Collections have multiple names
    first_layers = {
        "a": {("a", h1): (counter.f, 1), ("a", h2): (counter.f, 2)},
        "b": {"b": (counter.f, 3)},
    }
    second_layers = {"c": {"c": (counter.f, 4)}, "d": {"d": (counter.f, 5)}}

    graph1 = HighLevelGraph(first_layers, {})
    graph2 = HighLevelGraph(second_layers, {})
    if not layers:
        graph1, graph2 = graph1.to_dict(), graph2.to_dict()

    first = Tuple(graph1, list(graph1))
    second = Tuple(graph2, list(graph2))
    return first, second, counter
def test_split_every(split_every, nkeys):
    """Check the graph size checkpoint/wait_on/bind produce for ``split_every``.

    ``nkeys`` is the expected number of keys in the checkpoint graph over 100
    inputs. Both arguments are presumably supplied by a parametrize decorator
    that is not visible here.
    """
    n_inputs = 100
    source_graph = {("a", i): i for i in range(n_inputs)}
    source = Tuple(source_graph, list(source_graph))

    cp = checkpoint(source, split_every=split_every)
    cp_size = len(cp.__dask_graph__())
    assert cp_size == nkeys
    assert cp.compute(scheduler="sync") is None

    # wait_on is expected to add one key per input on top of the checkpoint.
    waited = wait_on(source, split_every=split_every)
    waited_size = len(waited.__dask_graph__())
    assert waited_size == nkeys + n_inputs
    assert waited.compute(scheduler="sync") == tuple(range(n_inputs))

    # Binding a two-key collection is expected to add just those two keys.
    small_graph = {"b": 1, "c": 2}
    small = Tuple(small_graph, list(small_graph))
    bound = bind(small, source, split_every=split_every, assume_layers=False)
    bound_size = len(bound.__dask_graph__())
    assert bound_size == nkeys + 2
    assert bound.compute(scheduler="sync") == (1, 2)
def test_clone(layers):
    """Check clone() for fixed seeds, random seeds, omit and nested inputs.

    ``layers`` toggles between HighLevelGraph inputs and flat dict graphs and
    is forwarded as ``assume_layers``. It is presumably supplied by a
    parametrize decorator that is not visible here.
    """
    graph_a = {("a", h1): 1, ("a", h2): 2}
    graph_b = {"b": (add, ("a", h1), ("a", h2))}
    graph_cd = {"c": 1, "d": 1}  # Multiple names
    if not layers:
        graph_b.update(graph_a)
    else:
        graph_a = HighLevelGraph.from_collections("a", graph_a)
        graph_b = HighLevelGraph(
            {"a": graph_a, "b": graph_b},
            dependencies={"a": set(), "b": {"a"}},
        )
        graph_cd = HighLevelGraph.from_collections("c", graph_cd)

    t1 = Tuple(graph_a, [("a", h1), ("a", h2)])
    t2 = Tuple(graph_b, ["b"])
    t3 = Tuple(graph_cd, ["c"])

    seeded = clone(t2, seed=1, assume_layers=layers)
    seeded_again = clone(t2, seed=1, assume_layers=layers)
    other_seed = clone(t2, seed=2, assume_layers=layers)
    # Clone of a clone has different keys
    reclone = clone(seeded, seed=1, assume_layers=layers)
    random_a = clone(t2, assume_layers=layers)  # Random seed
    random_b = clone(t2, assume_layers=layers)  # Random seed
    with_omit = clone(t2, omit=t1, seed=1, assume_layers=layers)

    # The same seed on the same input must give identical graphs.
    assert seeded.__dask_graph__() == seeded_again.__dask_graph__()
    assert_no_common_keys(seeded, t2, layers=layers)
    assert_no_common_keys(seeded, other_seed, layers=layers)
    assert_no_common_keys(seeded, reclone, layers=layers)
    assert_no_common_keys(seeded, random_a, layers=layers)
    assert_no_common_keys(random_a, random_b, layers=layers)
    assert_no_common_keys(with_omit, t2, omit=t1, layers=layers)

    everything = (
        t2,
        seeded,
        seeded_again,
        other_seed,
        reclone,
        random_a,
        random_b,
        with_omit,
    )
    assert dask.compute(*everything) == ((3,),) * 8

    # Clone nested; some of the collections in omit are unrelated
    nested = clone({"x": [t2]}, omit={"y": [t1, t3]}, assume_layers=layers)
    assert dask.compute(nested) == ({"x": [(3,)]},)
    inner = nested["x"][0]
    assert_no_common_keys(inner, t2, omit=t1, layers=layers)
    assert_no_common_keys(inner, t3, layers=layers)
def test_split_every_invalid():
    """checkpoint() must reject out-of-range or wrongly typed ``split_every``."""
    coll = Tuple({"a": 1, "b": 2}, ["a", "b"])

    # 0 is included on purpose: not to be confused with False or None.
    for bad_value in (1, 1.9, 0, -2):
        with pytest.raises(ValueError):
            checkpoint(coll, split_every=bad_value)

    # This is legal for dask.array but not here
    with pytest.raises(TypeError):
        checkpoint(coll, split_every={0: 2})
Example #8
0
def test_wait_on_one():
    """wait_on() on a single collection returns one that computes to the same values.

    Both tasks route through ``NodeCounter.f``; the counter is expected to
    read 2 after the compute.
    """
    counter = NodeCounter()
    graph = {("a", h1): (counter.f, 1), ("a", h2): (counter.f, 2)}
    source = Tuple(graph, list(graph))

    waited = wait_on(source)

    assert waited.compute(scheduler="sync") == (1, 2)
    assert counter.n == 2