Example #1
def test_annotations_survive_optimization():
    with dask.annotate(foo="bar"):
        graph = HighLevelGraph.from_collections(
            "b",
            {
                "a": 1,
                "b": (inc, "a"),
                "c": (inc, "b")
            },
            [],
        )
        d = Delayed("b", graph)

    assert type(d.dask) is HighLevelGraph
    assert len(d.dask.layers) == 1
    assert len(d.dask.layers["b"]) == 3
    assert d.dask.layers["b"].annotations == {"foo": "bar"}

    # Ensure optimizing a Delayed object returns a HighLevelGraph
    # and doesn't lose annotations
    (d_opt, ) = dask.optimize(d)
    assert type(d_opt.dask) is HighLevelGraph
    assert len(d_opt.dask.layers) == 1
    assert len(d_opt.dask.layers["b"]) == 2  # c is culled
    assert d_opt.dask.layers["b"].annotations == {"foo": "bar"}
Example #2
def test_single_annotation(annotation):
    with dask.annotate(**annotation):
        A = da.ones((10, 10), chunks=(5, 5))

    alayer = A.__dask_graph__().layers[A.name]
    assert alayer.annotations == annotation
    assert dask.config.get("annotations", None) is None
Example #3
async def test_compute(c, s, a, b):
    with dask.annotate(resources={"A": 1}):
        x = delayed(inc)(1)
    with dask.annotate(resources={"B": 1}):
        y = delayed(inc)(x)

    yy = c.compute(y, optimize_graph=False)
    await wait(yy)

    assert b.data

    xs = [delayed(inc)(i) for i in range(10, 20)]
    xxs = c.compute(xs, resources={"B": 1})
    await wait(xxs)

    assert len(b.data) > 10
Example #4
def test_to_dataframe_optimize_graph():
    pytest.importorskip("dask.dataframe")
    from dask.dataframe.utils import assert_eq as assert_eq_df

    x = db.from_sequence(
        [{"name": "test1", "v1": 1}, {"name": "test2", "v1": 2}], npartitions=2
    )

    # linear `map` tasks will be fused by graph optimization
    with dask.annotate(foo=True):
        y = x.map(lambda a: dict(**a, v2=a["v1"] + 1))
        y = y.map(lambda a: dict(**a, v3=a["v2"] + 1))
        y = y.map(lambda a: dict(**a, v4=a["v3"] + 1))

    # verifying the maps are not fused yet
    assert len(y.dask) == y.npartitions * 4

    # with optimizations
    d = y.to_dataframe()

    # All the `map` tasks have been fused
    assert len(d.dask) < len(y.dask)

    # no optimizations
    d2 = y.to_dataframe(optimize_graph=False)

    # Graph hasn't been fused. It contains all the original tasks,
    # plus one extra layer converting to DataFrame
    assert len(d2.dask) == len(y.dask) + d.npartitions

    # Annotations are still there
    assert hlg_layer_topological(d2.dask, 1).annotations == {"foo": True}

    assert_eq_df(d, d2)
Example #5
async def test_dataframe_annotations(c, s, a, b):
    retries = 5
    plugin = ExampleAnnotationPlugin(retries=retries)
    s.add_plugin(plugin)

    assert plugin in s.plugins

    df = dd.from_pandas(
        pd.DataFrame({
            "a": np.arange(10, dtype=int),
            "b": np.arange(10, 0, -1, dtype=float)
        }),
        npartitions=5,
    )
    df = df.shuffle("a", shuffle="tasks", max_branch=2)
    acol = df["a"]
    bcol = df["b"]

    with dask.annotate(retries=retries):
        df = acol + bcol

    with dask.config.set(optimization__fuse__active=False):
        rdf = await c.compute(df)

    assert rdf.dtypes == np.float64
    assert (rdf == 10.0).all()

    # There is an annotation match per partition (i.e. task)
    assert plugin.retry_matches == df.npartitions
Example #6
def test_multiple_annotations():
    with dask.annotate(block_id=annot_map_fn):
        with dask.annotate(resource="GPU"):
            A = da.ones((10, 10), chunks=(5, 5))

        B = A + 1

    C = B + 1

    assert dask.config.get("annotations", None) is None

    alayer = A.__dask_graph__().layers[A.name]
    blayer = B.__dask_graph__().layers[B.name]
    clayer = C.__dask_graph__().layers[C.name]
    assert alayer.annotations == {"resource": "GPU", "block_id": annot_map_fn}
    assert blayer.annotations == {"block_id": annot_map_fn}
    assert clayer.annotations is None
Example #7
def test_multiple_annotations():
    da = pytest.importorskip("dask.array")
    with dask.annotate(block_id=annot_map_fn):
        with dask.annotate(resources={"GPU": 1}):
            A = da.ones((10, 10), chunks=(5, 5))

        B = A + 1

    C = B + 1

    assert dask.config.get("annotations", None) is None

    alayer = A.__dask_graph__().layers[A.name]
    blayer = B.__dask_graph__().layers[B.name]
    clayer = C.__dask_graph__().layers[C.name]
    assert alayer.annotations == {"resources": {"GPU": 1}, "block_id": annot_map_fn}
    assert blayer.annotations == {"block_id": annot_map_fn}
    assert clayer.annotations is None
Example #8
def test_worker_actor_handle_is_weakref_from_compute_sync(client):
    workers = list(client.run(lambda: None))

    with dask.annotate(workers=workers[0]):
        counter = dask.delayed(Counter)()
    with dask.annotate(workers=workers[1]):
        intermediate = dask.delayed(lambda c: None)(counter)
    with dask.annotate(workers=workers[0]):
        final = dask.delayed(lambda x, c: x)(intermediate, counter)

    final.compute(actors=counter, optimize_graph=False)

    def worker_tasks_running(dask_worker):
        return len(dask_worker.data) + len(dask_worker.actors)

    start = time()
    while any(client.run(worker_tasks_running).values()):
        sleep(0.01)
        assert time() < start + 30
Example #9
async def test_persist_multiple_collections(c, s, a, b):
    with dask.annotate(resources={"A": 1}):
        x = delayed(inc)(1)
        y = delayed(inc)(x)

    xx, yy = c.persist([x, y], optimize_graph=False)

    await wait([xx, yy])

    assert x.key in a.data
    assert y.key in a.data
    assert not b.data
Example #10
async def test_persist_collections(c, s, a, b):
    da = pytest.importorskip("dask.array")
    x = da.arange(10, chunks=(5, ))
    with dask.annotate(resources={"A": 1}):
        y = x.map_blocks(lambda x: x + 1)
    z = y.map_blocks(lambda x: 2 * x)
    w = z.sum()

    ww, yy = c.persist([w, y], optimize_graph=False)

    await wait([ww, yy])

    assert all(stringify(key) in a.data for key in y.__dask_keys__())
Example #11
def test_fuse_roots_annotations():
    x = da.ones(10, chunks=(2,))
    y = da.zeros(10, chunks=(2,))

    with dask.annotate(foo="bar"):
        y = y ** 2

    z = (x + 1) + (2 * y)
    hlg = dask.blockwise.optimize_blockwise(z.dask)
    assert len(hlg.layers) == 3
    assert {"foo": "bar"} in [l.annotations for l in hlg.layers.values()]
    za = da.Array(hlg, z.name, z.chunks, z.dtype)
    assert_eq(za, z)
Example #12
def test_collections_get(client, optimize_graph, s, a, b):
    da = pytest.importorskip("dask.array")

    async def f(dask_worker):
        await dask_worker.set_resources(**{"A": 1})

    client.run(f, workers=[a["address"]])

    with dask.annotate(resources={"A": 1}):
        x = da.random.random(100, chunks=(10, )) + 1

    x.compute(optimize_graph=optimize_graph)

    def g(dask_worker):
        return len(dask_worker.log)

    logs = client.run(g)
    assert logs[a["address"]]
    assert not logs[b["address"]]
Example #13
def persist_across_workers(client, objects, workers=None):
    """
    Calls persist on 'objects', ensuring they are spread
    across the workers given in 'workers'.

    Parameters
    ----------
    client : dask.distributed.Client
    objects : list
        Dask distributed objects to be persisted
    workers : list or None
        List of workers across which to persist the objects.
        If None, all workers attached to 'client' will be used.
    """
    if workers is None:
        workers = client.has_what().keys()  # Default to all workers

    if check_min_dask_version("2020.12.0"):
        with dask.annotate(workers=set(workers)):
            return client.persist(objects)

    else:
        return client.persist(objects, workers={o: workers for o in objects})
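A minimal usage sketch for persist_across_workers; the cluster setup, array shape, and chunking below are illustrative assumptions, not part of the original snippet:

from dask.distributed import Client
import dask.array as da

client = Client()  # local cluster for the sketch; or connect to an existing scheduler

x = da.random.random((1000, 100), chunks=(100, 100))

# Persist the collection, spreading its pieces across all attached workers
(x_persisted,) = persist_across_workers(client, [x])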
Example #14
import ray
from ray.util.dask import enable_dask_on_ray

import dask
import dask.array as da

# Start Ray.
# Tip: If connecting to an existing cluster, use ray.init(address="auto").
ray.init()

# Use our Dask config helper to set the scheduler to ray_dask_get globally,
# without having to specify it on each compute call.
enable_dask_on_ray()

# All Ray tasks that underlie the Dask operations performed in an annotation
# context will require the indicated resources: 2 CPUs and 0.01 of the custom
# resource.
with dask.annotate(
    ray_remote_args=dict(num_cpus=2, resources={"custom_resource": 0.01})
):
    d_arr = da.ones(100)

# Operations on the same collection can have different annotations.
with dask.annotate(ray_remote_args=dict(resources={"other_custom_resource": 0.01})):
    d_arr = 2 * d_arr

# This happens outside of the annotation context, so no resource constraints
# will be attached to the underlying Ray tasks for the sum() operation.
sum_ = d_arr.sum()

# Compute the result, passing in a default resource request that will be
# applied to all operations that aren't already annotated with a resource
# request. In this case, only the sum() operation will get this default
# resource request (the specific resource name below is illustrative).
sum_.compute(ray_remote_args=dict(resources={"another_custom_resource": 0.01}))
Example #15
    def wrapped_fn(*args, **kwargs):
        if dd is None or not hasattr(dask, "annotate"):
            return fn(*args, **kwargs)

        with dask.annotate(ray_remote_args=dict(scheduling_strategy="SPREAD")):
            return fn(*args, **kwargs)
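A hedged sketch of how the wrapper above might be assembled into a complete decorator; the decorator name, the import guard, and the use of functools.wraps are assumptions for illustration, not taken from the original snippet:

import functools

try:
    import dask
    import dask.dataframe as dd
except ImportError:
    dask = None
    dd = None


def annotate_spread(fn):
    """Hypothetical decorator: run fn's Dask work with Ray's SPREAD scheduling strategy."""

    @functools.wraps(fn)
    def wrapped_fn(*args, **kwargs):
        # Fall back to a plain call when dask.dataframe or dask.annotate is unavailable.
        if dd is None or not hasattr(dask, "annotate"):
            return fn(*args, **kwargs)

        # Ask Dask-on-Ray to spread the underlying Ray tasks across the cluster.
        with dask.annotate(ray_remote_args=dict(scheduling_strategy="SPREAD")):
            return fn(*args, **kwargs)

    return wrapped_fn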