Python optimize Exemples, dask.optimize Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : nd2_reader.py Projet : zeroth/nd2-dask

def get_layer_list(channels, nd2_func, path, frame_shape, frame_dtype,
                   n_timepoints):
    """Build napari ``(data, add_kwargs, layer_type)`` tuples for a file.

    For every entry of ``channels`` a lazy stack of ``n_timepoints`` frames is
    created from ``nd2_func(path, channel_index)`` and stored under a colormap
    name picked cyclically from the module-level ``color_maps``.

    NOTE(review): the dict is keyed by colormap name, so once there are more
    channels than colormaps a later channel replaces an earlier one --
    confirm this is intended.

    Returns
    -------
    list of tuple
        One ``(dask array, keyword dict, "image")`` tuple per stored stack.
    """
    stacks_by_colormap = {}
    for index, _name in enumerate(channels):
        lazy_frames = []
        for t in range(n_timepoints):
            lazy_frames.append(
                da.from_delayed(delayed(nd2_func(path, index))(t),
                                shape=frame_shape,
                                dtype=frame_dtype))
        optimized = dask.optimize(da.stack(lazy_frames))[0]
        stacks_by_colormap[color_maps[index % len(color_maps)]] = optimized

    print("channel_dict", stacks_by_colormap)

    layers = []
    for colormap_name, stack in stacks_by_colormap.items():
        layer_kwargs = {
            "name": colormap_name,
            "colormap": colormap_name,
            "blending": 'additive',
            "rendering": "mip",
        }
        # Metadata entries are applied last so they win on key clashes.
        layer_kwargs.update(get_metadata(path))
        layers.append((stack, layer_kwargs, "image"))
    return layers

Exemple #2

0

Afficher le fichier

def test_atop_non_atop_output():
    """Exercise ``optimize_atop`` on a graph with elementwise and sum layers."""
    x = da.ones(10, chunks=(5, ))
    y = x + 1 + 2 + 3
    w = y.sum()
    z = y * 2 * 3 * 4

    # Running the optimizer must leave the indices of z's own layer untouched.
    indices_before = tuple(z.dask.dicts[z.name].indices)
    [zz] = dask.optimize(z)
    indices_after = tuple(z.dask.dicts[z.name].indices)
    assert indices_before == indices_after, "z_top mutated"

    z_keys = list(dask.core.flatten(z.__dask_keys__()))
    dsk = optimize_atop(z.dask, keys=z_keys)
    assert isinstance(dsk, dask.sharedict.ShareDict)
    top_layers = [lyr for lyr in dsk.dicts.values() if isinstance(lyr, TOP)]
    assert len(top_layers) == 1

    merged = dask.sharedict.merge(w.dask, z.dask)
    merged_keys = list(
        dask.core.flatten([w.__dask_keys__(), z.__dask_keys__()]))
    dsk = optimize_atop(merged, keys=merged_keys)
    assert isinstance(dsk, dask.sharedict.ShareDict)
    # NOTE(review): this counts TOP layers in z's own graph, not in ``dsk``.
    z_top_layers = [
        lyr for lyr in z.dask.dicts.values() if isinstance(lyr, TOP)
    ]
    assert len(z_top_layers) >= 1

Exemple #3

0

Afficher le fichier

Fichier : nd2_reader.py Projet : yichechang/nd2-dask

def get_layer_list(channels, nd2_func, path, frame_shape, frame_dtype,
                   n_timepoints):
    """Build napari ``(data, add_kwargs, layer_type)`` tuples, one per channel.

    Each channel gets a lazy stack of ``n_timepoints`` frames read through
    ``nd2_func(path, channel_index)``.  Visibility comes from membership in the
    module-level ``VISIBLE`` and the colormap runs from black to the colour
    listed in ``CHANNEL_COLORS``.

    Returns
    -------
    list of tuple
        ``(dask array, keyword dict, "image")`` for every channel name.
    """
    # Seed the mapping first so the key order follows ``channels``.
    stacks = {name: [] for name in channels}
    for index, name in enumerate(channels):
        lazy_frames = []
        for t in range(n_timepoints):
            lazy_frames.append(
                da.from_delayed(delayed(nd2_func(path, index))(t),
                                shape=frame_shape,
                                dtype=frame_dtype))
        stacks[name] = dask.optimize(da.stack(lazy_frames))[0]

    layers = []
    for name, stack in stacks.items():
        is_visible = name in VISIBLE
        if is_visible:
            blend_mode = 'additive'
        else:
            blend_mode = 'translucent'
        rgb = list(CHANNEL_COLORS[name])
        cmap = Colormap([[0, 0, 0], rgb])
        layer_kwargs = {
            "name": name,
            "visible": is_visible,
            "colormap": cmap,
            "blending": blend_mode,
        }
        # Metadata entries are applied last so they win on key clashes.
        layer_kwargs.update(get_metadata(path))
        layers.append((stack, layer_kwargs, "image"))
    return layers

Exemple #4

0

Afficher le fichier

Fichier : dask.py Projet : ClusterHQ/eliot

def compute_with_trace(*args):
    """Run Dask ``compute()`` inside an Eliot action so tasks get traced.

    Dask executes a graph of tasks while Eliot records trees, so the graph is
    emulated with a tree: every task becomes an Eliot action that also lists
    the tasks it depends on.

    The approach is:

        1. Open one top-level action.

        2. Give each entry of the dask graph a child task ID (via
           serialize_task_id), assigned in likely execution order, so that
           when B depends on A the task level of B is higher than the task
           level of A.

        3. Swap each function for a wrapper that continues the matching task
           ID (Action.continue_task) and records the dependencies of that
           function.

    Known issues:

        1. Retries will confuse Eliot.  A different distributed-tree
           mechanism within Eliot is probably needed to solve that.
    """
    # Step 1: the top-level Eliot action wrapping the whole computation.
    with start_action(action_type="dask:compute"):
        # Logging is attached to the already-optimized graph to keep the
        # amount of log output down; hence no second optimization pass below.
        traced = optimize(*args, optimizations=[_add_logging])
        return compute(*traced, optimize_graph=False)

Exemple #5

0

Afficher le fichier

Fichier : utilities.py Projet : evermountaintech/minian

def optimize_chunk(arr: xr.DataArray, chk: dict) -> xr.DataArray:
    """
    Rechunk a `xr.DataArray` and optimize the resulting graph so that
    "rechunk-merge" tasks are rewritten to "merge_restricted".

    Parameters
    ----------
    arr : xr.DataArray
        Array to rechunk.
    chk : dict
        Target chunk sizes, passed straight to `arr.chunk`.

    Returns
    -------
    arr_chk : xr.DataArray
        The rechunked array whose `.data` has been replaced by the optimized
        dask collection.
    """
    extended_fast_funcs = FAST_FUNCTIONS + [darr.core.concatenate3]
    rechunked = arr.chunk(chk)
    optimizer = fct.partial(
        custom_arr_optimize,
        fast_funcs=extended_fast_funcs,
        rewrite_dict={"rechunk-merge": "merge_restricted"},
    )
    # The custom optimizer is only active for the duration of this call.
    with da.config.set(array_optimize=optimizer):
        optimized_data = da.optimize(rechunked.data)[0]
        rechunked.data = optimized_data
    return rechunked

Exemple #6

0

Afficher le fichier

def get_layer_list(channels, nd2_func, path, frame_shape, frame_dtype,
                   n_timepoints):
    """Build napari ``(data, add_kwargs, layer_type)`` tuples, one per channel.

    Each channel gets a lazy stack of ``n_timepoints`` frames read through
    ``nd2_func(path, channel_index)``.  Colour, scale and translation are read
    from ``get_metadata(path)``; every layer is visible with additive
    blending.

    Returns
    -------
    list of tuple
        ``(dask array, keyword dict, "image")`` for every channel name.
    """
    # Seed the mapping first so the key order follows ``channels``.
    stacks = {name: [] for name in channels}
    for index, name in enumerate(channels):
        lazy_frames = []
        for t in range(n_timepoints):
            lazy_frames.append(
                da.from_delayed(delayed(nd2_func(path, index))(t),
                                shape=frame_shape,
                                dtype=frame_dtype))
        stacks[name] = dask.optimize(da.stack(lazy_frames))[0]

    layers = []
    for name, stack in stacks.items():
        is_visible = True
        blend_mode = 'additive' if is_visible else 'translucent'
        meta = get_metadata(path)
        rgba = meta['channels'][name]
        # Drop the last component (alpha) before building the colormap.
        cmap = Colormap([[0, 0, 0], rgba[:-1]])
        layer_kwargs = {
            "name": name,
            "visible": is_visible,
            "colormap": cmap,
            "blending": blend_mode,
            "scale": meta['scale'],
            "translate": meta['translate'],
        }
        layers.append((stack, layer_kwargs, "image"))
    return layers

Exemple #7

0

Afficher le fichier

def test_annotations_survive_optimization():
    """Annotations set on a layer must still be there after ``dask.optimize``."""
    tasks = {"a": 1, "b": (inc, "a"), "c": (inc, "b")}
    with dask.annotate(foo="bar"):
        graph = HighLevelGraph.from_collections("b", tasks, [])
        d = Delayed("b", graph)

    assert type(d.dask) is HighLevelGraph
    assert len(d.dask.layers) == 1
    original_layer = d.dask.layers["b"]
    assert len(original_layer) == 3
    assert original_layer.annotations == {"foo": "bar"}

    # Optimizing a Delayed object must return a HighLevelGraph
    # and must not lose the annotations.
    [d_opt] = dask.optimize(d)
    assert type(d_opt.dask) is HighLevelGraph
    assert len(d_opt.dask.layers) == 1
    optimized_layer = d_opt.dask.layers["b"]
    assert len(optimized_layer) == 2  # "c" has been culled
    assert optimized_layer.annotations == {"foo": "bar"}

Exemple #8

0

Afficher le fichier

Fichier : test_delayed.py Projet : maxrudolph1/vsepr_hero

def test_delayed_optimize():
    """Optimizing a Delayed keeps only the keys needed for its own key."""
    graph = {
        'a': 1,
        'b': (inc, 'a'),
        'c': (inc, 'b'),
    }
    x = Delayed('b', graph)
    [x2] = dask.optimize(x)
    # 'c' is not needed to produce 'b', so __dask_optimize__ culls it.
    remaining_keys = sorted(x2.dask.keys())
    assert remaining_keys == ['a', 'b']

Exemple #9

0

Afficher le fichier

Fichier : test_atop.py Projet : vertexclique/dask

def test_blockwise_non_blockwise_output():
    """Exercise ``optimize_blockwise`` on elementwise plus reduction layers."""
    x = da.ones(10, chunks=(5, ))
    y = x + 1 + 2 + 3
    w = y.sum()
    z = y * 2 * 3 * 4

    # Running the optimizer must leave the indices of z's own layer untouched.
    indices_before = tuple(z.dask.dicts[z.name].indices)
    [zz] = dask.optimize(z)
    indices_after = tuple(z.dask.dicts[z.name].indices)
    assert indices_before == indices_after, "z_top mutated"

    z_keys = list(dask.core.flatten(z.__dask_keys__()))
    dsk = optimize_blockwise(z.dask, keys=z_keys)
    assert isinstance(dsk, HighLevelGraph)
    blockwise_layers = [
        lyr for lyr in dsk.dicts.values() if isinstance(lyr, Blockwise)
    ]
    assert len(blockwise_layers) == 1

    merged = HighLevelGraph.merge(w.dask, z.dask)
    merged_keys = list(
        dask.core.flatten([w.__dask_keys__(), z.__dask_keys__()]))
    dsk = optimize_blockwise(merged, keys=merged_keys)
    assert isinstance(dsk, HighLevelGraph)
    # NOTE(review): this counts Blockwise layers in z's graph, not in ``dsk``.
    z_blockwise_layers = [
        lyr for lyr in z.dask.dicts.values() if isinstance(lyr, Blockwise)
    ]
    assert len(z_blockwise_layers) >= 1

Exemple #10

0

Afficher le fichier

Fichier : test_delayed.py Projet : mrocklin/dask

def test_delayed_optimize():
    """``dask.optimize`` on a Delayed drops tasks its key does not depend on."""
    tasks = dict([('a', 1), ('b', (inc, 'a')), ('c', (inc, 'b'))])
    x = Delayed('b', tasks)
    optimized = dask.optimize(x)
    (x2,) = optimized
    # Only 'a' and 'b' are needed for 'b'; 'c' is culled.
    assert sorted(x2.dask.keys()) == ['a', 'b']

Exemple #11

0

Afficher le fichier

def compute_with_trace(*args):
    """Dask ``compute()`` with Eliot tracing added.

    Eliot logs trees whereas Dask runs a graph of tasks; the graph is therefore
    emulated by a tree in which each task is an Eliot action that lists the
    tasks it depends on.

    Algorithm:

        1. Create a top-level action.

        2. For each entry in the dask graph create a child with
           serialize_task_id, in likely order of execution, so that if B
           depends on A the task level of B is higher than the task level
           of A.

        3. Replace each function with a wrapper that uses the corresponding
           task ID (with Action.continue_task) and also records which other
           things the function depends on.

    Known issues:

        1. Retries will confuse Eliot; solving that probably needs a
           different distributed-tree mechanism within Eliot.
    """
    # Top-level Eliot action (step 1 of the algorithm above).
    with start_action(action_type="dask:compute"):
        # Add logging to the graph after optimization, which keeps logging
        # verbosity down; compute() is then told not to optimize again.
        with_logging = optimize(*args, optimizations=[_add_logging])
        return compute(*with_logging, optimize_graph=False)

Exemple #12

0

Afficher le fichier

def run_photoz_dask(runs, modelD, galcat, output_dir, fit_bands, ip_dask):
    """Run the photo-z on a Dask cluster.

    Parameters
    ----------
    runs :
        Object whose ``EBV`` attribute can be turned into a ``dict``.
    modelD :
        Model passed through ``fix_model`` and scattered to the workers.
    galcat :
        Ignored: the catalogue is always re-read from
        ``<output_dir>/galcat_in.pq``.  Kept for interface compatibility.
    output_dir : str or Path
        Directory holding ``galcat_in.pq``; ``pzcat.pq`` is written there.
    fit_bands :
        Bands forwarded to ``fix_model`` and ``bcnz.fit.photoz_flatten``.
    ip_dask : str or None
        Scheduler address; ``None`` starts a local cluster.

    Returns
    -------
    None
        Returns early, without computing, when ``pzcat.pq`` already exists.
    """
    # Normalise once so both the output and the input path work for ``str``
    # as well as ``Path`` arguments.
    output_dir = Path(output_dir)
    path_out = output_dir / 'pzcat.pq'
    if path_out.exists():
        print('Photo-z catalogue already exists.')
        return

    # If not specified, we start up a local cluster.
    client = Client(ip_dask) if ip_dask is not None else Client()
    xnew_modelD = client.scatter(fix_model(modelD, fit_bands))

    galcat = dd.read_parquet(str(output_dir / 'galcat_in.pq'))

    # NOTE(review): the partition count is hard-coded (9900 / 10 + 1) --
    # confirm it matches the catalogue actually being processed.
    npartitions = int(9900 / 10) + 1
    galcat = (galcat.reset_index()
              .repartition(npartitions=npartitions)
              .set_index('ref_id'))

    ebvD = dict(runs.EBV)
    pzcat = galcat.map_partitions(
        bcnz.fit.photoz_flatten, xnew_modelD, ebvD, fit_bands)

    pzcat = pzcat.repartition(npartitions=100)
    pzcat = dask.optimize(pzcat)[0]

    pzcat.to_parquet(str(path_out))

Exemple #13

0

Afficher le fichier

Fichier : test_aics_image_delayed.py Projet : zeroth/aicsimageio

def test_imread_use_dask_false(resources_dir):
    """With dask reads disabled the optimized graph has exactly 3 entries."""
    with dask_utils.cluster_and_client(processes=False) as (cluster, client):
        f = resources_dir / BIG_OME_FILE

        # No file pointer to the image may be open before reading.
        proc = Process()
        open_paths = [handle.path for handle in proc.open_files()]
        assert str(f) not in open_paths

        # Raises if no client exists.
        get_client()

        # Switch off dask-based reading.
        use_dask(False)

        img = AICSImage(f)
        assert img.data.shape == (3, 1, 3, 5, 325, 475)

        # The file is read with the base reader and then rechunked with dask;
        # normally the task count for this file is 90.
        optimized_graph = optimize(img.dask_data)[0].__dask_graph__()
        assert len(optimized_graph) == 3

    # No file pointer to the image may remain open afterwards either.
    open_paths = [handle.path for handle in proc.open_files()]
    assert str(f) not in open_paths

Exemple #14

0

Afficher le fichier

def test_fuse_roots():
    """After optimization the expression below consists of five real tasks."""
    ones = da.ones(10, chunks=(2,))
    zeros = da.zeros(10, chunks=(2,))
    z = (ones + 1) + (2 * zeros ** 2)
    [zz] = dask.optimize(z)
    # Count tasks only: the graph also holds some aliases, so its plain
    # length is not the number being checked.
    n_tasks = sum(1 for value in zz.dask.values() if dask.istask(value))
    assert n_tasks == 5
    assert_eq(zz, z)

Exemple #15

0

Afficher le fichier

def initbf(varr: xr.DataArray, A: xr.DataArray,
           C: xr.DataArray) -> Tuple[xr.DataArray, xr.DataArray]:
    """
    Initialize background terms from the spatial and temporal cell components.

    The estimated cell activity movie is the product of the spatial components
    and the temporal components over the "unit_id" dimension. Subtracting it
    from the input movie and clipping at zero gives the residual movie. The
    spatial background footprint `b` is the mean of the residual movie over
    "frame"; the temporal background component `f` is its mean over "height"
    and "width".

    Parameters
    ----------
    varr : xr.DataArray
        Input movie data. Should have dimensions ("frame", "height", "width").
    A : xr.DataArray
        Estimated spatial footprints of cells. Should have dimensions
        ("unit_id", "height", "width").
    C : xr.DataArray
        Estimated temporal activities of cells. Should have dimensions
        ("unit_id", "frame").

    Returns
    -------
    b : xr.DataArray
        Initial estimate of the spatial footprint of background. Has
        dimensions ("height", "width").
    f : xr.DataArray
        Initial estimate of the temporal activity of background. Has dimension
        "frame".
    """
    # Convert the footprint blocks to sparse COO and load them eagerly.
    footprints = A.data.map_blocks(sparse.COO).compute()
    cell_movie = darr.tensordot(C, footprints, axes=[(0, ), (0, )])
    residual = (varr - cell_movie).clip(0)
    b = residual.mean("frame")
    f = residual.mean(["height", "width"])
    optimizer = fct.partial(
        custom_arr_optimize,
        rename_dict={"tensordot": "tensordot_restricted"},
    )
    with da.config.set(array_optimize=optimizer):
        b = da.optimize(b)[0]
        f = da.optimize(f)[0]
    # Compute both in one call.
    computed = da.compute([b, f])[0]
    b, f = computed
    return b, f

Exemple #16

0

Afficher le fichier

Fichier : test_graph.py Projet : haraldschilly/distributed

def test_basic(client: Client):
    """Shuffle a demo timeseries on "id" and check graph shape and result."""
    df = dd.demo.make_timeseries(freq="15D", partition_freq="30D")
    shuffled = shuffle(df, "id")

    [opt] = dask.optimize(shuffled)
    layer_at_1 = hlg_layer_topological(opt.dask, 1)
    assert isinstance(layer_at_1, Blockwise)
    # Expected layers:
    # setup -> blockwise -> barrier -> unpack -> drop_by_shallow_copy
    assert len(opt.dask.layers) == 5

    reference = df.shuffle("id", shuffle="tasks")
    dd.utils.assert_eq(shuffled, reference, scheduler=client)

Exemple #17

0

Afficher le fichier

Fichier : cg_variants.py Projet : bungun/scs-dask

def cg_iterate(A, state, persist=True):
    """Advance a conjugate-gradient style iteration by one step.

    ``state`` is the triple ``(x, r, p)`` from the previous step.  The three
    updated collections are optimized together and, when ``persist`` is true,
    persisted without a second optimization pass.

    Returns the new ``(x, r, p)`` triple.
    """
    x_prev, r_prev, p_prev = state
    A_p = A.dot(p_prev)
    step = r_prev.dot(r_prev) / p_prev.dot(A_p)
    x_next = x_prev + step * p_prev
    r_next = r_prev - step * A_p
    p_next = r_next + p_prev * r_next.dot(r_next) / r_prev.dot(r_prev)
    x_next, r_next, p_next = dask.optimize(x_next, r_next, p_next)
    if not persist:
        return x_next, r_next, p_next
    # Already optimized above, so skip graph optimization while persisting.
    x_next, r_next, p_next = dask.persist(
        x_next, r_next, p_next, optimize_graph=False)
    return x_next, r_next, p_next

Exemple #18

0

Afficher le fichier

 def optimize(self, *args, **kwargs):
     """ Optimise graphs when dask is in use

     If either ``self.using_dask`` or ``self._optimize`` is falsy the first
     positional argument is handed back untouched.

     :param args: forwarded to the module-level ``optimize``
     :param kwargs: forwarded to the module-level ``optimize``
     :return: first element of the optimised result, or ``args[0]``
     """
     if not (self.using_dask and self._optimize):
         return args[0]
     optimised = optimize(*args, **kwargs)
     return optimised[0]

Exemple #19

0

Afficher le fichier

def random_mode(dataset_file: object) -> tuple:
    """Describe a dataset in random mode.

    Runs ``independent_mode`` without computing, then for every attribute
    listed in ``dd['meta']['attrs']`` replaces its distribution by a
    normalized all-ones one and sets its missing rate to zero.  The
    description is finally optimized and computed with dask.

    Parameters
    ----------
    dataset_file : object
        Forwarded unchanged to ``independent_mode``.

    Returns
    -------
    tuple
        ``(dd, data)``: the computed description and the second value returned
        by ``independent_mode``.
    """
    dd, data = independent_mode(dataset_file, alone=False)
    probs = dd['distribution']['probs']
    for attr in dd['meta']['attrs']:
        # 1) make the distribution uniform ...
        uniform_distribution = np.ones_like(probs[attr])
        probs[attr] = utils.normalize_given_distribution(
            uniform_distribution).tolist()
        # 2) ... and clear the missing rate.  The key must match the one
        # created by independent_mode ('missing_rates').
        dd['missing_rates'][attr] = 0
    dd = dask.optimize(dd)[0]
    dd = dask.compute(dd)[0]
    return dd, data

Exemple #20

0

Afficher le fichier

def persist_with_trace(*args):
    """Run Dask ``persist()`` inside an Eliot action so tasks get traced.

    Known issues:

        1. Retries will confuse Eliot.  A different distributed-tree
           mechanism within Eliot is probably needed to solve that.
    """
    # The top-level Eliot action wrapping the whole persist call.
    with start_action(action_type="dask:persist"):
        # Logging is attached to the already-optimized graph to keep the
        # amount of log output down; hence no second optimization pass below.
        traced = optimize(*args, optimizations=[_add_logging])
        return persist(*traced, optimize_graph=False)

Exemple #21

0

Afficher le fichier

Fichier : dask.py Projet : dcherian/dcpy

def ntasks(obj, optimize=False):
    """Return the number of entries in the dask graph of ``obj``.

    Parameters
    ----------
    obj
        Any object exposing ``__dask_graph__()``.
    optimize: bool, optional
        When true, measure the graph after ``dask.optimize`` instead of the
        raw one.

    Returns
    -------
    number of tasks, int
    """
    target = dask.optimize(obj)[0] if optimize else obj
    return len(target.__dask_graph__())

Exemple #22

0

Afficher le fichier

Fichier : cg_variants.py Projet : bungun/scs-dask

def cg_calcs_proto(shape, chunks, dtype, dsk, key, optimize=False, **options):
    """Wrap a raw CG graph into arrays, persist them and compute ``norm(r)``.

    Parameters
    ----------
    shape, chunks, dtype
        Passed to ``da.Array`` for each of the three arrays.
    dsk
        Graph containing the ``'x-' + key``, ``'r-' + key`` and ``'p-' + key``
        blocks.
    key : str
        Name suffix shared by the three arrays.
    optimize : bool, optional
        Optimize the three arrays together before persisting.
    **options
        ``finish`` (bool): alias the blocks under a final name first.
        ``name`` (str): that final name, default ``'cg-output'``.

    Returns
    -------
    tuple
        ``(x, r, p, res)`` where ``res`` is the computed ``da.linalg.norm(r)``.
    """
    if options.pop('finish', False):
        key_final = options.pop('name', 'cg-output')
        dsk_final = {}
        for i in range(len(chunks[0])):
            for prefix in ('x-', 'r-', 'p-'):
                dsk_final[(prefix + key_final, i)] = (prefix + key, i)
        # Merge the alias layer once, after it is complete, rather than
        # on every iteration.  Skipped when there are no chunks.
        if dsk_final:
            dsk = dask.sharedict.merge(dsk, dsk_final)
        key = key_final
    x = da.Array(dsk, 'x-' + key, shape=shape, chunks=chunks, dtype=dtype)
    r = da.Array(dsk, 'r-' + key, shape=shape, chunks=chunks, dtype=dtype)
    p = da.Array(dsk, 'p-' + key, shape=shape, chunks=chunks, dtype=dtype)
    if optimize:
        (x, r, p) = dask.optimize(x, r, p)
    (x, r, p) = dask.persist(x, r, p, optimize_graph=False, traverse=False)
    (res,) = dask.compute(da.linalg.norm(r))
    return x, r, p, res

Exemple #23

0

Afficher le fichier

Fichier : test_aics_image_delayed.py Projet : fabian19941220-gmail-com/aicsimageio

def test_imread_from_delayed(resources_dir, filename, expected_shape, expected_tasks):
    """``imread_dask`` yields the expected shape and optimized graph size."""
    with dask_utils.cluster_and_client(processes=False) as (cluster, client):
        f = resources_dir / filename

        # No file pointer to the image may be open before reading.
        proc = Process()
        open_paths = [handle.path for handle in proc.open_files()]
        assert str(f) not in open_paths

        img = imread_dask(f)
        assert img.shape == expected_shape
        optimized_graph = optimize(img)[0].__dask_graph__()
        assert len(optimized_graph) == expected_tasks

    # No file pointer to the image may remain open afterwards either.
    open_paths = [handle.path for handle in proc.open_files()]
    assert str(f) not in open_paths

Exemple #24

0

Afficher le fichier

Fichier : test_atop.py Projet : mrocklin/dask

def test_atop_non_atop_output():
    """``optimize_atop`` must cope with graphs that also hold non-TOP layers."""

    def count_top(layers):
        # Number of layers that are TOP instances.
        return sum(1 for layer in layers.values() if isinstance(layer, TOP))

    x = da.ones(10, chunks=(5,))
    y = x + 1 + 2 + 3
    w = y.sum()
    z = y * 2 * 3 * 4

    # The optimizer must not mutate the indices of z's own layer.
    before = tuple(z.dask.dicts[z.name].indices)
    (zz,) = dask.optimize(z)
    after = tuple(z.dask.dicts[z.name].indices)
    assert before == after, "z_top mutated"

    keys_z = list(dask.core.flatten(z.__dask_keys__()))
    dsk = optimize_atop(z.dask, keys=keys_z)
    assert isinstance(dsk, dask.sharedict.ShareDict)
    assert count_top(dsk.dicts) == 1

    merged = dask.sharedict.merge(w.dask, z.dask)
    keys_wz = list(dask.core.flatten([w.__dask_keys__(), z.__dask_keys__()]))
    dsk = optimize_atop(merged, keys=keys_wz)
    assert isinstance(dsk, dask.sharedict.ShareDict)
    # NOTE(review): counted on z's own graph, not on the merged ``dsk``.
    assert count_top(z.dask.dicts) >= 1

Exemple #25

0

Afficher le fichier

Fichier : test_atop.py Projet : yliapis/dask

def test_blockwise_non_blockwise_output():
    """``optimize_blockwise`` must cope with graphs holding other layer types."""

    def count_blockwise(layers):
        # Number of layers that are Blockwise instances.
        return sum(
            1 for layer in layers.values() if isinstance(layer, Blockwise))

    x = da.ones(10, chunks=(5,))
    y = x + 1 + 2 + 3
    w = y.sum()
    z = y * 2 * 3 * 4

    # The optimizer must not mutate the indices of z's own layer.
    before = tuple(z.dask.dicts[z.name].indices)
    (zz,) = dask.optimize(z)
    after = tuple(z.dask.dicts[z.name].indices)
    assert before == after, "z_top mutated"

    keys_z = list(dask.core.flatten(z.__dask_keys__()))
    dsk = optimize_blockwise(z.dask, keys=keys_z)
    assert isinstance(dsk, HighLevelGraph)
    assert count_blockwise(dsk.dicts) == 1

    merged = HighLevelGraph.merge(w.dask, z.dask)
    keys_wz = list(dask.core.flatten([w.__dask_keys__(), z.__dask_keys__()]))
    dsk = optimize_blockwise(merged, keys=keys_wz)
    assert isinstance(dsk, HighLevelGraph)
    # NOTE(review): counted on z's own graph, not on the merged ``dsk``.
    assert count_blockwise(z.dask.dicts) >= 1

Exemple #26

0

Afficher le fichier

Fichier : test_distributed.py Projet : rubenvdg/dask

def test_blockwise_different_optimization(c):
    """Regression test for GH-7632.

    SubgraphCallable.__eq__ did not distinguish subgraphs that share outputs
    and arity but differ internally, which produced wrong results.  The bug
    shows up with distributed because it uses a function cache.
    """
    da = pytest.importorskip("dask.array")
    np = pytest.importorskip("numpy")

    u = da.from_array(np.arange(3))
    v = da.from_array(np.array([10 + 2j, 7 - 3j, 8 + 1j]))
    conj_v = v.conj()
    # Same product twice: once before and once after optimizing the conjugate.
    x = u * conj_v
    [conj_v] = dask.optimize(conj_v)
    y = u * conj_v

    expected = np.array([0 + 0j, 7 + 3j, 16 - 2j])
    with dask.config.set({"optimization.fuse.active": False}):
        results = [x.compute(), y.compute()]
    for value in results:
        np.testing.assert_equal(value, expected)

Exemple #27

0

Afficher le fichier

def independent_mode(dataset_file: object, alone=True) -> dict:
    """Describe a dataset in independent-attribute mode.

    Reads the CSV, builds the description ``dd`` (meta, mins, maxes, missing
    rates, distribution) through the helper functions, then replaces the
    distribution probabilities by the output of
    ``attributes.inject_laplace_noise``.

    When ``alone`` is true the description is optimized and computed with
    dask before returning.

    Returns ``(dd, datos)``: the description and the dict holding the raw
    and NA-dropped data.
    """
    frames = {}
    frames['data'] = read_csv(dataset_file)
    frames['dropna'] = dropna(frames['data'])

    description = {}
    meta = init_meta(frames, describer)
    description['meta'] = meta
    for field in ('datatypes', 'categories', 'key_candidates'):
        meta[field] = describer[field]

    # Each helper receives the description as built so far, so the order of
    # the following statements matters.
    description['mins'] = attributes.get_mins(frames, description, describer)
    description['maxes'] = attributes.get_maxes(frames, description,
                                                describer)
    description['missing_rates'] = attributes.get_missing_rates(
        frames, description, describer)
    description['distribution'] = {}
    probs, bins = attributes.get_distribution(frames, description, describer)
    description['distribution']['probs'] = probs
    description['distribution']['bins'] = bins
    # First noise injection for differential privacy
    description['distribution']['probs'] = attributes.inject_laplace_noise(
        frames, description)

    if alone:
        description = dask.optimize(description)[0]
        description = dask.compute(description)[0]
    return description, frames

Exemple #28

0

Afficher le fichier

Fichier : test_extend.py Projet : xhochy/kartothek

def test_dask_bag_fusing(driver, function_store, driver_name, skip_eager,
                         existing_cube):
    """
    See kartothek/tests/io/cube/test_build.py::test_dask_bag_fusing
    """
    if driver_name == "dask_dataframe":
        pytest.skip("not relevant for dask.dataframe")

    if driver_name == "dask_bag_bs1":
        partition_size = 1
    else:
        partition_size = 3
    n_partitions = 4

    dfs = []
    for i in range(partition_size * n_partitions):
        x_values = [2 * i, 2 * i + 1]
        dfs.append({
            "a": pd.DataFrame({"x": x_values, "p": i, "v3": 42}),
            "b": pd.DataFrame({"x": x_values, "p": i, "v4": 1337}),
        })

    cube = Cube(
        dimension_columns=["x"],
        partition_columns=["p"],
        uuid_prefix="cube",
        seed_dataset="source",
    )

    source_bag = db.from_sequence(dfs, partition_size=partition_size)
    bag = source_bag.map(_count_execution_to_store, store=function_store)
    bag = extend_cube_from_bag(
        data=bag,
        cube=cube,
        store=function_store,
        ktk_cube_dataset_ids=["a", "b"],
    )

    optimized_graph = dask.optimize(bag)[0].__dask_graph__()
    # Only real tasks count; other graph values are ignored.
    task_keys = {
        key
        for key, value in optimized_graph.items()
        if dask.core.istask(value)
    }
    assert len(task_keys) == (n_partitions + 1)

Exemple #29

0

Afficher le fichier

Fichier : converter.py Projet : qinxuye/mars

def convert_dask_collection(dc):
    """
    Turn a dask collection into a mars.core.Object via the remote API.

    Parameters
    ----------
    dc: dask collection
        The dask collection to convert.

    Returns
    -------
    Object
        Mars Object.

    Raises
    ------
    TypeError
        If ``dc`` is not a dask collection.
    ValueError
        If the first key of the optimized graph is neither ``str`` nor
        ``tuple``.
    """
    if not is_dask_collection(dc):
        raise TypeError(
            f"'{type(dc).__name__}' object is not a valid dask collection")

    dc.__dask_graph__().validate()
    dsk = optimize(dc)[0].__dask_graph__()

    first_key = next(iter(dsk.keys()))
    if isinstance(first_key, str):
        key = [first_key]
    elif isinstance(first_key, tuple):
        # Gather every key sharing the first key's leading element and order
        # them by their second element.
        prefix = first_key[0]
        key = [k for k in dsk.keys() if k[0] == prefix]
        key.sort(key=lambda k: k[1])
    else:
        raise ValueError(
            f"Dask collection object seems be broken, with unexpected key type:'{type(first_key).__name__}'"
        )

    res = reduce(mars_dask_get(dsk, [key]))
    if not isinstance(dc, Bag):
        return res
    return spawn(lambda x: list(x[0][0]), args=(res, ))

Exemple #30

0

Afficher le fichier

Fichier : cellphy_reader.py Projet : zeroth/napari-cellphy-reader

def _read_channel_data(aics_img, cindex, n_timepoints, nz):
    """Return the lazy dask array of one channel (scene 0).

    Volumes are requested as 'ZYX' when ``nz > 1`` and otherwise as the 'YX'
    plane at ``Z=0``.  With more than one time point the per-time-point
    arrays are stacked along a new leading axis.
    """
    if nz > 1:
        dims, fixed = 'ZYX', {}
    else:
        dims, fixed = 'YX', {"Z": 0}

    if n_timepoints > 1:
        return da.stack([
            aics_img.get_image_dask_data(dims, S=0, C=cindex, T=tindex,
                                         **fixed)
            for tindex in range(n_timepoints)
        ])
    return aics_img.get_image_dask_data(dims, S=0, C=cindex, T=0, **fixed)


def reader_function(path):
    """Take a path or list of paths and return a list of LayerData tuples.

    Readers are expected to return data as a list of tuples, where each tuple
    is (data, [add_kwargs, [layer_type]]), "add_kwargs" and "layer_type" are
    both optional.

    Parameters
    ----------
    path : str or list of str
        Path to file, or list of paths.

    Returns
    -------
    layer_data : list of tuples
        One ``(data, add_kwargs, "image")`` tuple per channel, where data is a
        lazy dask array and add_kwargs holds the name, blending, rendering,
        scale and colormap passed to the napari ``add_image`` call.
    """
    print("reading file ", path)
    aics_img = AICSImage(path)

    # dims are normally in "STCZYX"
    number_of_channels = aics_img.size_c
    number_of_time_points = aics_img.size_t
    nz = aics_img.size_z
    name_of_channels = aics_img.get_channel_names()
    pixel_x, pixel_y, pixel_z = aics_img.get_physical_pixel_size()

    # The scale has one entry per axis of the arrays built below: an optional
    # leading time axis (step 1), an optional Z axis, then Y and X.
    scale = [pixel_y, pixel_x]
    if nz > 1:
        scale = [pixel_z] + scale
    if number_of_time_points > 1:
        scale = [1] + scale

    print("number_of_channels", number_of_channels)
    print("number_of_time_points", number_of_time_points)
    print("name_of_channels", name_of_channels)
    print("scale", scale)
    print("nz", nz)

    if number_of_channels > 1:
        selected = list(enumerate(name_of_channels))
    else:
        # Single-channel file: there is no loop variable to take the name
        # from, so use the first reported name (or a fallback).
        if name_of_channels:
            single_name = name_of_channels[0]
        else:
            single_name = "channel 0"
        selected = [(0, single_name)]

    channel_dict = {}
    for cindex, channel_name in selected:
        arr = _read_channel_data(aics_img, cindex, number_of_time_points, nz)
        channel_dict[channel_name] = {
            "data": dask.optimize(arr)[0],
            "colormap": color_maps[cindex % len(color_maps)]
        }

    layer_list = []
    for channel_name, channel in channel_dict.items():
        print("creating layer channel_name", channel_name)
        add_kwargs = {
            "name": channel_name,
            "blending": 'additive',
            "rendering": "mip",
            "scale": scale,
            "colormap": channel['colormap']
        }
        print("channel['data'].shape", channel['data'].shape)
        layer_list.append((
            channel['data'],  # data
            add_kwargs,  # kwargs
            "image"  # layer type
        ))

    return layer_list

Exemple #31

0

Afficher le fichier

Fichier : test_delayed.py Projet : zhuomingliang/dask

def test_delayed_optimize():
    """Check that optimizing a Delayed keeps only the keys its target needs."""
    graph = {"a": 1, "b": (inc, "a"), "c": (inc, "b")}
    target = Delayed("b", graph)
    optimized = dask.optimize(target)
    # One collection went in, so exactly one optimized collection comes out.
    (culled, ) = optimized
    # Delayed's __dask_optimize__ culls out 'c', which "b" does not depend on.
    remaining_keys = sorted(culled.dask.keys())
    assert remaining_keys == ["a", "b"]

Exemple #32

0

Afficher le fichier

def est_motion_part(varr: darr.Array,
                    npart: int,
                    chunk_nfm: int,
                    alt_error=5,
                    **kwargs) -> Tuple[darr.Array, darr.Array]:
    """
    Construct dask graph for the recursive motion estimation algorithm.

    The movie is rechunked along its first axis, `est_motion_chunk` is
    scheduled once per chunk, and the per-chunk templates and shifts are then
    repeatedly combined in groups of `npart` until a single template remains.

    Parameters
    ----------
    varr : darr.Array
        Input dask array representing movie data.
    npart : int
        Number of frames/chunks to combine for the recursive algorithm. Must
        be at least 2 whenever more than one chunk has to be combined,
        otherwise the recursion can never reduce the number of templates.
    chunk_nfm : int
        Number of frames in each parallel task. If `None`, the current chunk
        size of `varr` along the first axis is used.
    alt_error : int, optional
        Error threshold between estimated shifts from two alternative methods,
        specified in pixels. By default `5`.
    **kwargs
        Forwarded unchanged to `est_motion_chunk`. When a truthy `mesh_size`
        is present, `get_bspline_param` is evaluated on the first frame and
        its result is used to declare the shape of the per-chunk shifts.

    Returns
    -------
    temps : darr.Array
        Registration template for the movie.
    shifts : darr.Array
        Estimated motion.

    Raises
    ------
    ValueError
        If `npart` is smaller than 2 while more than one chunk has to be
        combined.

    See Also
    --------
    estimate_motion
    """
    if chunk_nfm is None:
        chunk_nfm = varr.chunksize[0]
    varr = varr.rechunk((chunk_nfm, None, None))
    arr_opt = fct.partial(custom_arr_optimize,
                          keep_patterns=["^est_motion_chunk"])
    # kwargs is not modified below, so the mesh_size check is done only once.
    use_mesh = bool(kwargs.get("mesh_size", None))
    if use_mesh:
        # This eagerly computes the first frame of the movie.
        param = get_bspline_param(varr[0].compute(), kwargs["mesh_size"])
    tmp_ls = []
    sh_ls = []
    # First level: one est_motion_chunk task per chunk of frames.
    for blk in varr.blocks:
        res = da.delayed(est_motion_chunk)(blk,
                                           None,
                                           alt_error=alt_error,
                                           npart=npart,
                                           **kwargs)
        frame_shape = (blk.shape[1], blk.shape[2])
        # With a truthy alt_error the template is declared with a leading
        # axis of length 3.
        tmp_shape = (3, ) + frame_shape if alt_error else frame_shape
        tmp = darr.from_delayed(res[0], shape=tmp_shape, dtype=blk.dtype)
        if use_mesh:
            sh_shape = (blk.shape[0], 2, int(param[1]), int(param[0]))
        else:
            sh_shape = (blk.shape[0], 2)
        sh = darr.from_delayed(res[1], shape=sh_shape, dtype=float)
        tmp_ls.append(tmp)
        sh_ls.append(sh)
    with da.config.set(array_optimize=arr_opt):
        temps = da.optimize(darr.stack(tmp_ls, axis=0))[0]
        shifts = da.optimize(darr.concatenate(sh_ls, axis=0))[0]
    # Each pass below groups `npart` blocks into one; with npart < 2 the
    # number of templates never shrinks and the loop would not terminate.
    if temps.shape[0] > 1 and npart < 2:
        raise ValueError(
            "npart must be at least 2 to combine {} chunks, got {}".format(
                temps.shape[0], npart))
    # Recursive levels: merge groups of `npart` templates until one is left.
    while temps.shape[0] > 1:
        tmp_ls = []
        sh_ls = []
        for idx in np.arange(0, temps.numblocks[0], npart):
            tmps = temps.blocks[idx:idx + npart]
            sh_org = shifts.blocks[idx:idx + npart]
            sh_org_ls = [sh_org.blocks[i] for i in range(sh_org.numblocks[0])]
            res = da.delayed(est_motion_chunk)(tmps,
                                               sh_org_ls,
                                               alt_error=alt_error,
                                               npart=npart,
                                               **kwargs)
            # NOTE(review): when alt_error is truthy `temps` already carries
            # the extra length-3 axis, so tmps.shape[1] is that axis rather
            # than the frame height -- confirm the declared shape below
            # against what est_motion_chunk returns.
            if alt_error:
                tmp_shape = (3, tmps.shape[1], tmps.shape[2])
            else:
                tmp_shape = (tmps.shape[1], tmps.shape[2])
            tmp = darr.from_delayed(res[0], shape=tmp_shape, dtype=tmps.dtype)
            sh_new = darr.from_delayed(res[1],
                                       shape=sh_org.shape,
                                       dtype=sh_org.dtype)
            tmp_ls.append(tmp)
            sh_ls.append(sh_new)
        temps = darr.stack(tmp_ls, axis=0)
        shifts = darr.concatenate(sh_ls, axis=0)
    return temps, shifts

Exemple #33

0

Afficher le fichier

Fichier : io.py Projet : jsignell/dask-benchmarks

 def time_optimize_getitem(self):
     """Build a lazy dataframe over "data.parquet" and run dask.optimize on it."""
     frame = dd.read_parquet("data.parquet", engine="pyarrow")
     # The optimized result is discarded; only the call itself is exercised.
     dask.optimize(frame)

Exemple #34

0

Afficher le fichier

Fichier : utilities.py Projet : evermountaintech/minian

def load_videos(
    vpath: str,
    pattern=r"msCam[0-9]+\.avi$",
    dtype: Union[str, type] = np.float64,
    downsample: Optional[dict] = None,
    downsample_strategy="subset",
    post_process: Optional[Callable] = None,
) -> xr.DataArray:
    r"""
    Load every matching video in a folder as one `xr.DataArray`.

    Entries of `vpath` whose names match the regex `pattern` are collected,
    ordered with :func:`natsort.natsorted`, loaded lazily and concatenated
    along the first axis. The result can optionally be cast, downsampled and
    handed to a user-supplied callable before the dask graph is optimized.

    Parameters
    ----------
    vpath : str
        The path containing the videos to load.
    pattern : regexp, optional
        The regexp matching the filenames of the videos. By default
        `r"msCam[0-9]+\.avi$"`, which can be interpreted as filenames
        containing "msCam" followed by at least one digit and ending with
        ".avi".
    dtype : Union[str, type], optional
        Datatype of the resulting DataArray, by default `np.float64`. A falsy
        value skips the cast.
    downsample : dict, optional
        A dictionary mapping dimension names to an integer downsampling
        factor. The dimension names should be one of "height", "width" or
        "frame". By default `None`.
    downsample_strategy : str, optional
        How the downsampling should be done. Only used if `downsample` is not
        empty. Either `"subset"`, where data points are taken at the interval
        specified in `downsample`, or `"mean"`, where the mean is taken over
        the data within each interval. By default `"subset"`.
    post_process : Callable, optional
        A user-supplied custom function to post-process the resulting array.
        Four arguments are passed to the function: the resulting DataArray
        `varr`, the input path `vpath`, the list of matched video paths
        `vlist`, and the list of arrays before concatenation `varr_list`. The
        function should return another valid DataArray. By default `None`.

    Returns
    -------
    varr : xr.DataArray
        The resulting array representation of the input movie, named
        "fluorescence". Should have dimensions ("frame", "height", "width").

    Raises
    ------
    FileNotFoundError
        If no files under `vpath` match the pattern `pattern`.
    ValueError
        If the first matched file does not have extension ".avi", ".mkv" or
        ".tif".
    NotImplementedError
        If `downsample_strategy` is not "subset" or "mean".
    """
    vpath = os.path.normpath(vpath)
    matched_paths = [
        os.sep.join((vpath, fname))
        for fname in os.listdir(vpath)
        if re.search(pattern, fname)
    ]
    vlist = natsorted(matched_paths)
    if not vlist:
        message = "No data with pattern {} found in the specified folder {}".format(
            pattern, vpath
        )
        raise FileNotFoundError(message)
    print("loading {} videos in folder {}".format(len(vlist), vpath))

    # The loader is picked from the extension of the first matched file only.
    _, file_extension = os.path.splitext(vlist[0])
    if file_extension == ".tif":
        movie_load_func = load_tif_lazy
    elif file_extension in (".avi", ".mkv"):
        movie_load_func = load_avi_lazy
    else:
        raise ValueError("Extension not supported.")

    varr_list = [movie_load_func(video) for video in vlist]
    movie = darr.concatenate(varr_list, axis=0)
    movie_shape = movie.shape
    coords = {
        "frame": np.arange(movie_shape[0]),
        "height": np.arange(movie_shape[1]),
        "width": np.arange(movie_shape[2]),
    }
    varr = xr.DataArray(movie, dims=["frame", "height", "width"], coords=coords)
    if dtype:
        varr = varr.astype(dtype)
    if downsample:
        if downsample_strategy == "subset":
            # Keep every `step`-th element along each requested dimension.
            strides = {
                dim: slice(None, None, step) for dim, step in downsample.items()
            }
            varr = varr.isel(**strides)
        elif downsample_strategy == "mean":
            coarsened = varr.coarsen(**downsample, boundary="trim", coord_func="min")
            varr = coarsened.mean()
        else:
            raise NotImplementedError("unrecognized downsampling strategy")
    varr = varr.rename("fluorescence")
    if post_process:
        varr = post_process(varr, vpath, vlist, varr_list)
    # Optimize with the project's custom optimizer while asking it to keep
    # tasks whose names match "^load_avi_ffmpeg".
    arr_opt = fct.partial(custom_arr_optimize, keep_patterns=["^load_avi_ffmpeg"])
    with da.config.set(array_optimize=arr_opt):
        optimized = da.optimize(varr)
        varr = optimized[0]
    return varr