コード例 #1
0
ファイル: test_base.py プロジェクト: sighingnow/dask
def test_replace_name_in_keys():
    """Exercise ``replace_name_in_key`` on string keys, a tuple key, and an int key."""
    # String keys: the result is the new name.
    plain = replace_name_in_key("foo", "bar")
    assert plain == "bar"
    tokenized = replace_name_in_key("foo-123", "bar-456")
    assert tokenized == "bar-456"

    # Tuple key: only the leading name changes; the trailing arbitrary
    # hashables must come back as the very same objects.
    first, second = object(), object()
    renamed = replace_name_in_key(("foo-123", first, second), "bar")
    assert renamed == ("bar", first, second)

    # An integer key is expected to raise TypeError.
    with pytest.raises(TypeError):
        replace_name_in_key(1, "foo")
コード例 #2
0
def _build_map_layer(
        func: Callable,
        prev_name: str,
        new_name: str,
        collection,
        dependencies: tuple[Delayed, ...] = (),
) -> Layer:
    """Build the graph layer that maps ``func`` over the keys of ``collection``.

    A Blockwise layer is produced when ``_can_apply_blockwise`` accepts the
    collection; otherwise the result is a MaterializedLayer.

    Parameters
    ----------
    func
        Callable to be invoked on the graph node
    prev_name : str
        Name of the layer to map from. For dask base collections this is the
        collection name; third-party collections, e.g. xarray.Dataset, can have
        multiple names.
    new_name : str
        Name of the layer to map to
    collection
        Arbitrary dask collection
    dependencies
        Zero or more Delayed objects, passed to func as additional variadic
        args after the collection's chunk

    Returns
    -------
    Layer
        Either the result of ``blockwise(...)`` or a ``MaterializedLayer``.
    """
    if not _can_apply_blockwise(collection):
        # Delayed, bag.Item, dataframe.core.Scalar, or third-party collection:
        # write out one task per key that belongs to prev_name.
        extra_args = tuple(dep.key for dep in dependencies)
        tasks = {}
        for key in flatten(collection.__dask_keys__()):
            if get_name_from_key(key) == prev_name:
                new_key = replace_name_in_key(key, {prev_name: new_name})
                tasks[new_key] = (func, key) + extra_args
        return MaterializedLayer(tasks)

    # Blockwise path. A collection without ``numblocks`` is treated as
    # one-dimensional with ``npartitions`` blocks.
    try:
        numblocks = collection.numblocks
    except AttributeError:
        numblocks = (collection.npartitions,)
    # One index per dimension, used for both the output and the input.
    indices = tuple(axis for axis, _ in enumerate(numblocks))

    # ``_deps`` is only forwarded when there is at least one dependency.
    kwargs = {}
    if dependencies:
        kwargs["_deps"] = [dep.key for dep in dependencies]

    return blockwise(
        func,
        new_name,
        indices,
        prev_name,
        indices,
        numblocks={prev_name: numblocks},
        dependencies=dependencies,
        **kwargs,
    )
コード例 #3
0
ファイル: delayed.py プロジェクト: jakirkham/dask
 def _rebuild(self, dsk, *, rename=None):
     """Return a new ``Delayed`` backed by ``dsk``.

     When ``rename`` is truthy, this object's key is passed through
     ``replace_name_in_key``; otherwise the key is reused unchanged.
     """
     if rename:
         key = replace_name_in_key(self.key, rename)
     else:
         key = self.key

     layer = None
     if isinstance(dsk, HighLevelGraph) and len(dsk.layers) == 1:
         # FIXME Delayed is currently the only collection type that supports both high- and low-level graphs.
         # The HLG output of `optimize` will have a layer name that doesn't match `key`.
         # Remove this when Delayed is HLG-only (because `optimize` will only be passed HLGs, so it won't have
         # to generate random layer names).
         layer = next(iter(dsk.layers))
     return Delayed(key, dsk, self._length, layer=layer)
コード例 #4
0
 def _rebuild(dsk, keys, *, rename=None):
     """Build a ``Tuple`` from ``dsk`` and ``keys``.

     When ``rename`` is truthy, every key is first passed through
     ``replace_name_in_key(key, rename)``.
     """
     if not rename:
         return Tuple(dsk, keys)
     renamed = []
     for key in keys:
         renamed.append(replace_name_in_key(key, rename))
     return Tuple(dsk, renamed)
コード例 #5
0
def test_replace_name_in_keys():
    """Exercise ``replace_name_in_key`` with a rename mapping.

    Covers string keys with and without a matching mapping entry, a tuple
    key, and keys that are expected to raise ``TypeError``.
    """
    # (key, rename mapping, expected result)
    string_cases = [
        ("foo", {}, "foo"),
        ("foo", {"bar": "baz"}, "foo"),
        ("foo", {"foo": "bar", "baz": "asd"}, "bar"),
        ("foo-123", {"foo-123": "bar-456"}, "bar-456"),
    ]
    for key, mapping, expected in string_cases:
        assert replace_name_in_key(key, mapping) == expected

    # Tuple key: the trailing arbitrary hashables must be carried over as-is.
    first, second = object(), object()
    renamed = replace_name_in_key(("foo-123", first, second), {"foo-123": "bar"})
    assert renamed == ("bar", first, second)

    # An int, an empty tuple, and a tuple led by an int are all rejected.
    for bad_key in (1, (), (1,)):
        with pytest.raises(TypeError):
            replace_name_in_key(bad_key, {})
コード例 #6
0
ファイル: test_base.py プロジェクト: sighingnow/dask
 def _rebuild(dsk, keys, name=None):
     """Build a ``Tuple`` from ``dsk`` and ``keys``.

     When ``name`` is not ``None``, every key is first passed through
     ``replace_name_in_key(key, name)``.
     """
     if name is None:
         return Tuple(dsk, keys)
     renamed = []
     for key in keys:
         renamed.append(replace_name_in_key(key, name))
     return Tuple(dsk, renamed)