Example #1
def test_dispatch_variadic_on_first_argument():
    foo = Dispatch()
    foo.register(int, lambda a, b: a + b)
    foo.register(float, lambda a, b: a - b)

    assert foo(1, 2) == 3
    assert foo(1., 2.) == -1
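
`Dispatch` selects a handler from the type of the first positional argument only; any remaining arguments are passed through to that handler unchanged. A minimal, self-contained sketch of the same behaviour (the `div` dispatcher below is illustrative, not part of dask):

from dask.utils import Dispatch

div = Dispatch()
div.register(int, lambda a, b: a // b)    # ints: floor division
div.register(float, lambda a, b: a / b)   # floats: true division

assert div(7, 2) == 3       # handler chosen from type(7)
assert div(7.0, 2.0) == 3.5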
Example #2
def test_dispatch():
    foo = Dispatch()
    foo.register(int, lambda a: a + 1)
    foo.register(float, lambda a: a - 1)
    foo.register(tuple, lambda a: tuple(foo(i) for i in a))
    foo.register(object, lambda a: a)

    class Bar(object):
        pass
    b = Bar()
    assert foo(1) == 2
    assert foo.dispatch(int)(1) == 2
    assert foo(1.0) == 0.0
    assert foo(b) == b
    assert foo((1, 2.0, b)) == (2, 1.0, b)
Example #3
def test_dispatch_lazy():
    # this tests the lazy-registration machinery of dispatch
    foo = Dispatch()
    foo.register(int, lambda a: a)

    import decimal

    # keep this handler outside the lazy registration callback so the test can compare against it
    def foo_dec(a):
        return a + 1

    @foo.register_lazy("decimal")
    def register_decimal():
        import decimal
        foo.register(decimal.Decimal, foo_dec)

    # This lookup must come *before* any other call so that dispatch() itself triggers the lazy registration
    assert foo.dispatch(decimal.Decimal) == foo_dec
    assert foo(decimal.Decimal(1)) == decimal.Decimal(2)
    assert foo(1) == 1
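
`register_lazy` is how dask defers imports of optional dependencies: the registration callback only runs the first time the dispatcher has to resolve a class defined in the named top-level module. A standalone sketch of the same pattern, using the stdlib `fractions` module purely as a stand-in for an optional dependency:

from dask.utils import Dispatch

lazy_double = Dispatch()

@lazy_double.register_lazy("fractions")
def register_fractions():
    # Runs only when a class from the "fractions" module is first dispatched.
    import fractions
    lazy_double.register(fractions.Fraction, lambda a: a * 2)

import fractions
assert lazy_double(fractions.Fraction(1, 2)) == fractions.Fraction(1)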
Example #4
def test_dispatch_variadic_on_first_argument():
    foo = Dispatch()
    foo.register(int, lambda a, b: a + b)
    foo.register(float, lambda a, b: a - b)

    assert foo(1, 2) == 3
    assert foo(1.0, 2.0) == -1
Example #5
def test_dispatch():
    foo = Dispatch()
    foo.register(int, lambda a: a + 1)
    foo.register(float, lambda a: a - 1)
    foo.register(tuple, lambda a: tuple(foo(i) for i in a))

    def f(a):
        """ My Docstring """
        return a

    foo.register(object, f)

    class Bar(object):
        pass
    b = Bar()
    assert foo(1) == 2
    assert foo.dispatch(int)(1) == 2
    assert foo(1.0) == 0.0
    assert foo(b) == b
    assert foo((1, 2.0, b)) == (2, 1.0, b)

    assert foo.__doc__ == f.__doc__
Example #6
def test_dispatch():
    foo = Dispatch()
    foo.register(int, lambda a: a + 1)
    foo.register(float, lambda a: a - 1)
    foo.register(tuple, lambda a: tuple(foo(i) for i in a))
    foo.register(object, lambda a: a)

    class Bar(object):
        pass

    b = Bar()
    assert foo(1) == 2
    assert foo.dispatch(int)(1) == 2
    assert foo(1.0) == 0.0
    assert foo(b) == b
    assert foo((1, 2.0, b)) == (2, 1.0, b)
Example #7
"""
Dispatch in dask.array.

Also see backends.py
"""

from dask.utils import Dispatch

concatenate_lookup = Dispatch("concatenate")
tensordot_lookup = Dispatch("tensordot")
einsum_lookup = Dispatch("einsum")
empty_lookup = Dispatch("empty")
divide_lookup = Dispatch("divide")
percentile_lookup = Dispatch("percentile")
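
These module-level lookups start out empty; concrete implementations are registered elsewhere (see backends.py, as the docstring says). A hedged sketch of what such a registration might look like for NumPy arrays; the actual dask registrations may differ in detail:

import numpy as np

# Sketch only: route concatenation of NumPy arrays to np.concatenate.
@concatenate_lookup.register(np.ndarray)
def _concatenate_numpy(arrays, axis=0):
    return np.concatenate(arrays, axis=axis)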
Example #8
def test_dispatch_kwargs():
    foo = Dispatch()
    foo.register(int, lambda a, b=10: a + b)

    assert foo(1, b=20) == 21
Example #9
"""
Support for pandas ExtensionArray in dask.dataframe.

See :ref:`extensionarrays` for more.
"""
from dask.dataframe.accessor import (
    register_dataframe_accessor,
    register_index_accessor,
    register_series_accessor,
)
from dask.utils import Dispatch

make_array_nonempty = Dispatch("make_array_nonempty")
make_scalar = Dispatch("make_scalar")

__all__ = [
    "make_array_nonempty",
    "make_scalar",
    "register_dataframe_accessor",
    "register_index_accessor",
    "register_series_accessor",
]
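
`make_array_nonempty` is dispatched on pandas dtype classes and is expected to return a small, non-empty array of that dtype for use in metadata. A hedged sketch of the registration pattern (dask ships its own registrations for the pandas dtypes; this one is illustrative only):

import pandas as pd

@make_array_nonempty.register(pd.CategoricalDtype)
def _make_categorical_nonempty(dtype):
    # Return a length-2 array of the requested dtype with placeholder values.
    return pd.array(["a", "b"], dtype=dtype)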
Example #10
def test_dispatch_kwargs():
    foo = Dispatch()
    foo.register(int, lambda a, b=10: a + b)

    assert foo(1, b=20) == 21
Example #11
from __future__ import absolute_import, division, print_function

from dask.utils import Dispatch

is_device_object = Dispatch(name="is_device_object")


@is_device_object.register(object)
def is_device_object_default(o):
    return hasattr(o, "__cuda_array_interface__")


@is_device_object.register(list)
@is_device_object.register(tuple)
@is_device_object.register(set)
@is_device_object.register(frozenset)
def is_device_object_python_collection(seq):
    return any([is_device_object(s) for s in seq])


@is_device_object.register_lazy("cudf")
def register_cudf():
    import cudf

    @is_device_object.register(cudf.DataFrame)
    def is_device_object_cudf_dataframe(df):
        return True

    @is_device_object.register(cudf.Series)
    def is_device_object_cudf_series(s):
        return True
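
Under the registrations above, plain Python values fall through to the `object` handler (no `__cuda_array_interface__`), and built-in collections count as device objects only if at least one of their elements does:

assert not is_device_object(42)            # int has no __cuda_array_interface__
assert not is_device_object([1, (2, 3)])   # recurses into nested collections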
Example #12
from dask.sizeof import sizeof
from dask.utils import Dispatch

dispatch = Dispatch(name="get_device_memory_objects")


def get_device_memory_objects(obj) -> set:
    """ Find all CUDA device objects in `obj`

    Search through `obj` and find all CUDA device objects, which are objects
    that either are known to `dispatch` or implement `__cuda_array_interface__`.

    Note that the CUDA device objects must be hashable.

    Parameters
    ----------
    obj: Any
        Object to search through

    Returns
    -------
    ret: set
        Set of CUDA device memory objects
    """
    return set(dispatch(obj))


@dispatch.register(object)
def get_device_memory_objects_default(obj):
    if hasattr(obj, "_obj_pxy"):
        if obj._obj_pxy["serializers"] is None:
Example #13
from typing import Any, Dict, List

from dask.utils import Dispatch

from .proxy_object import ProxyObject, asproxy

dispatch = Dispatch(name="proxify_device_objects")


def proxify_device_objects(
    obj: Any,
    proxied_id_to_proxy: Dict[int, ProxyObject],
    found_proxies: List[ProxyObject],
):
    """ Wrap device objects in ProxyObject

    Search through `obj` and wrap all CUDA device objects in ProxyObject.
    It uses `proxied_id_to_proxy` to make sure that identical CUDA device
    objects found in `obj` are wrapped by the same ProxyObject.

    Parameters
    ----------
    obj: Any
        Object to search through or wrap in a ProxyObject.
    proxied_id_to_proxy: Dict[int, ProxyObject]
        Dict mapping the id() of proxied objects (CUDA device objects) to
        their proxy and is updated with all new proxied objects found in `obj`.
    found_proxies: List[ProxyObject]
        List of proxies found in `obj`. Note that this includes all proxies
        found, including those already present in `proxied_id_to_proxy`.
Example #14
import itertools
import random
import sys
from array import array

from dask.utils import Dispatch

sizeof = Dispatch(name="sizeof")


@sizeof.register(object)
def sizeof_default(o):
    return sys.getsizeof(o)


@sizeof.register(bytes)
@sizeof.register(bytearray)
def sizeof_bytes(o):
    return len(o)


@sizeof.register(memoryview)
def sizeof_memoryview(o):
    return o.nbytes


@sizeof.register(array)
def sizeof_array(o):
    return o.itemsize * len(o)
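
The real dask.sizeof module continues past this point and also uses `register_lazy` so that handlers for optional libraries are registered only when objects from those libraries are first encountered. A hedged sketch of that pattern for NumPy (the actual registration may differ in detail):

@sizeof.register_lazy("numpy")
def register_numpy():
    import numpy as np

    @sizeof.register(np.ndarray)
    def sizeof_numpy_ndarray(x):
        return int(x.nbytes)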

Example #15
from __future__ import print_function, division, absolute_import

import sys

from dask.utils import Dispatch

is_device_object = Dispatch(name='is_device_object')


@is_device_object.register(object)
def is_device_object_default(o):
    return hasattr(o, "__cuda_array_interface__")


@is_device_object.register(list)
@is_device_object.register(tuple)
@is_device_object.register(set)
@is_device_object.register(frozenset)
def is_device_object_python_collection(seq):
    return any([is_device_object(s) for s in seq])


@is_device_object.register_lazy("cudf")
def register_cudf():
    import cudf

    @is_device_object.register(cudf.DataFrame)
    def is_device_object_cudf_dataframe(df):
        return True

    @is_device_object.register(cudf.Series)
    def is_device_object_cudf_series(s):
        return True
Example #16
def tokenize(*args, **kwargs):
    """Deterministic token

    >>> tokenize([1, 2, '3'])
    '7d6a880cd9ec03506eee6973ff551339'

    >>> tokenize('Hello') == tokenize('Hello')
    True
    """
    hasher = _md5(str(tuple(map(normalize_token, args))).encode())
    if kwargs:
        hasher.update(str(normalize_token(kwargs)).encode())
    return hasher.hexdigest()


normalize_token = Dispatch()
normalize_token.register(
    (
        int,
        float,
        str,
        bytes,
        type(None),
        type,
        slice,
        complex,
        type(Ellipsis),
        datetime.date,
    ),
    identity,
)
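
Because `tokenize` funnels every argument through `normalize_token`, user code can obtain deterministic tokens for its own types by registering a normalizer that reduces instances to plain, stable components. A small sketch using a made-up class:

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y


@normalize_token.register(Point)
def normalize_point(p):
    # Reduce the instance to a tuple of stable, hashable components.
    return ("Point", p.x, p.y)


assert tokenize(Point(1, 2)) == tokenize(Point(1, 2))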
Example #17
"""
Dispatch in dask.dataframe.

Also see extension.py
"""

import pandas as pd

import dask.array as da
import dask.dataframe as dd
from dask.utils import Dispatch

make_meta_dispatch = Dispatch("make_meta_dispatch")
make_meta_obj = Dispatch("make_meta_obj")
meta_nonempty = Dispatch("meta_nonempty")
hash_object_dispatch = Dispatch("hash_object_dispatch")
group_split_dispatch = Dispatch("group_split_dispatch")
get_parallel_type = Dispatch("get_parallel_type")
categorical_dtype_dispatch = Dispatch("CategoricalDtype")
concat_dispatch = Dispatch("concat")
tolist_dispatch = Dispatch("tolist")
is_categorical_dtype_dispatch = Dispatch("is_categorical_dtype")
union_categoricals_dispatch = Dispatch("union_categoricals")
grouper_dispatch = Dispatch("grouper")


def concat(
    dfs,
    axis=0,
    join="outer",
    uniform=False,
Example #18
import logging
import sys

from dask.utils import Dispatch

try:  # PyPy does not support sys.getsizeof
    sys.getsizeof(1)
    getsizeof = sys.getsizeof
except (AttributeError, TypeError):  # Monkey patch
    getsizeof = lambda x: 100


logger = logging.getLogger(__name__)


sizeof = Dispatch()


@sizeof.register(object)
def sizeof_default(o):
    return getsizeof(o)


@sizeof.register(list)
@sizeof.register(tuple)
@sizeof.register(set)
@sizeof.register(frozenset)
def sizeof_python_collection(seq):
    return getsizeof(seq) + sum(map(sizeof, seq))

Example #19
def test_dispatch():
    foo = Dispatch()
    foo.register(int, lambda a: a + 1)
    foo.register(float, lambda a: a - 1)
    foo.register(tuple, lambda a: tuple(foo(i) for i in a))

    def f(a):
        """My Docstring"""
        return a

    foo.register(object, f)

    class Bar:
        pass

    b = Bar()
    assert foo(1) == 2
    assert foo.dispatch(int)(1) == 2
    assert foo(1.0) == 0.0
    assert foo(b) == b
    assert foo((1, 2.0, b)) == (2, 1.0, b)

    assert foo.__doc__ == f.__doc__
Example #20
class DaskBaseEstimator(Base):
    """Base class for dask-backed estimators"""
    _default_get = staticmethod(threaded_get)

    @staticmethod
    def _optimize(dsk, keys, **kwargs):
        dsk2, deps = fuse(dsk, keys)
        return dsk2

    def _keys(self):
        return [self._name]


@partial(normalize_token.register, BaseEstimator)
def normalize_BaseEstimator(est):
    return type(est).__name__, normalize_token(vars(est))


@partial(normalize_token.register, DaskBaseEstimator)
def normalize_dask_estimators(est):
    return type(est).__name__, est._name


def from_sklearn(est):
    """Wrap a scikit-learn estimator in a dask object."""
    return from_sklearn.dispatch(est)


from_sklearn.dispatch = Dispatch()
from_sklearn.dispatch.register(DaskBaseEstimator, identity)