Exemple #1
0
def cli_keywords(d: dict, cls=None, cmd=None):
    """Convert a kwargs dictionary into a list of CLI keywords

    Each key becomes a ``--flag`` (underscores replaced by dashes) followed by
    the string form of its value. Values containing a space but no quote
    characters are wrapped in double quotes.

    Parameters
    ----------
    d : dict
        The keywords to convert
    cls : callable
        The callable that consumes these terms to check them for validity
    cmd : string or object
        A string with the name of a module, or the module containing a
        click-generated command with a "main" function, or the function itself.
        It may be used to parse a module's custom arguments (i.e., arguments that
        are not part of Worker class), such as nprocs from dask-worker CLI or
        enable_nvlink from dask-cuda-worker CLI.

    Returns
    -------
    list of str
        Flat list alternating flag and value, in the iteration order of ``d``.

    Raises
    ------
    ValueError
        If ``cls`` and/or ``cmd`` is given and a key of ``d`` is supported by
        neither of them.

    Examples
    --------
    >>> cli_keywords({"x": 123, "save_file": "foo.txt"})
    ['--x', '123', '--save-file', 'foo.txt']

    >>> from dask.distributed import Worker
    >>> cli_keywords({"x": 123}, Worker)
    Traceback (most recent call last):
    ...
    ValueError: Class distributed.worker.Worker does not support keyword x
    """
    from itertools import chain

    if cls or cmd:
        for k in d:
            # Only probe ``cls`` when one was supplied, so a cmd-only call does
            # not depend on how ``has_keyword`` treats a missing callable.
            if (cls and has_keyword(cls, k)) or command_has_keyword(cmd, k):
                continue

            # ``typename`` is needed only to build the error message, so the
            # dask import is deferred to this failure path.
            from dask.utils import typename

            if cls and cmd:
                raise ValueError(
                    "Neither class %s or module %s support keyword %s"
                    % (typename(cls), typename(cmd), k)
                )
            if cls:
                raise ValueError(
                    f"Class {typename(cls)} does not support keyword {k}"
                )
            raise ValueError(
                f"Module {typename(cmd)} does not support keyword {k}"
            )

    def convert_value(v):
        # Quote values containing spaces, unless they already carry quotes.
        out = str(v)
        if " " in out and "'" not in out and '"' not in out:
            out = '"' + out + '"'
        return out

    # chain.from_iterable flattens in linear time; ``sum(lists, [])`` would
    # copy the growing accumulator on every step.
    return list(
        chain.from_iterable(
            ("--" + k.replace("_", "-"), convert_value(v)) for k, v in d.items()
        )
    )
Exemple #2
0
    def __init__(
        self,
        asynchronous=False,
        loop=None,
        quiet=False,
        name=None,
        scheduler_sync_interval=1,
    ):
        """Initialise the cluster's event-loop runner and bookkeeping state.

        Parameters
        ----------
        asynchronous : bool
            Forwarded to ``LoopRunner``.
        loop : optional
            Forwarded to ``LoopRunner``; the runner's ``loop`` attribute is
            then exposed as ``self.loop``.
        quiet : bool
            Stored as ``self.quiet``.
        name : str, optional
            Cluster name. When ``None``, the first eight characters of a
            random UUID4 are used.
        scheduler_sync_interval : int or str
            Parsed with ``parse_timedelta`` using ``"seconds"`` as the default
            unit and stored as ``self._sync_interval``.
        """
        runner = LoopRunner(loop=loop, asynchronous=asynchronous)
        self._loop_runner = runner
        self.loop = runner.loop

        self.scheduler_info = {"workers": {}}
        self.periodic_callbacks = {}
        self._watch_worker_status_comm = None
        self._watch_worker_status_task = None
        self._cluster_manager_logs = []
        self.quiet = quiet
        self.scheduler_comm = None
        self._adaptive = None
        self._sync_interval = parse_timedelta(
            scheduler_sync_interval, default="seconds"
        )
        self._sync_cluster_info_task = None

        cluster_name = str(uuid.uuid4())[:8] if name is None else name

        # Mask class attribute with instance attribute. Entries from the
        # class-level ``_cluster_info`` are applied last, so they take
        # precedence over the "name" and "type" computed here.
        info = {"name": cluster_name, "type": typename(type(self))}
        info.update(type(self)._cluster_info)
        self._cluster_info = info
        self.status = Status.created
Exemple #3
0
def get_snippet(name: str) -> str:
    """Return a code snippet showing how to connect to a named cluster.

    The cluster's own ``get_snippet`` method is used when calling it does not
    raise ``AttributeError``; otherwise the ``snippet.py.j2`` template is
    rendered with the module path and class name of the cluster's type.

    Parameters
    ----------
    name
        Name of cluster to get a snippet for.

    Returns
    -------
    str
        Code snippet.

    Examples
    --------
    >>> from dask.distributed import LocalCluster  # doctest: +SKIP
    >>> cluster = LocalCluster(scheduler_port=8786)  # doctest: +SKIP
    >>> get_snippet("proxycluster-8786")  # doctest: +SKIP
    from dask.distributed import Client
    from dask_ctl.proxy import ProxyCluster

    cluster = ProxyCluster.from_name("proxycluster-8786")
    client = Client(cluster)

    """
    cluster = get_cluster(name)
    try:
        return cluster.get_snippet()
    except AttributeError:
        # Split "pkg.mod.ClassName" into its module path and class name.
        qualified_name = typename(type(cluster))
        module, _, cm = qualified_name.rpartition(".")
        template = get_template("snippet.py.j2")
        return template.render(module=module, cm=cm, name=name, cluster=cluster)
Exemple #4
0
def cuda_dumps(x):
    """Serialize ``x`` with the function registered for its type in ``cuda_serialize``.

    Returns
    -------
    tuple
        ``(header, frames)`` as produced by the dispatched function, with the
        "type-serialized", "serializer" and "compression" header entries set
        here.

    Raises
    ------
    NotImplementedError
        If dispatching on ``type(x)`` raises ``TypeError``; the message is the
        type's name.
    """
    obj_type = type(x)
    qualified_name = typename(obj_type)
    try:
        serializer = cuda_serialize.dispatch(obj_type)
    except TypeError:
        raise NotImplementedError(qualified_name)

    header, frames = serializer(x)
    header["type-serialized"] = pickle.dumps(obj_type)
    header["serializer"] = "cuda"
    # no compression for gpu data: one False flag per frame
    header["compression"] = (False,) * len(frames)
    return header, frames
Exemple #5
0
def cuda_dumps(x):
    """Serialize ``x`` with the function registered for its type in ``cuda_serialize``.

    The header returned by the dispatched function is nested under the
    "sub-header" key of a freshly built header.

    Returns
    -------
    tuple
        ``(header, frames)`` where ``header`` holds "sub-header",
        "type-serialized", "serializer" and "compression".

    Raises
    ------
    NotImplementedError
        If dispatching on ``type(x)`` raises ``TypeError``; the message is the
        type's name.
    """
    obj_type = type(x)
    qualified_name = typename(obj_type)
    try:
        serializer = cuda_serialize.dispatch(obj_type)
    except TypeError:
        raise NotImplementedError(qualified_name)

    nested_header, frames = serializer(x)
    pickled_type = pickle.dumps(obj_type)
    # no compression for gpu data: one False flag per frame
    compression = (False,) * len(frames)
    header = {
        "sub-header": nested_header,
        "type-serialized": pickled_type,
        "serializer": "cuda",
        "compression": compression,
    }
    return header, frames
def dask_dumps(x, context=None):
    """Serialize object using the class-based registry

    The function registered for ``type(x)`` in ``dask_serialize`` is called
    with ``context`` only when it accepts a ``context`` keyword. The returned
    header is then tagged with "type", "type-serialized" and "serializer".

    Raises
    ------
    NotImplementedError
        If dispatching on ``type(x)`` raises ``TypeError``; the message is the
        type's name.
    """
    obj_type = type(x)
    qualified_name = typename(obj_type)
    try:
        serializer = dask_serialize.dispatch(obj_type)
    except TypeError:
        raise NotImplementedError(qualified_name)

    # Forward the context only to serializers that declare the keyword.
    extra_kwargs = {"context": context} if has_keyword(serializer, "context") else {}
    header, frames = serializer(x, **extra_kwargs)

    header["type"] = qualified_name
    header["type-serialized"] = pickle.dumps(obj_type, protocol=4)
    header["serializer"] = "dask"
    return header, frames
Exemple #7
0
    def __init__(
        self,
        asynchronous,
        quiet=False,
        name=None,
        scheduler_sync_interval=1,
    ):
        """Initialise the cluster's bookkeeping state.

        Parameters
        ----------
        asynchronous : bool
            Stored as ``self._asynchronous``.
        quiet : bool
            Stored as ``self.quiet``.
        name : str, optional
            Cluster name. When ``None``, the first eight characters of a
            random UUID4 are used.
        scheduler_sync_interval : int or str
            Parsed with ``parse_timedelta`` using ``"seconds"`` as the default
            unit and stored as ``self._sync_interval``.
        """
        self.scheduler_info = {"workers": {}}
        self.periodic_callbacks = {}
        self._asynchronous = asynchronous
        self._watch_worker_status_comm = None
        self._watch_worker_status_task = None
        self._cluster_manager_logs = []
        self.quiet = quiet
        self.scheduler_comm = None
        self._adaptive = None
        self._sync_interval = parse_timedelta(
            scheduler_sync_interval, default="seconds"
        )

        cluster_name = str(uuid.uuid4())[:8] if name is None else name

        self._cluster_info = dict(name=cluster_name, type=typename(type(self)))
        self.status = Status.created
Exemple #8
0
def test_typename_on_instances():
    """``typename`` gives the same result for an instance as for its class."""
    assert typename(MyType()) == typename(MyType)
Exemple #9
0
def test_typename():
    """Check the full and the ``short=True`` names reported for ``HighLevelGraph``."""
    full_name = typename(HighLevelGraph)
    assert full_name == "dask.highlevelgraph.HighLevelGraph"

    short_name = typename(HighLevelGraph, short=True)
    assert short_name == "dask.HighLevelGraph"
Exemple #10
0
def check_meta(x, meta, funcname=None, numeric_equal=True):
    """Verify that a partition agrees with its dask metadata.

    When ``x`` matches ``meta`` it is returned unchanged; otherwise an error
    describing the mismatch is raised.

    Parameters
    ----------
    x : DataFrame, Series, or Index
        The partition to validate.
    meta : DataFrame, Series, or Index
        The expected metadata that ``x`` should match.
    funcname : str, optional
        The name of the function in which the metadata was specified. If
        provided, it is included in the error message to be more helpful to
        users.
    numeric_equal : bool, optional
        If True, dtypes of kind ``"i"``, ``"u"`` and ``"f"`` compare equal.
        This is useful due to pandas' implicit conversion of integer to
        floating upon encountering missingness, which is hard to infer
        statically.

    Returns
    -------
    The input ``x``, unchanged.

    Raises
    ------
    TypeError
        If ``meta`` is not DataFrame-, Series- or Index-like, or is a dask
        collection.
    ValueError
        If the class of ``x`` differs from that of ``meta``, or their dtypes
        do not match.
    """
    interchangeable_kinds = {"i", "f", "u"} if numeric_equal else set()

    def equal_dtypes(found, expected):
        # A categorical dtype never matches a non-categorical one.
        if is_categorical_dtype(found) != is_categorical_dtype(expected):
            return False
        # "-" is the fill value used below for entries missing from the
        # combined dtype table; it never matches anything.
        if (isinstance(found, str) and found == "-") or (
            isinstance(expected, str) and expected == "-"
        ):
            return False
        if is_categorical_dtype(found) and is_categorical_dtype(expected):
            # Unknown categories on either side are accepted as a match.
            has_unknown = (
                UNKNOWN_CATEGORIES in found.categories
                or UNKNOWN_CATEGORIES in expected.categories
            )
            return has_unknown or found == expected
        if found.kind in interchangeable_kinds and expected.kind in interchangeable_kinds:
            return True
        return is_dtype_equal(found, expected)

    def mismatch(details):
        # Build (but do not raise) the error shared by every mismatch path.
        location = " in `%s`" % funcname if funcname else ""
        return ValueError("Metadata mismatch found%s.\n\n%s" % (location, details))

    meta_is_frame_like = (
        is_dataframe_like(meta) or is_series_like(meta) or is_index_like(meta)
    )
    if not meta_is_frame_like or is_dask_collection(meta):
        raise TypeError(
            "Expected partition to be DataFrame, Series, or Index, got `%s`"
            % typename(type(meta))
        )

    # Notice, we use .__class__ as opposed to type() in order to support
    # object proxies see <https://github.com/dask/dask/pull/6981>
    if x.__class__ != meta.__class__:
        raise mismatch(
            "Expected partition of type `{}` but got `{}`".format(
                typename(type(meta)),
                typename(type(x)),
            )
        )

    if not is_dataframe_like(meta):
        # Series / Index: a single dtype to compare.
        if equal_dtypes(x.dtype, meta.dtype):
            return x
        raise mismatch(
            "Partition type: `{}`\n{}".format(
                typename(type(meta)),
                asciitable(
                    ["", "dtype"], [("Found", x.dtype), ("Expected", meta.dtype)]
                ),
            )
        )

    # DataFrame: compare dtypes side by side, one row per column.
    combined = pd.concat([x.dtypes, meta.dtypes], axis=1, sort=True)
    bad_dtypes = []
    for col, found, expected in combined.fillna("-").itertuples():
        if not equal_dtypes(found, expected):
            bad_dtypes.append((repr(col), found, expected))

    if not bad_dtypes:
        check_matching_columns(meta, x)
        return x

    raise mismatch(
        "Partition type: `{}`\n{}".format(
            typename(type(meta)),
            asciitable(["Column", "Found", "Expected"], bad_dtypes),
        )
    )