def test_lingering_client():
    # A client created (and never closed) inside the test cluster must not
    # linger as the default client after the cluster shuts down.
    @gen_cluster()
    async def f(s, a, b):
        await Client(s.address, asynchronous=True)

    f()

    with pytest.raises(ValueError):
        default_client()
def test_lingering_client():
    # Older, generator-based variant of the same test (pre async/await).
    @gen_cluster()
    def f(s, a, b):
        c = yield Client(s.address, asynchronous=True)  # noqa: F841

    f()

    with pytest.raises(ValueError):
        default_client()
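For context, a minimal sketch of the behaviour both tests exercise (assuming only a local machine; nothing here comes from the test suite): default_client() returns the most recently created client and raises ValueError when no client is running.

from distributed import Client, default_client

client = Client(processes=False)   # in-process cluster; registers itself as the default
assert default_client() is client  # the most recently created client comes back
client.close()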
def __init__(self, client=None):
    self.client = client if client is not None else default_client()
    self.sessionId = uuid.uuid4().bytes

    # Get the address of every worker (not the Nanny addresses)
    self.worker_addresses = list(self.client.run(lambda: 42).keys())

    # Make each worker listen and collect all listener addresses
    self.worker_direct_addresses = []
    for rank, address in enumerate(self.worker_addresses):
        self.worker_direct_addresses.append(
            self.submit(
                address,
                _create_listeners,
                len(self.worker_addresses),
                rank,
                wait=True,
            )
        )

    # Each worker creates an endpoint to every worker with a greater rank
    self.run(_create_endpoints, self.worker_direct_addresses)

    # At this point every worker has a rank and endpoints to all other
    # workers, so the listeners can be stopped.
    self.run(_stop_ucp_listeners)
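The constructor leans on two generic dask patterns: client.run executes a function on every worker and returns a dict keyed by worker address, and client.submit(..., workers=[address]) pins a task to one worker. A minimal sketch of just that scaffolding, with report_rank standing in for per-worker setup such as _create_listeners (which is not reproduced here):

from distributed import Client

def report_rank(nranks, rank):
    # Stand-in for per-worker setup work
    return f"worker {rank} of {nranks} ready"

client = Client(processes=False)

# client.run returns {worker_address: result}, one entry per worker
worker_addresses = list(client.run(lambda: 42).keys())

# Pin one setup task to each worker; enumeration order assigns the rank
futures = [
    client.submit(report_rank, len(worker_addresses), rank, workers=[address])
    for rank, address in enumerate(worker_addresses)
]
print(client.gather(futures))
client.close()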
@gen.coroutine
def extract_ddf_partitions(ddf, client=None, agg=True):
    """
    Given a Dask dataframe, return a list of (worker, future) pairs,
    one per partition of ddf.

    :param ddf: dask.dataframe.DataFrame to split into per-partition futures
    :param client: dask.distributed.Client Optional client to use
    :param agg: Unused
    """
    client = default_client() if client is None else client

    # Materialize each partition as a future and wait for them to finish
    delayed_ddf = ddf.to_delayed()
    parts = client.compute(delayed_ddf)
    yield wait(parts)

    key_to_part_dict = dict([(str(part.key), part) for part in parts])
    who_has = yield client.who_has(parts)

    worker_map = {}  # Map from part -> worker
    for key, workers in who_has.items():
        worker = first(workers)
        worker_map[key_to_part_dict[key]] = worker

    worker_to_parts = []
    for part in parts:
        worker = worker_map[part]
        worker_to_parts.append((worker, part))

    yield wait(worker_to_parts)
    raise gen.Return(worker_to_parts)
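A hedged usage sketch: since the function is a tornado coroutine, a synchronous caller can drive it through client.sync. The toy dataframe below is an assumption about the calling context, not part of the original code.

import pandas as pd
import dask.dataframe as dd
from distributed import Client

client = Client(processes=False)
ddf = dd.from_pandas(pd.DataFrame({"x": range(8)}), npartitions=4)

# client.sync runs the coroutine to completion on the client's event loop
worker_to_parts = client.sync(extract_ddf_partitions, ddf)
for worker, part in worker_to_parts:
    print(worker, part)
client.close()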
def __init__(self, fs):
    """
    Parameters
    ----------
    fs: FileSystem instance
    """
    import distributed

    super().__init__(fs)
    client = distributed.default_client()
    # Run a single FileActor on the cluster as a stateful actor
    self.files = client.submit(FileActor, actor=True).result()
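FileActor is defined elsewhere in that project; what actor=True does is create one stateful instance on a worker and hand back a proxy. A minimal sketch of the same pattern with a hypothetical Counter actor:

from distributed import Client

class Counter:
    """Stateful object living on one worker; its methods run there."""
    def __init__(self):
        self.n = 0

    def increment(self):
        self.n += 1
        return self.n

client = Client(processes=False)

# actor=True returns a proxy to a single instance on one worker
counter = client.submit(Counter, actor=True).result()
print(counter.increment().result())  # method calls return ActorFutures -> 1
print(counter.increment().result())  # state persists between calls -> 2
client.close()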
def dataframes_to_dask_dataframe(futures, client=None):
    """
    Convert a list of futures containing DataFrames (pandas or cudf)
    into a Dask DataFrame.

    :param futures: list of futures containing dataframes
    :param client: dask.distributed.Client Optional client to use
    :return: dask.dataframe.DataFrame
    """
    c = default_client() if client is None else client

    # Drop futures whose result is None before rebuilding the collection
    dfs = [d for d in futures if d.type != type(None)]  # NOQA
    meta = c.submit(get_meta, dfs[0]).result()
    return dd.from_delayed(dfs, meta=meta)
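A usage sketch under stated assumptions: make_df is hypothetical, and the futures are waited on first so that the .type attribute used by the filter above is populated.

import pandas as pd
from distributed import Client, wait

def make_df(i):
    return pd.DataFrame({"x": [i, i + 1]})

client = Client(processes=False)
futures = client.map(make_df, range(4))
wait(futures)  # ensures each future's .type is known

ddf = dataframes_to_dask_dataframe(futures)
print(ddf.compute())
client.close()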
@gen_cluster(client=True)
def test_get_client(c, s, a, b):
    def f(x):
        # Inside a task, get_client() returns the client of the running worker
        cc = get_client()
        future = cc.submit(inc, x)
        return future.result()

    assert default_client() is c

    future = c.submit(f, 10, workers=a.address)
    result = yield future
    assert result == 11

    assert a._client
    assert not b._client
    assert a._client is c
    assert default_client() is c

    a_client = a._client

    for i in range(10):
        yield wait(c.submit(f, i))

    # The worker reuses one client rather than creating a new one per task
    assert a._client is a_client
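Outside the test harness, the submit-from-inside-a-task pattern this test exercises is usually written with worker_client, which temporarily secedes from the worker's thread pool while the task blocks on its own futures. A minimal sketch (fan_out is hypothetical):

from distributed import Client, worker_client

def fan_out(x):
    with worker_client() as client:  # secede while we block on nested futures
        futures = client.map(lambda i: i + x, range(3))
        return sum(client.gather(futures))

if __name__ == "__main__":
    client = Client(processes=False)
    print(client.submit(fan_out, 10).result())  # (10 + 11 + 12) = 33
    client.close()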