def test_pid_file(loop):
    """End-to-end check that ``--pid-file`` makes dask-scheduler and
    dask-worker write their process id to the given path."""

    def check_pidfile(proc, pidfile):
        # The child writes the file asynchronously: first wait for the
        # file to exist, then for it to have non-empty contents.
        while not os.path.exists(pidfile):
            sleep(0.01)
        text = False
        while not text:
            sleep(0.01)
            with open(pidfile) as f:
                text = f.read()
        pid = int(text)
        if sys.platform.startswith('win'):
            # On Windows, `dask-XXX` invokes the dask-XXX.exe
            # shim, but the PID is written out by the child Python process
            assert pid
        else:
            assert proc.pid == pid

    with tmpfile() as s:
        with popen(['dask-scheduler', '--pid-file', s, '--no-bokeh']) as sched:
            check_pidfile(sched, s)
            # Worker connects to the scheduler started above.
            with tmpfile() as w:
                with popen(['dask-worker', '127.0.0.1:8786', '--pid-file', w,
                            '--no-bokeh']) as worker:
                    check_pidfile(worker, w)
def test_pid_file(loop):
    """Check that ``--pid-file`` causes dask-scheduler and dask-worker to
    write their process id to the given path.

    The original body duplicated the poll-and-parse logic inline for both
    processes; it is extracted into a single helper.
    """

    def check_pidfile(proc, pidfile):
        # The PID file is written asynchronously by the child process:
        # wait for the file to exist, then for it to be non-empty.
        while not os.path.exists(pidfile):
            sleep(0.01)
        text = False
        while not text:
            sleep(0.01)
            with open(pidfile) as f:
                text = f.read()
        assert proc.pid == int(text)

    with tmpfile() as s:
        with popen(['dask-scheduler', '--pid-file', s]) as sched:
            check_pidfile(sched, s)
            # Worker connects to the scheduler started above.
            with tmpfile() as w:
                with popen(['dask-worker', '127.0.0.1:8786',
                            '--pid-file', w]) as worker:
                    check_pidfile(worker, w)
def test_worker_dir(worker):
    """Workers place their local directories under the configured
    ``local_dir`` path, and each worker gets its own distinct one."""
    with tmpfile() as fn:

        @gen_cluster(client=True, worker_kwargs={'local_dir': fn})
        def inner(c, s, a, b):
            dirs = [w.local_directory for w in s.workers.values()]
            for d in dirs:
                assert d.startswith(fn)
            assert len(set(dirs)) == 2  # one distinct directory per worker

        inner()
def test_raise_error_on_serialize_write_permissions():
    """Serializing an h5py file or dataset that is open for writing must
    raise TypeError — only read-only handles may be shipped."""
    with tmpfile() as fn:
        with h5py.File(fn, mode='a') as f:
            dset = f.create_dataset('/x', shape=(2, 2), dtype='i4')
            f.flush()
            # Both the dataset and the file itself must refuse to serialize.
            for obj in (dset, f):
                with pytest.raises(TypeError):
                    deserialize(*serialize(obj))
def test_netcdf4_serialize(c, s, a, b):
    """A dask array backed by a netCDF4 variable computes on the cluster
    and matches the on-disk data."""
    with tmpfile() as fn:
        create_test_dataset(fn)
        with netCDF4.Dataset(fn, mode='r') as ds:
            variable = ds.variables['x']
            arr = da.from_array(variable, chunks=2)
            fut = c.compute(arr)
            result = yield fut._result()
            assert (result[:] == variable[:]).all()
def test_scheduler_file(loop, nanny):
    """A worker pointed at ``--scheduler-file`` discovers the scheduler
    address from that file and registers itself."""
    with tmpfile() as fn:
        sched_cmd = ['dask-scheduler', '--no-bokeh', '--scheduler-file', fn]
        worker_cmd = ['dask-worker', '--scheduler-file', fn, nanny,
                      '--no-bokeh']
        with popen(sched_cmd) as sched:
            with popen(worker_cmd):
                with Client(scheduler_file=fn, loop=loop) as c:
                    # Poll until the worker registers; bail out after 10s.
                    deadline = time() + 10
                    while not c.scheduler_info()['workers']:
                        sleep(0.1)
                        assert time() < deadline
def test_scheduler_file():
    """The scheduler writes its contact info to ``scheduler_file``; a
    Worker given the same file finds and registers with it."""
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(8009)  # fixed port; the file tells the worker where to go
        w = Worker(scheduler_file=fn)
        yield w._start()
        assert set(s.workers) == {w.address}
        yield w._close()
        s.stop()
def test_bokeh_port_zero(loop):
    """``--bokeh-port 0`` should bind an arbitrary free port, so the
    startup log line must not advertise the literal port 0."""
    pytest.importorskip('bokeh')
    # NOTE(review): fn appears unused below; the tmpfile context looks
    # like a leftover — confirm before removing.
    with tmpfile() as fn:
        with popen(['dask-scheduler', '--bokeh-port', '0']) as proc:
            count = 0
            while count < 1:
                line = proc.stderr.readline()
                if b'bokeh' in line.lower() or b'web' in line.lower():
                    count += 1
                    # The announced address must not be the ":0" placeholder.
                    assert b':0' not in line
def test_scheduler_file():
    """The scheduler publishes its address in the scheduler file, and a
    Client can bootstrap from that file alone."""
    with tmpfile() as fn:
        scheduler = yield Scheduler(scheduler_file=fn, port=0)
        with open(fn) as fh:
            published = json.load(fh)
        assert published["address"] == scheduler.address
        client = yield Client(scheduler_file=fn, loop=scheduler.loop,
                              asynchronous=True)
        yield client.close()
        yield scheduler.close()
def test_scheduler_file():
    """The scheduler records its address in the scheduler file and a
    Client can connect from that file alone."""
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(0)  # port 0: bind any free port
        with open(fn) as f:
            data = json.load(f)
        assert data['address'] == s.address
        c = yield Client(scheduler_file=fn, loop=s.loop, asynchronous=True)
        # BUGFIX: close the client before the scheduler so it does not
        # leak and keep retrying a connection after the scheduler is gone.
        yield c.close()
        yield s.close()
def test_serialize_deserialize_variable():
    """A netCDF4 Variable survives a serialize/deserialize round trip
    with its dimensions, dtype and data intact."""
    with tmpfile() as fn:
        create_test_dataset(fn)
        with netCDF4.Dataset(fn, mode='r') as ds:
            original = ds.variables['x']
            restored = deserialize(*serialize(original))
            assert isinstance(restored, netCDF4.Variable)
            assert restored.dimensions == ('x',)
            assert original.dtype == restored.dtype
            assert (original[:] == restored[:]).all()
def test_worker_dir(worker):
    """Each worker's local directory lives under the configured
    ``local_directory`` path, and no two workers share one."""
    with tmpfile() as fn:

        @gen_cluster(client=True, worker_kwargs={"local_directory": fn})
        def check(c, s, a, b):
            paths = [w.local_directory for w in s.workers.values()]
            assert all(p.startswith(fn) for p in paths)
            assert len(set(paths)) == 2  # distinct per worker

        check()
def test_bokeh_worker(loop, mpirun):
    """dask-mpi should serve the worker Bokeh dashboard on the port given
    via ``--bokeh-worker-port``."""
    with tmpfile(extension="json") as fn:
        cmd = mpirun + [
            "-np", "2", "dask-mpi", "--scheduler-file", fn,
            "--bokeh-worker-port", "59584"
        ]
        with popen(cmd, stdin=FNULL):
            check_port_okay(59584)
def test_serialize_deserialize_variable():
    """Round-trip a netCDF4 Variable; dimensions, dtype and values match."""
    with tmpfile() as fn:
        create_test_dataset(fn)
        with netCDF4.Dataset(fn, mode='r') as ds:
            src = ds.variables['x']
            out = deserialize(*serialize(src))
            assert isinstance(out, netCDF4.Variable)
            assert out.dimensions == ('x',)
            assert src.dtype == out.dtype
            assert (src[:] == out[:]).all()
def test_local_directory(loop, nanny):
    """``--local-directory`` should be reported as the worker's
    local_directory in the scheduler info."""
    with tmpfile() as fn:
        with popen(['dask-scheduler', '--no-bokeh']) as sched:
            with popen(['dask-worker', '127.0.0.1:8786', nanny, '--no-bokeh',
                        '--local-directory', fn]) as worker_proc:
                with Client('127.0.0.1:8786', loop=loop) as c:
                    # Wait for the worker to register with the scheduler.
                    while not c.scheduler_info()['workers']:
                        sleep(0.1)
                    info = c.scheduler_info()
                    # FIX: the original rebound `worker` (the popen handle)
                    # to this info dict, shadowing the context-manager
                    # target; use a distinct name.
                    worker_info = list(info['workers'].values())[0]
                    assert worker_info['local_directory'] == fn
def test_scheduler_file(loop, nanny):
    """Workers and clients can discover the scheduler via
    ``--scheduler-file``."""
    with tmpfile() as fn:
        scheduler_cmd = ["dask-scheduler", "--no-bokeh",
                         "--scheduler-file", fn]
        worker_cmd = ["dask-worker", "--scheduler-file", fn, nanny,
                      "--no-bokeh"]
        with popen(scheduler_cmd) as sched, popen(worker_cmd):
            with Client(scheduler_file=fn, loop=loop) as c:
                # Poll until the worker registers; fail after ten seconds.
                limit = time() + 10
                while not c.scheduler_info()["workers"]:
                    sleep(0.1)
                    assert time() < limit
def test_sink_to_file():
    """sink_to_file writes each emitted element as one line in the file."""
    with tmpfile() as fn:
        source = Stream()
        with sink_to_file(fn, source) as f:
            source.emit('a')
            source.emit('b')
        # Read back after the sink context closes so the file is flushed.
        with open(fn) as f:
            data = f.read()
        assert data == 'a\nb\n'
def test_serialize_deserialize_dataset():
    """A read-only netCDF4 Dataset round-trips through serialization and
    still points at the same file with the same contents."""
    with tmpfile() as fn:
        create_test_dataset(fn)
        with netCDF4.Dataset(fn, mode='r') as original:
            clone = deserialize(*serialize(original))
            assert isinstance(clone, netCDF4.Dataset)
            assert original.filepath() == clone.filepath()
            x = clone.variables['x']
            assert x.dimensions == ('x',)
            assert x.dtype == np.int32
            assert (x[:] == np.arange(3)).all()
async def test_h5py_serialize_2(c, s, a, b):
    """A dask array over an h5py dataset (dask chunks of 3 vs HDF5 chunks
    of 4) still sums correctly on the cluster."""
    with tmpfile() as fn:
        with h5py.File(fn, mode="a") as f:
            dset = f.create_dataset("/group/x", shape=(12,), dtype="i4",
                                    chunks=(4,))
            dset[:] = [1, 2, 3, 4] * 3
        with h5py.File(fn, mode="r") as f:
            arr = da.from_array(f["/group/x"], chunks=(3,))
            total = await c.compute(arr.sum())
            assert total == (1 + 2 + 3 + 4) * 3
def test_serialize_deserialize_file():
    """An h5py.File opened read-only round-trips through serialization,
    keeping its filename, mode, and contents."""
    with tmpfile() as fn:
        with h5py.File(fn, mode='a') as f:
            f.create_dataset('/x', shape=(2, 2), dtype='i4')
        with h5py.File(fn, mode='r') as original:
            restored = deserialize(*serialize(original))
            assert isinstance(restored, h5py.File)
            assert original.filename == restored.filename
            assert original.mode == restored.mode
            assert restored['x'].shape == (2, 2)
def job_file(self, script):
    """Write a job submission script to a temporary file.

    Parameters
    ----------
    script : str
        Script contents.

    Yields
    ------
    str
        Path of the temporary ``.sh`` file; it is cleaned up when the
        surrounding tmpfile context exits.
    """
    with tmpfile(extension="sh") as fn:
        with open(fn, "w") as f:
            # FIX: lazy %-style logging args instead of an f-string, so
            # the message is only built when DEBUG is actually enabled.
            logger.debug("writing job script: %s\n%s", fn, script)
            f.write(script)
        # Yield after the file is closed so consumers see flushed content.
        yield fn
def test_serialize_deserialize_dataset():
    """An h5py Dataset nested in groups round-trips with its name, file,
    and values preserved."""
    with tmpfile() as fn:
        with h5py.File(fn, mode="a") as f:
            f.create_dataset("/group1/group2/x", shape=(2, 2), dtype="i4")
        with h5py.File(fn, mode="r") as f:
            src = f["group1/group2/x"]
            dst = deserialize(*serialize(src))
            assert isinstance(dst, h5py.Dataset)
            assert src.name == dst.name
            assert src.file.filename == dst.file.filename
            assert (src[:] == dst[:]).all()
def test_serialize_deserialize_file():
    """Round-trip a read-only h5py.File through serialize/deserialize."""
    with tmpfile() as fn:
        with h5py.File(fn, mode="a") as f:
            f.create_dataset("/x", shape=(2, 2), dtype="i4")
        with h5py.File(fn, mode="r") as f:
            roundtripped = deserialize(*serialize(f))
            assert isinstance(roundtripped, h5py.File)
            assert roundtripped.filename == f.filename
            assert roundtripped.mode == f.mode
            assert roundtripped["x"].shape == (2, 2)
def test_serialize_deserialize_dataset():
    """Deserializing a serialized read-only netCDF4 Dataset yields an
    equivalent handle onto the same file."""
    with tmpfile() as fn:
        create_test_dataset(fn)
        with netCDF4.Dataset(fn, mode='r') as ds:
            result = deserialize(*serialize(ds))
            assert isinstance(result, netCDF4.Dataset)
            assert result.filepath() == ds.filepath()
            var = result.variables['x']
            assert var.dimensions == ('x',)
            assert var.dtype == np.int32
            assert (var[:] == np.arange(3)).all()
def test_bokeh_scheduler(loop):
    """dask-mpi serves the scheduler Bokeh dashboard on ``--bokeh-port``
    while running, and stops serving once the processes exit."""
    with tmpfile(extension='json') as fn:
        with popen([
                'mpirun', '-np', '2', 'dask-mpi', '--scheduler-file', fn,
                '--bokeh-port', '59583'
        ], stdin=FNULL):
            check_port_okay(59583)
        # After the mpirun context exits, the dashboard must be gone.
        with pytest.raises(Exception):
            requests.get('http://localhost:59583/status/')
def test_dashboard_port_zero(loop):
    """``--dashboard-address :0`` should bind a real free port; the log
    line announcing the dashboard must not advertise port 0 itself.

    FIX: removed the enclosing ``with tmpfile() as fn`` — ``fn`` was
    never used inside the block.
    """
    pytest.importorskip("bokeh")
    with popen(["dask-scheduler", "--dashboard-address", ":0"]) as proc:
        count = 0
        while count < 1:
            line = proc.stderr.readline()
            if b"dashboard" in line.lower():
                sleep(0.01)
                count += 1
                # The announced address must contain the chosen port,
                # never the literal ":0" placeholder.
                assert b":0" not in line
def test_serialize_deserialize_dataset():
    """A dataset nested inside h5py groups keeps its name, file, and data
    across a serialize/deserialize round trip."""
    with tmpfile() as fn:
        with h5py.File(fn, mode='a') as f:
            f.create_dataset('/group1/group2/x', shape=(2, 2), dtype='i4')
        with h5py.File(fn, mode='r') as f:
            before = f['group1/group2/x']
            after = deserialize(*serialize(before))
            assert isinstance(after, h5py.Dataset)
            assert after.name == before.name
            assert after.file.filename == before.file.filename
            assert (after[:] == before[:]).all()
def test_scheduler_file(loop, nanny):
    """A worker started with ``--scheduler-file`` finds the scheduler
    through the shared file and registers within the timeout."""
    with tmpfile() as fn:
        with popen(['dask-scheduler', '--no-bokeh', '--scheduler-file', fn]) as sched:
            with popen(
                    ['dask-worker', '--scheduler-file', fn, nanny, '--no-bokeh']):
                with Client(scheduler_file=fn, loop=loop) as c:
                    start = time()
                    # Poll until the worker shows up; fail after 10 seconds.
                    while not c.scheduler_info()['workers']:
                        sleep(0.1)
                        assert time() < start + 10
def test_scheduler_file():
    """The scheduler writes its address into the scheduler file; a Client
    constructed with ``start=False`` can then connect from that file."""
    with tmpfile() as fn:
        s = Scheduler(scheduler_file=fn)
        s.start(0)  # port 0: bind any free port
        with open(fn) as f:
            data = json.load(f)
        assert data['address'] == s.address
        c = Client(scheduler_file=fn, loop=s.loop, start=False)
        yield c._start()
        # NOTE(review): the client is never explicitly closed before the
        # scheduler shuts down — confirm that is intentional.
        yield s.close()
def test_serialize_deserialize_group():
    """An h5py.Group round-trips through serialization, still referencing
    the same file and exposing its datasets."""
    with tmpfile() as fn:
        with h5py.File(fn, mode='a') as f:
            f.create_dataset('/group1/group2/x', shape=(2, 2), dtype='i4')
        with h5py.File(fn, mode='r') as f:
            grp = f['/group1/group2']
            restored = deserialize(*serialize(grp))
            assert isinstance(restored, h5py.Group)
            assert restored.file.filename == grp.file.filename
            assert restored['x'].shape == (2, 2)
def test_basic(loop, nanny):
    """dask-mpi launched over 4 MPI ranks yields a scheduler plus three
    workers, and the resulting cluster can run tasks."""
    with tmpfile() as fn:
        with popen(['mpirun', '--np', '4', 'dask-mpi', '--scheduler-file', fn,
                    nanny], stdin=subprocess.DEVNULL):
            with Client(scheduler_file=fn) as c:
                start = time()
                # One rank hosts the scheduler, so expect 3 workers.
                while len(c.scheduler_info()['workers']) != 3:
                    assert time() < start + 10
                    sleep(0.2)

                assert c.submit(lambda x: x + 1, 10, workers=1).result() == 11
def test_h5py_serialize_2(c, s, a, b):
    """Summing a dask array backed by a chunked h5py dataset on the
    cluster produces the expected total."""
    with tmpfile() as fn:
        with h5py.File(fn, mode='a') as f:
            data = f.create_dataset('/group/x', shape=(12,), dtype='i4',
                                    chunks=(4,))
            data[:] = [1, 2, 3, 4] * 3
        with h5py.File(fn, mode='r') as f:
            arr = da.from_array(f['/group/x'], chunks=(3,))
            total = yield c.compute(arr.sum())
            assert total == (1 + 2 + 3 + 4) * 3
def test_serialize_deserialize_group():
    """Round-trip an h5py.Group: same backing file, datasets reachable."""
    with tmpfile() as fn:
        with h5py.File(fn, mode="a") as f:
            f.create_dataset("/group1/group2/x", shape=(2, 2), dtype="i4")
        with h5py.File(fn, mode="r") as f:
            original = f["/group1/group2"]
            copy = deserialize(*serialize(original))
            assert isinstance(copy, h5py.Group)
            assert copy.file.filename == original.file.filename
            assert copy["x"].shape == (2, 2)
async def test_pod_from_yaml(image_name, ns, auth):
    """KubeCluster.from_yaml builds a working cluster from a pod spec
    written to a YAML file: it scales, runs tasks, and workers can talk
    to each other."""
    test_yaml = {
        "kind": "Pod",
        "metadata": {
            "labels": {
                "app": "dask",
                "component": "dask-worker"
            }
        },
        "spec": {
            "containers": [{
                "args": [
                    "dask-worker",
                    "$(DASK_SCHEDULER_ADDRESS)",
                    "--nthreads",
                    "1",
                ],
                "image": image_name,
                "imagePullPolicy": "IfNotPresent",
                "name": "dask-worker",
            }]
        },
    }
    with tmpfile(extension="yaml") as fn:
        with open(fn, mode="w") as f:
            yaml.dump(test_yaml, f)
        # The file is closed (and flushed) before from_yaml reads it back.
        async with KubeCluster.from_yaml(f.name, namespace=ns, auth=auth,
                                         **cluster_kwargs) as cluster:
            assert cluster.namespace == ns
            cluster.scale(2)
            await cluster
            async with Client(cluster, asynchronous=True) as client:
                future = client.submit(lambda x: x + 1, 10)
                result = await future.result(timeout=10)
                assert result == 11

                # Wait for both workers to appear, with a 20s timeout.
                start = time()
                while len(cluster.scheduler_info["workers"]) < 2:
                    await asyncio.sleep(0.1)
                    assert time() < start + 20, "timeout"

                # Ensure that inter-worker communication works well
                futures = client.map(lambda x: x + 1, range(10))
                total = client.submit(sum, futures)
                assert (await total) == sum(map(lambda x: x + 1, range(10)))
                assert all((await client.has_what()).values())
def test_basic(loop, nanny, mpirun):
    """dask-mpi over 4 ranks gives one scheduler plus three workers, and
    the cluster can run tasks."""
    with tmpfile(extension="json") as fn:
        cmd = mpirun + ["-np", "4", "dask-mpi", "--scheduler-file", fn, nanny]
        with popen(cmd):
            with Client(scheduler_file=fn) as c:
                start = time()
                # One rank hosts the scheduler, leaving three workers.
                while len(c.scheduler_info()["workers"]) != 3:
                    assert time() < start + 10
                    sleep(0.2)

                assert c.submit(lambda x: x + 1, 10, workers=1).result() == 11
def test_bokeh_scheduler(loop, mpirun):
    """dask-mpi serves the scheduler dashboard on ``--bokeh-port`` while
    the processes run, and stops serving afterwards."""
    with tmpfile(extension="json") as fn:
        cmd = mpirun + [
            "-np", "2", "dask-mpi", "--scheduler-file", fn,
            "--bokeh-port", "59583"
        ]
        with popen(cmd, stdin=FNULL):
            check_port_okay(59583)
        # Once mpirun exits, the dashboard port must be closed again.
        with pytest.raises(Exception):
            requests.get("http://localhost:59583/status/")
def test_local_directory(loop, nanny):
    """The worker's reported local_directory should live under the path
    given via ``--local-directory``."""
    with tmpfile() as fn:
        with popen(['dask-scheduler', '--no-bokeh']) as sched:
            with popen(['dask-worker', '127.0.0.1:8786', nanny, '--no-bokeh',
                        '--local-directory', fn]) as worker_proc:
                with Client('127.0.0.1:8786', loop=loop, timeout=10) as c:
                    start = time()
                    # Wait up to ~8s for the worker to register.
                    while not c.scheduler_info()['workers']:
                        sleep(0.1)
                        assert time() < start + 8
                    info = c.scheduler_info()
                    # FIX: the original rebound `worker` (the popen handle)
                    # to this info dict, shadowing the context-manager
                    # target; use a distinct name.
                    worker_info = list(info['workers'].values())[0]
                    assert worker_info['local_directory'].startswith(fn)
def test_memmap():
    """np.memmap arrays survive serialize/deserialize with equal contents."""
    with tmpfile("npy") as fn:
        # Create the backing file before memory-mapping it.
        open(fn, "wb").close()
        mm = np.memmap(fn, shape=(5, 5), dtype="i4", mode="readwrite")
        mm[:] = 5
        header, frames = serialize(mm)
        if "compression" in header:
            frames = decompress(header, frames)
        roundtripped = deserialize(header, frames)
        np.testing.assert_equal(mm, roundtripped)
def test_basic(loop, nanny):
    """dask-mpi with 4 ranks: one scheduler, three workers, and a task
    pinned to a specific rank's worker completes."""
    with tmpfile(extension='json') as fn:
        with popen(
                ['mpirun', '-np', '4', 'dask-mpi', '--scheduler-file', fn,
                 nanny]):
            with Client(scheduler_file=fn) as c:
                start = time()
                # One rank hosts the scheduler, leaving three workers.
                while len(c.scheduler_info()['workers']) != 3:
                    assert time() < start + 10
                    sleep(0.2)

                assert c.submit(lambda x: x + 1, 10,
                                workers='mpi-rank-1').result() == 11
def test_memmap():
    """A serialized np.memmap deserializes to an equal array."""
    with tmpfile('npy') as fn:
        with open(fn, 'wb'):
            pass  # touch the backing file so memmap can open it
        data = np.memmap(fn, shape=(5, 5), dtype='i4', mode='readwrite')
        data[:] = 5
        header, frames = serialize(data)
        if 'compression' in header:
            frames = decompress(header, frames)
        result = deserialize(header, frames)
        np.testing.assert_equal(data, result)
def test_h5py_serialize(c, s, a, b):
    """A dask array over an h5py dataset guarded by a SerializableLock
    computes on the cluster and matches the on-disk data."""
    from dask.utils import SerializableLock

    lock = SerializableLock('hdf5')
    with tmpfile() as fn:
        with h5py.File(fn, mode='a') as f:
            created = f.create_dataset('/group/x', shape=(4,), dtype='i4',
                                       chunks=(2,))
            created[:] = [1, 2, 3, 4]
        with h5py.File(fn, mode='r') as f:
            dset = f['/group/x']
            arr = da.from_array(dset, chunks=dset.chunks, lock=lock)
            result = yield c.compute(arr)
            assert (result[:] == dset[:]).all()
def test_log_exception_on_failed_task(c, s, a, b):
    """A task that raises should have its traceback logged by the worker
    logger (verified by attaching a temporary FileHandler)."""
    with tmpfile() as fn:
        fh = logging.FileHandler(fn)
        try:
            from distributed.worker import logger
            logger.addHandler(fh)

            future = c.submit(div, 1, 0)
            yield wait(future)

            yield gen.sleep(0.1)  # give the logger a moment to emit
            fh.flush()
            with open(fn) as f:
                text = f.read()

            assert "ZeroDivisionError" in text
            assert "Exception" in text
        finally:
            # Always detach the handler so other tests are unaffected.
            logger.removeHandler(fh)
def test_no_scheduler(loop):
    """``dask-mpi --no-scheduler`` should add workers to an existing
    cluster instead of starting a second scheduler."""
    with tmpfile() as fn:
        with popen(['mpirun', '--np', '2', 'dask-mpi', '--scheduler-file',
                    fn], stdin=subprocess.DEVNULL):
            with Client(scheduler_file=fn) as c:
                start = time()
                # Two ranks: one scheduler plus one worker.
                while len(c.scheduler_info()['workers']) != 1:
                    assert time() < start + 10
                    sleep(0.2)

                assert c.submit(lambda x: x + 1, 10).result() == 11

                # The extra mpirun joins the same cluster as a worker only.
                with popen(['mpirun', '--np', '1', 'dask-mpi',
                            '--scheduler-file', fn, '--no-scheduler']):
                    start = time()
                    while len(c.scheduler_info()['workers']) != 2:
                        assert time() < start + 10
                        sleep(0.2)
def test_bokeh(loop):
    """dask-mpi serves both the scheduler (``--bokeh-port``) and worker
    (``--bokeh-worker-port``) dashboards while running, and stops serving
    once the processes exit."""
    with tmpfile() as fn:
        with popen(['mpirun', '--np', '2', 'dask-mpi', '--scheduler-file', fn,
                    '--bokeh-port', '59583', '--bokeh-worker-port', '59584'],
                   stdin=subprocess.DEVNULL):
            # BUGFIX: was [59853, 59584] — 59853 is a transposition of the
            # scheduler dashboard port 59583 passed on the command line, so
            # the scheduler dashboard was never actually checked.
            for port in [59583, 59584]:
                start = time()
                while True:
                    try:
                        response = requests.get('http://localhost:%d/status/'
                                                % port)
                        assert response.ok
                        break
                    except Exception:
                        sleep(0.1)
                        assert time() < start + 20
        # After shutdown the scheduler dashboard must be unreachable.
        with pytest.raises(Exception):
            requests.get('http://localhost:59583/status/')
def test_serialize_deserialize_group():
    """netCDF4 groups, and the variables inside them, survive a
    serialize/deserialize round trip."""
    with tmpfile() as fn:
        create_test_dataset(fn)
        with netCDF4.Dataset(fn, mode='r') as ds:
            # Groups keep their name and their child groups/variables.
            for path in ['group', 'group/group1']:
                before = ds[path]
                after = deserialize(*serialize(before))
                assert isinstance(after, netCDF4.Group)
                assert after.name == before.name
                assert list(before.groups) == list(after.groups)
                assert list(before.variables) == list(after.variables)

            # Variables at every nesting level round-trip with their
            # dimensions, dtype, and data.
            variables = [ds.variables['x'],
                         ds['group'].variables['y'],
                         ds['group/group1'].variables['z']]
            for original in variables:
                restored = deserialize(*serialize(original))
                assert isinstance(restored, netCDF4.Variable)
                assert restored.dimensions == ('x',)
                assert original.dtype == restored.dtype
                assert (original[:] == restored[:]).all()
def test_upload_file_pyc(c, s, w):
    """``upload_file`` with a compiled ``.pyc`` should make the module
    importable and callable on the worker."""
    with tmpfile() as dirname:
        os.mkdir(dirname)
        with open(os.path.join(dirname, 'foo.py'), mode='w') as f:
            f.write('def f():\n return 123')
        sys.path.append(dirname)
        try:
            import foo
            assert foo.f() == 123
            pyc = cache_from_source(os.path.join(dirname, 'foo.py'))
            assert os.path.exists(pyc)
            yield c.upload_file(pyc)

            def g():
                import foo
                # BUGFIX: was `return foo.x` — the written module only
                # defines f(); foo.x would raise AttributeError on the
                # worker, and the assertion below expects f()'s value.
                return foo.f()

            future = c.submit(g)
            result = yield future
            assert result == 123
        finally:
            # Always undo the sys.path mutation so other tests are clean.
            sys.path.remove(dirname)