def local_cluster_url():
    """
    Shared dask cluster that can be used repeatedly by different executors.

    This allows numba caching across tests without sharing the executor itself,
    for example.
    """
    cluster_port = find_unused_port()
    devices = detect()
    spec = cluster_spec(
        # Only use at most 2 CPUs and 1 GPU
        cpus=devices['cpus'][:2],
        cudas=devices['cudas'][:1],
        has_cupy=devices['has_cupy'],
    )
    cluster_kwargs = {
        'silence_logs': logging.WARN,
        'scheduler': {
            'cls': Scheduler,
            'options': {'port': cluster_port},
        },
    }
    cluster = dd.SpecCluster(workers=spec, **(cluster_kwargs or {}))
    yield 'tcp://localhost:%d' % cluster_port
    cluster.close()
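# A minimal usage sketch for the shared cluster above, assuming local_cluster_url
# is registered as a module- or session-scoped pytest fixture. The test name and
# its use of the hdf5_ds_1 fixture are illustrative; DaskJobExecutor.connect()
# and DebugDeviceUDF appear elsewhere in this section.
def test_uses_shared_cluster(local_cluster_url, hdf5_ds_1):
    # Connect a fresh executor to the already running scheduler instead of
    # spinning up a new cluster for this test.
    with DaskJobExecutor.connect(scheduler_uri=local_cluster_url) as executor:
        ctx = api.Context(executor=executor)
        ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)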
def test_start_local_cupyonly(hdf5_ds_1):
    cudas = detect()['cudas']
    # Make sure we have enough partitions
    hdf5_ds_1.set_num_cores(len(cudas))
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    spec = cluster_spec(cpus=(), cudas=cudas, has_cupy=True)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        # Uses ApplyMasksUDF, which supports CuPy
        analysis = ctx.create_mask_analysis(
            dataset=hdf5_ds_1, factories=[lambda: mask]
        )
        results = ctx.run(analysis)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        # No CPU compute resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy',)), dataset=hdf5_ds_1)
        cuda_res = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda',)), dataset=hdf5_ds_1)

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in cuda_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in udf_res['backend'].data[0].values():
        # uses CuPy
        print(val)
        assert 'cupy' in val

    for val in cuda_res['backend'].data[0].values():
        # no CuPy, i.e. NumPy
        print(val)
        assert 'numpy' in val

    # Check that each GPU got work. This may not always hold,
    # since it depends on scheduler behavior.
    assert set(found.keys()) == set(cudas)

    assert np.all(udf_res['device_class'].data == 'cuda')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
async def async_executor():
    spec = cluster_spec(cpus=[0, 1], cudas=[], has_cupy=False)
    sync_executor = await sync_to_async(
        functools.partial(DaskJobExecutor.make_local, spec=spec)
    )
    executor = AsyncAdapter(wrapped=sync_executor)
    yield executor
    await executor.close()
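# A usage sketch for the async_executor fixture above, assuming it is registered
# as an async pytest fixture (e.g. via pytest-asyncio). It further assumes that
# AsyncAdapter exposes the wrapped executor's methods, such as run_function, as
# coroutines; the test name and the worker-side helper are hypothetical.
async def test_uses_async_executor(async_executor):
    def worker_pid():
        # Runs on one of the local cluster's worker processes
        import os
        return os.getpid()

    pid = await async_executor.run_function(worker_pid)
    assert isinstance(pid, int)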
async def put(self):
    # TODO: extract json request data stuff into mixin?
    request_data = tornado.escape.json_decode(self.request.body)
    connection = request_data['connection']
    if connection["type"].lower() == "tcp":
        try:
            sync_executor = await sync_to_async(partial(
                DaskJobExecutor.connect,
                scheduler_uri=connection['address'],
            ))
        except Exception as e:
            msg = Message(self.state).cluster_conn_error(msg=str(e))
            log_message(msg)
            self.write(msg)
            return None
    elif connection["type"].lower() == "local":
        devices = detect()
        options = {
            "local_directory": self.state.get_local_directory()
        }
        if "numWorkers" in connection:
            devices["cpus"] = range(connection["numWorkers"])
        # Deactivate GPU support in the local cluster until the GUI allows
        # deactivating it, so we don't interfere with other applications
        # using the GPU.
        # FIXME implement GUI interface https://github.com/LiberTEM/LiberTEM/issues/803
        devices["cudas"] = []
        sync_executor = await sync_to_async(
            partial(
                DaskJobExecutor.make_local,
                spec=cluster_spec(**devices, options=options),
            )
        )
    else:
        raise ValueError("unknown connection type")
    executor = AsyncAdapter(wrapped=sync_executor)
    await self.state.executor_state.set_executor(executor, request_data)
    await self.state.dataset_state.verify()
    datasets = await self.state.dataset_state.serialize_all()
    msg = Message(self.state).initial_state(
        jobs=self.state.job_state.serialize_all(),
        datasets=datasets,
        analyses=self.state.analysis_state.serialize_all(),
        compound_analyses=self.state.compound_analysis_state.serialize_all(),
    )
    log_message(msg)
    # FIXME: don't broadcast, only send to the websocket that matches this HTTP connection
    # (is this even possible?)
    self.event_registry.broadcast_event(msg)
    await self.send_existing_job_results()
    self.write({
        "status": "ok",
        "connection": connection,
    })
async def put(self):
    # TODO: extract json request data stuff into mixin?
    request_data = tornado.escape.json_decode(self.request.body)
    connection = request_data['connection']
    pool = AsyncAdapter.make_pool()
    if connection["type"].lower() == "tcp":
        try:
            sync_executor = await sync_to_async(partial(
                DaskJobExecutor.connect,
                scheduler_uri=connection['address'],
            ), pool=pool)
        except Exception as e:
            msg = Message(self.state).cluster_conn_error(msg=str(e))
            log_message(msg)
            self.write(msg)
            return None
    elif connection["type"].lower() == "local":
        devices = detect()
        options = {
            "local_directory": self.state.get_local_directory()
        }
        if "numWorkers" in connection:
            devices["cpus"] = range(connection["numWorkers"])
        devices["cudas"] = connection.get("cudas", [])
        sync_executor = await sync_to_async(partial(
            DaskJobExecutor.make_local,
            spec=cluster_spec(
                **devices,
                options=options,
                preload=self.state.get_preload(),
            ),
        ), pool=pool)
    else:
        raise ValueError("unknown connection type")
    executor = AsyncAdapter(wrapped=sync_executor, pool=pool)
    await self.state.executor_state.set_executor(executor, request_data)
    await self.state.dataset_state.verify()
    datasets = await self.state.dataset_state.serialize_all()
    msg = Message(self.state).initial_state(
        jobs=self.state.job_state.serialize_all(),
        datasets=datasets,
        analyses=self.state.analysis_state.serialize_all(),
        compound_analyses=self.state.compound_analysis_state.serialize_all(),
    )
    log_message(msg)
    # FIXME: don't broadcast, only send to the websocket that matches this HTTP connection
    # (is this even possible?)
    self.event_registry.broadcast_event(msg)
    await self.engine.send_existing_job_results()
    self.write({
        "status": "ok",
        "connection": connection,
    })
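# For reference: sketches of the JSON bodies the PUT handlers above accept,
# reconstructed from the keys they read. The concrete values are illustrative only.
EXAMPLE_TCP_CONNECTION = {
    "connection": {
        "type": "TCP",
        # Scheduler URI of an already running dask cluster
        "address": "tcp://localhost:8786",
    },
}

EXAMPLE_LOCAL_CONNECTION = {
    "connection": {
        "type": "LOCAL",
        # Optional: limit the number of CPU workers; defaults to the detected CPUs
        "numWorkers": 2,
        # Optional (second handler only): CUDA device ids to use; defaults to none
        "cudas": [0],
    },
}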
def test_start_local_cpuonly(hdf5_ds_1):
    # We don't use all CPUs since that might be too many
    cpus = (0, 1)
    hdf5_ds_1.set_num_cores(len(cpus))
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    spec = cluster_spec(cpus=cpus, cudas=(), has_cupy=False)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        analysis = ctx.create_mask_analysis(
            dataset=hdf5_ds_1, factories=[lambda: mask]
        )
        results = ctx.run(analysis)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        # No CuPy resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',)), dataset=hdf5_ds_1)

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CUDA
        assert val["cuda"] is None
        found[val["cpu"]] = True

    for val in udf_res['backend'].data[0].values():
        print(val)
        # no CUDA
        assert 'numpy' in val

    # Check that each CPU got work. This may not always hold,
    # since it depends on scheduler behavior.
    assert set(found.keys()) == set(cpus)

    assert np.all(udf_res['device_class'].data == 'cpu')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
def test_preload(hdf5_ds_1):
    # We don't use all CPUs since that might be too many
    cpus = (0, 1)
    hdf5_ds_1.set_num_cores(len(cpus))

    class CheckEnvUDF(NoOpUDF):
        def process_tile(self, tile):
            assert os.environ['LT_TEST_1'] == 'hello'
            assert os.environ['LT_TEST_2'] == 'world'

    preloads = (
        "import os; os.environ['LT_TEST_1'] = 'hello'",
        "import os; os.environ['LT_TEST_2'] = 'world'",
    )

    spec = cluster_spec(cpus=cpus, cudas=(), has_cupy=False, preload=preloads)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        ctx.run_udf(udf=CheckEnvUDF(), dataset=hdf5_ds_1)
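# A sketch of the same preload mechanism outside of a test: each string in
# `preload` is executed on the workers (the test above verifies the effect from
# within a UDF), so it can prepare the worker environment before any UDF runs.
# The function name and the environment variable below are purely illustrative.
def make_executor_with_preload():
    preloads = (
        "import os; os.environ['MY_WORKER_FLAG'] = '1'",
    )
    spec = cluster_spec(cpus=(0, 1), cudas=(), has_cupy=False, preload=preloads)
    return DaskJobExecutor.make_local(spec=spec)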
def test_start_local_cudaonly(hdf5_ds_1):
    cudas = detect()['cudas']
    # Make sure we have enough partitions
    hdf5_ds_1.set_num_cores(len(cudas))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]

    spec = cluster_spec(cpus=(), cudas=cudas, has_cupy=False)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda',)), dataset=hdf5_ds_1)
        # No CPU compute resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy',)), dataset=hdf5_ds_1)
        # No ndarray (CuPy) resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',)), dataset=hdf5_ds_1)

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in udf_res['backend'].data[0].values():
        print(val)
        # CUDA, but no CuPy, i.e. use NumPy
        assert 'numpy' in val

    # Check that each GPU got work. This may not always hold,
    # since it depends on scheduler behavior.
    assert set(found.keys()) == set(cudas)

    assert np.all(udf_res['device_class'].data == 'cuda')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))