Example #1
File: conftest.py  Project: GHzytp/LiberTEM
def local_cluster_url():
    """
    Shared Dask cluster that can be used repeatedly by different executors.

    This allows, for example, numba caching across tests without sharing
    the executor.
    """
    cluster_port = find_unused_port()
    devices = detect()
    spec = cluster_spec(
        # Only use at most 2 CPUs and 1 GPU
        cpus=devices['cpus'][:2],
        cudas=devices['cudas'][:1],
        has_cupy=devices['has_cupy'])

    cluster_kwargs = {
        'silence_logs': logging.WARN,
        'scheduler': {
            'cls': Scheduler,
            'options': {
                'port': cluster_port
            },
        },
    }

    cluster = dd.SpecCluster(workers=spec, **cluster_kwargs)

    yield 'tcp://localhost:%d' % cluster_port

    cluster.close()
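
For context, a minimal sketch of how a test might consume this fixture, assuming the
generator above is registered as a broadly scoped pytest fixture named local_cluster_url;
the connect-by-URI call and the Context/UDF usage mirror the later examples, and the test
body itself is purely illustrative:

def test_uses_shared_cluster(local_cluster_url, hdf5_ds_1):
    # Connect a fresh executor to the shared scheduler instead of starting
    # a new cluster for this test; the URL comes from the fixture above.
    with DaskJobExecutor.connect(scheduler_uri=local_cluster_url) as executor:
        ctx = api.Context(executor=executor)
        ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)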
Example #2
def test_start_local_cupyonly(hdf5_ds_1):
    cudas = detect()['cudas']
    # Make sure we have enough partitions
    hdf5_ds_1.set_num_cores(len(cudas))
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    spec = cluster_spec(cpus=(), cudas=cudas, has_cupy=True)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        # Uses ApplyMasksUDF, which supports CuPy
        analysis = ctx.create_mask_analysis(
            dataset=hdf5_ds_1, factories=[lambda: mask]
        )
        results = ctx.run(analysis)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        # No CPU compute resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy',)), dataset=hdf5_ds_1)
        cuda_res = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda',)), dataset=hdf5_ds_1)

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in cuda_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in udf_res['backend'].data[0].values():
        # use CuPy
        print(val)
        assert 'cupy' in val

    for val in cuda_res['backend'].data[0].values():
        # no CuPy, i.e. NumPy
        print(val)
        assert 'numpy' in val

    # Check that each GPU got work. Whether this always holds
    # depends on the scheduler behavior.
    assert set(found.keys()) == set(cudas)

    assert np.all(udf_res['device_class'].data == 'cuda')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
Example #3
async def async_executor():
    spec = cluster_spec(cpus=[0, 1], cudas=[], has_cupy=False)
    sync_executor = await sync_to_async(
        functools.partial(DaskJobExecutor.make_local, spec=spec)
    )
    executor = AsyncAdapter(wrapped=sync_executor)
    yield executor
    await executor.close()
Example #4
    async def put(self):
        # TODO: extract json request data stuff into mixin?
        request_data = tornado.escape.json_decode(self.request.body)
        connection = request_data['connection']
        if connection["type"].lower() == "tcp":
            try:
                sync_executor = await sync_to_async(
                    partial(
                        DaskJobExecutor.connect,
                        scheduler_uri=connection['address'],
                    )
                )
            except Exception as e:
                msg = Message(self.state).cluster_conn_error(msg=str(e))
                log_message(msg)
                self.write(msg)
                return None
        elif connection["type"].lower() == "local":
            devices = detect()
            options = {
                "local_directory": self.state.get_local_directory()
            }
            if "numWorkers" in connection:
                devices["cpus"] = range(connection["numWorkers"])
            # Deactivate GPU support in local cluster until GUI allows deactivation
            # to not interfere with other applications using the GPU
            # FIXME implement GUI interface https://github.com/LiberTEM/LiberTEM/issues/803
            devices["cudas"] = []

            sync_executor = await sync_to_async(
                partial(
                    DaskJobExecutor.make_local,
                    spec=cluster_spec(**devices, options=options),
                )
            )
        else:
            raise ValueError("unknown connection type")
        executor = AsyncAdapter(wrapped=sync_executor)
        await self.state.executor_state.set_executor(executor, request_data)
        await self.state.dataset_state.verify()
        datasets = await self.state.dataset_state.serialize_all()
        msg = Message(self.state).initial_state(
            jobs=self.state.job_state.serialize_all(),
            datasets=datasets, analyses=self.state.analysis_state.serialize_all(),
            compound_analyses=self.state.compound_analysis_state.serialize_all(),
        )
        log_message(msg)
        # FIXME: don't broadcast, only send to the websocket that matches this HTTP connection
        # (is this even possible?)
        self.event_registry.broadcast_event(msg)
        await self.send_existing_job_results()
        self.write({
            "status": "ok",
            "connection": connection,
        })
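
For reference, the shape of the JSON body this handler expects can be read off from the
keys it accesses; the concrete values below are placeholders, not taken from the source:

# Connect to an existing Dask scheduler ("type" is matched case-insensitively):
tcp_request = {
    "connection": {
        "type": "TCP",
        "address": "tcp://localhost:8786",  # placeholder scheduler URI
    }
}

# Start a local cluster; "numWorkers" is optional and overrides the detected CPUs.
# Note that this handler forces devices["cudas"] = [], so GPUs stay disabled here.
local_request = {
    "connection": {
        "type": "LOCAL",
        "numWorkers": 2,  # placeholder worker count
    }
}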
Example #5
    async def put(self):
        # TODO: extract json request data stuff into mixin?
        request_data = tornado.escape.json_decode(self.request.body)
        connection = request_data['connection']
        pool = AsyncAdapter.make_pool()
        if connection["type"].lower() == "tcp":
            try:
                sync_executor = await sync_to_async(
                    partial(
                        DaskJobExecutor.connect,
                        scheduler_uri=connection['address'],
                    ),
                    pool=pool,
                )
            except Exception as e:
                msg = Message(self.state).cluster_conn_error(msg=str(e))
                log_message(msg)
                self.write(msg)
                return None
        elif connection["type"].lower() == "local":
            devices = detect()
            options = {"local_directory": self.state.get_local_directory()}
            if "numWorkers" in connection:
                devices["cpus"] = range(connection["numWorkers"])
            devices["cudas"] = connection.get("cudas", [])

            sync_executor = await sync_to_async(
                partial(
                    DaskJobExecutor.make_local,
                    spec=cluster_spec(
                        **devices,
                        options=options,
                        preload=self.state.get_preload(),
                    ),
                ),
                pool=pool,
            )
        else:
            raise ValueError("unknown connection type")
        executor = AsyncAdapter(wrapped=sync_executor, pool=pool)
        await self.state.executor_state.set_executor(executor, request_data)
        await self.state.dataset_state.verify()
        datasets = await self.state.dataset_state.serialize_all()
        msg = Message(self.state).initial_state(
            jobs=self.state.job_state.serialize_all(),
            datasets=datasets,
            analyses=self.state.analysis_state.serialize_all(),
            compound_analyses=self.state.compound_analysis_state.serialize_all(),
        )
        log_message(msg)
        # FIXME: don't broadcast, only send to the websocket that matches this HTTP connection
        # (is this even possible?)
        self.event_registry.broadcast_event(msg)
        await self.engine.send_existing_job_results()
        self.write({
            "status": "ok",
            "connection": connection,
        })
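
Compared to Example #4, this variant runs the wrapped executor on a dedicated AsyncAdapter
pool, forwards preload commands from the server state, and honors a "cudas" list sent by
the client. A sketch of the extended local-connection body, with placeholder values:

local_request_with_gpus = {
    "connection": {
        "type": "LOCAL",
        "numWorkers": 2,  # placeholder worker count
        "cudas": [0],     # placeholder CUDA device ids; defaults to [] when omitted
    }
}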
Example #6
def test_start_local_cpuonly(hdf5_ds_1):
    # We don't use all CPUs since that might be too many
    cpus = (0, 1)
    hdf5_ds_1.set_num_cores(len(cpus))
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    spec = cluster_spec(cpus=cpus, cudas=(), has_cupy=False)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        analysis = ctx.create_mask_analysis(
            dataset=hdf5_ds_1, factories=[lambda: mask]
        )
        results = ctx.run(analysis)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        # No CuPy resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',)), dataset=hdf5_ds_1)

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )
    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CUDA
        assert val["cuda"] is None
        found[val["cpu"]] = True

    for val in udf_res['backend'].data[0].values():
        print(val)
        # NumPy backend, since there is no CUDA
        assert 'numpy' in val

    # Check that each CPU got work. Whether this always holds
    # depends on the scheduler behavior.
    assert set(found.keys()) == set(cpus)

    assert np.all(udf_res['device_class'].data == 'cpu')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
Example #7
def test_preload(hdf5_ds_1):
    # We don't use all CPUs since that might be too many
    cpus = (0, 1)
    hdf5_ds_1.set_num_cores(len(cpus))

    class CheckEnvUDF(NoOpUDF):
        def process_tile(self, tile):
            assert os.environ['LT_TEST_1'] == 'hello'
            assert os.environ['LT_TEST_2'] == 'world'

    preloads = (
        "import os; os.environ['LT_TEST_1'] = 'hello'",
        "import os; os.environ['LT_TEST_2'] = 'world'",
    )

    spec = cluster_spec(cpus=cpus, cudas=(), has_cupy=False, preload=preloads)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        ctx.run_udf(udf=CheckEnvUDF(), dataset=hdf5_ds_1)
Example #8
def test_start_local_cudaonly(hdf5_ds_1):
    cudas = detect()['cudas']
    # Make sure we have enough partitions
    hdf5_ds_1.set_num_cores(len(cudas))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]

    spec = cluster_spec(cpus=(), cudas=cudas, has_cupy=False)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda', )),
                              dataset=hdf5_ds_1)
        # No CPU compute resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy', )),
                            dataset=hdf5_ds_1)
        # No ndarray (CuPy) resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', )),
                            dataset=hdf5_ds_1)

    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in udf_res['backend'].data[0].values():
        print(val)
        # CUDA, but no CuPy, i.e. use NumPy
        assert 'numpy' in val

    # Check that each GPU got work. Whether this always holds
    # depends on the scheduler behavior.
    assert set(found.keys()) == set(cudas)

    assert np.all(udf_res['device_class'].data == 'cuda')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))