def test_subframe_tiles():
    """Apply a random binary mask via small 4x4 tiles and compare the
    accumulated result against the naive reference implementation."""
    frames = np.random.choice(a=[0, 1], size=(16, 16, 16, 16))
    bin_mask = np.random.choice(a=[0, 1], size=(16, 16))
    reference = _naive_mask_apply([bin_mask], frames)
    dataset = MemoryDataSet(
        data=frames,
        tileshape=(1, 1, 4, 4),
        partition_shape=(16, 16, 16, 16),
    )
    job = ApplyMasksJob(dataset=dataset, mask_factories=[lambda: bin_mask])
    part = next(dataset.get_partitions())
    executor = InlineJobExecutor()
    result = np.zeros((1, 16, 16))
    # accumulate each tile's partial mask result into the full buffer
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.copy_to_result(result)
    print(part.shape)
    print(reference)
    print(result)
    assert np.allclose(result, reference)
def test_apply_mask_job(default_k2is, lt_ctx):
    """Run ApplyMasksJob on the K2IS dataset with an explicit tiling scheme
    and check that every result pixel received a contribution."""
    all_ones = np.ones((1860, 2048))
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=Shape((16, 930, 16), sig_dims=2),
        dataset_shape=default_k2is.shape,
    )
    job = ApplyMasksJob(
        dataset=default_k2is,
        mask_factories=[lambda: all_ones],
        tiling_scheme=tiling_scheme,
    )
    out = job.get_result_buffer()
    executor = InlineJobExecutor()
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.reduce_into_result(out)
    results = lt_ctx.run(job)
    assert results[0].shape == (34 * 35, )
    # there should be _something_ in each result pixel
    for px in results[0].reshape((-1, )):
        assert not np.isclose(px, 0)
def do_com(fn, tileshape):
    """Compute center-of-mass maps from an HDF5 dataset using three masks:
    total intensity, x-gradient and y-gradient.

    Returns (full_result, x_centers, y_centers).
    """
    ds = H5DataSet(
        path=fn,
        ds_path="data",
        tileshape=tileshape,
        target_size=512*1024*1024,
    )
    mask_factories = [
        # summation of all pixels:
        lambda: np.ones(shape=ds.shape[2:]),
        # gradient from left to right
        lambda: gradient_x(*ds.shape[2:]),
        # gradient from top to bottom
        lambda: gradient_y(*ds.shape[2:]),
    ]
    job = ApplyMasksJob(dataset=ds, mask_factories=mask_factories)
    print(job.masks.computed_masks)
    print("\n\n")
    executor = InlineJobExecutor()
    full_result = np.zeros(shape=(3,) + ds.shape[:2])
    color = np.zeros(shape=(3,) + ds.shape[:2])
    for result in executor.run_job(job):
        for tile in result:
            print(tile)
            print(tile.data[0])
            # count how often each scan position was touched
            color[tile.tile_slice.get()[:2]] += 1
            tile.copy_to_result(full_result)
    # center of mass = weighted coordinate sum / total intensity
    x_centers = full_result[1] / full_result[0]
    y_centers = full_result[2] / full_result[0]
    print(color)
    return full_result, x_centers, y_centers
def test_run_each_worker_1():
    """run_each_worker on the inline executor yields exactly one result,
    keyed by the worker name 'inline'."""
    executor = InlineJobExecutor()

    def produce():
        return "some result"

    results = executor.run_each_worker(produce)
    assert len(results.keys()) == 1
    assert len(results.keys()) == len(executor.get_available_workers())
    worker_key = next(iter(results))
    assert results[worker_key] == "some result"
    assert worker_key == "inline"
def test_apply_mask_on_raw_job(default_blo, lt_ctx):
    """Sum all pixels of the BLO dataset through ApplyMasksJob and check
    the flattened result shape."""
    job = ApplyMasksJob(
        dataset=default_blo,
        mask_factories=[lambda: np.ones((144, 144))],
    )
    out = job.get_result_buffer()
    executor = InlineJobExecutor()
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.reduce_into_result(out)
    results = lt_ctx.run(job)
    assert results[0].shape == (90 * 121, )
def test_apply_mask_on_empad_job(default_empad, lt_ctx):
    """Sum all pixels of the EMPAD dataset through ApplyMasksJob and check
    shape and that the result is non-trivial."""
    job = ApplyMasksJob(
        dataset=default_empad,
        mask_factories=[lambda: np.ones((128, 128))],
    )
    out = job.get_result_buffer()
    executor = InlineJobExecutor()
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.reduce_into_result(out)
    results = lt_ctx.run(job)
    assert results[0].shape == (4 * 4,)
    assert np.count_nonzero(results[0]) > 0
def main():
    """Run a sum UDF and a ring-mask UDF over a raw test file, with live
    plotting in Digital Micrograph (GMS)."""
    # Set a plot class for Digital Micrograph
    with api.Context(executor=InlineJobExecutor(), plot_class=GMSLive2DPlot) as ctx:
        ds = ctx.load(
            "RAW",
            path=r"C:\Users\Dieter\testfile-32-32-32-32-float32.raw",
            nav_shape=(32, 32),
            sig_shape=(32, 32),
            dtype=np.float32,
        )
        # ring mask centered on the 32x32 detector
        ring_factory = functools.partial(
            ring,
            centerX=16, centerY=16,
            imageSizeX=32, imageSizeY=32,
            radius=15, radius_inner=11,
        )
        sum_udf = SumUDF()
        ring_udf = ApplyMasksUDF(mask_factories=[ring_factory])
        ctx.run_udf(dataset=ds, udf=[sum_udf, ring_udf], plots=True)
def __init__(self, path: str, continuous=False, rois=None, max_runs=-1):
    """
    Parameters
    ----------
    path
        Path to the HDR file
    continuous
        If set to True, will continuously output data
    rois: List[np.ndarray]
        If a list of ROIs is given, in continuous mode, cycle through
        these ROIs from the source data
    max_runs: int
        Maximum number of continuous runs
    """
    if not path.lower().endswith(".hdr"):
        raise ValueError("please pass the path to the HDR file!")
    self._path = path
    self._continuous = continuous
    # avoid a mutable default argument; None means "no ROIs"
    self._rois = [] if rois is None else rois
    self._ctx = Context(executor=InlineJobExecutor())
    self._ds = None
    self._max_runs = max_runs
    self._mmaps = {}
def chunked_emd(tmpdir_factory):
    """Fixture: create a chunked Velox-style EMD (HDF5) file and yield it
    loaded as a dataset via the inline executor.

    The file contains a (256, 256, 128, 128) float32 'data' dataset chunked
    as (32, 32, 128, 128), plus dim1..dim4 axis datasets carrying the
    name/units attributes the EMD reader expects.
    """
    lt_ctx = Context(executor=InlineJobExecutor())
    datadir = tmpdir_factory.mktemp('hdf5_chunked_data')
    filename = os.path.join(datadir, 'chunked.emd')
    chunks = (32, 32, 128, 128)

    with h5py.File(filename, mode="w") as f:
        f.attrs.create('version_major', 0)
        f.attrs.create('version_minor', 2)

        f.create_group('experimental/science_data')
        group = f['experimental/science_data']
        group.attrs.create('emd_group_type', 1)
        data = np.ones((256, 256, 128, 128), dtype=np.float32)
        group.create_dataset(name='data', data=data, chunks=chunks)
        # One axis dataset per dimension, annotated with name/units
        # (replaces four near-identical copy-pasted stanzas):
        for i, size in enumerate(data.shape, start=1):
            dim = 'dim%d' % i
            group.create_dataset(name=dim, data=range(size))
            group[dim].attrs.create('name', dim.encode('ascii'))
            group[dim].attrs.create('units', ('units%d' % i).encode('ascii'))
        # NOTE: the original also called f.close() here, inside the
        # with-block that already closes the file on exit -- removed as
        # redundant.

    yield lt_ctx.load("auto", path=filename, ds_path="/experimental/science_data/data")
def test_detect_fail():
    """detect_params returns a falsy result for files it can't handle:
    a path that does not exist, and an existing file whose parameters
    can't be detected (we don't even know if it is an EMPAD file)."""
    executor = InlineJobExecutor()
    for candidate in ("/does/not/exist.raw", EMPAD_RAW):
        assert not EMPADDataSet.detect_params(candidate, executor=executor)
def default_empad():
    """Fixture: EMPAD dataset initialized from the test XML via the inline
    executor."""
    ds = EMPADDataSet(path=EMPAD_XML)
    yield ds.initialize(InlineJobExecutor())
def test_ssb_rotate():
    """SSB reconstruction should match the reference when the diffraction
    patterns are rotated 90 degrees and a compensating `transformation`
    (rotate_deg(-90.)) is passed to the UDF."""
    ctx = lt.Context(executor=InlineJobExecutor())
    dtype = np.float64
    scaling = 4
    det = 45
    shape = (29, 30, det, det)
    # The acceleration voltage U in keV
    U = 300
    lamb = wavelength(U)
    # STEM pixel size in m, here 50 STEM pixels on 0.5654 nm
    dpix = 0.5654 / 50 * 1e-9
    # STEM semiconvergence angle in radians
    semiconv = 25e-3
    # Diameter of the primary beam in the diffraction pattern in pixels
    semiconv_pix = 78.6649 / scaling

    cy = det // 2
    cx = det // 2

    input_data = (
        np.random.uniform(0, 1, np.prod(shape))
        * np.linspace(1.0, 1000.0, num=np.prod(shape))
    )
    input_data = input_data.astype(np.float64).reshape(shape)

    # Rotate each diffraction pattern 90 degrees clockwise. np.rot90 with
    # k=-1 over the signal axes is exactly the original per-pixel loop
    # data_90deg[:, :, x, det-1-y] = input_data[:, :, y, x], but vectorized;
    # .copy() materializes the view as a contiguous array.
    data_90deg = np.rot90(input_data, k=-1, axes=(2, 3)).copy()

    udf = SSB_UDF(lamb=lamb, dpix=dpix, semiconv=semiconv,
                  semiconv_pix=semiconv_pix, dtype=dtype, cy=cy, cx=cx,
                  transformation=rotate_deg(-90.))

    dataset = MemoryDataSet(
        data=data_90deg, tileshape=(20, shape[2], shape[3]),
        num_partitions=2, sig_dims=2,
    )

    result = ctx.run_udf(udf=udf, dataset=dataset)

    result_f, _ = reference_ssb(input_data, U=U, dpix=dpix, semiconv=semiconv,
                                semiconv_pix=semiconv_pix, cy=cy, cx=cx)

    assert np.allclose(result['pixels'].data, result_f)
def hdf5_ds_large_sig(random_hdf5):
    """Fixture: H5DataSet over the 'data' path of the random HDF5 file,
    initialized with the inline executor."""
    return H5DataSet(
        path=random_hdf5.filename,
        ds_path="data",
    ).initialize(InlineJobExecutor())
def hdf5_ds_1(hdf5):
    """Fixture: H5DataSet over the 'data' path of the basic HDF5 file,
    initialized with the inline executor."""
    return H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    ).initialize(InlineJobExecutor())
def default_blo():
    """Fixture: BLO dataset with a fixed tileshape.

    NOTE: this dataset version initializes in place, so the return value of
    initialize() is intentionally not used.
    """
    dataset = BloDataSet(
        path=str(BLO_TESTDATA_PATH),
        tileshape=(1, 8, 144, 144),
    )
    dataset.initialize(InlineJobExecutor())
    return dataset
def test_ssb():
    """Compare the SSB UDF result against the reference implementation on
    random gradient-scaled data (comparison on magnitudes)."""
    ctx = lt.Context(executor=InlineJobExecutor())
    dtype = np.float64
    scaling = 4
    shape = (29, 30, 189 // scaling, 197 // scaling)
    # The acceleration voltage U in keV
    U = 300
    # STEM pixel size in m, here 50 STEM pixels on 0.5654 nm
    dpix = 0.5654 / 50 * 1e-9
    # STEM semiconvergence angle in radians
    semiconv = 25e-3
    # Diameter of the primary beam in the diffraction pattern in pixels
    semiconv_pix = 78.6649 / scaling
    cy = 93 // scaling
    cx = 97 // scaling

    # random values scaled by a linear gradient across the whole stack
    gradient = np.linspace(1.0, 1000.0, num=np.prod(shape)).reshape(shape)
    input_data = (np.random.uniform(0, 1, shape) * gradient).astype(np.float64)

    udf = SSB_UDF(U=U, dpix=dpix, semiconv=semiconv,
                  semiconv_pix=semiconv_pix, dtype=dtype, cy=cy, cx=cx)

    dataset = MemoryDataSet(
        data=input_data, tileshape=(20, shape[2], shape[3]),
        num_partitions=2, sig_dims=2,
    )

    result = ctx.run_udf(udf=udf, dataset=dataset)

    result_f, _, _ = reference_ssb(input_data, U=U, dpix=dpix, semiconv=semiconv,
                                   semiconv_pix=semiconv_pix, cy=cy, cx=cx)

    assert np.allclose(np.abs(result['pixels']), np.abs(result_f))
def create_random_hdf5(path):
    """Write a random (16, 16, 16, 16) float32 'data' dataset to *path* and
    return it initialized as an H5DataSet."""
    with h5py.File(path, 'w') as f:
        sample_data = np.random.randn(16, 16, 16, 16).astype("float32")
        f.create_dataset("data", (16, 16, 16, 16), data=sample_data)
    # read and provide the ds
    return H5DataSet(path=path, ds_path='data').initialize(InlineJobExecutor())
def test_nonexistent():
    """Initializing an EMPAD dataset from a missing file must raise a
    DataSetException mentioning the OS error."""
    dataset = EMPADDataSet(
        path="/does/not/exist.raw",
        scan_size=(4, 4),
    )
    with pytest.raises(DataSetException) as einfo:
        dataset.initialize(InlineJobExecutor())
    assert einfo.match("No such file or directory")
def test_nonexistent():
    """Initializing an EMPAD dataset from a missing file must raise a
    DataSetException with the expected message."""
    dataset = EMPADDataSet(
        path="/does/not/exist.raw",
        nav_shape=(4, 4),
    )
    with pytest.raises(DataSetException) as einfo:
        dataset.initialize(InlineJobExecutor())
    assert einfo.match("could not open file /does/not/exist.raw")
def test_apply_mask_on_raw_job(default_raw, lt_ctx):
    """Sum all pixels of the default raw dataset through ApplyMasksJob."""
    job = ApplyMasksJob(
        dataset=default_raw,
        mask_factories=[lambda: np.ones((128, 128))],
    )
    out = job.get_result_buffer()
    executor = InlineJobExecutor()
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.reduce_into_result(out)
    results = lt_ctx.run(job)
    # FIXME: should the result here be 1D or 2D?
    # currently, for inherently 4D datasets it is 2D, and for 3D datasets
    # it is 1D. make this consistent?
    assert results[0].shape == (16, 16)
def test_ssb(dpix, backend, n_threads):
    # Parametrized SSB test: run the subpix-method SSB UDF on random data and
    # compare against the reference implementation, on either the numpy or the
    # cupy backend, with a varying inline thread count.
    lt_ctx = lt.Context(InlineJobExecutor(debug=True, inline_threads=n_threads))
    try:
        if backend == 'cupy':
            # route UDF execution to the first CUDA device for this test
            set_use_cuda(0)
        dtype = np.float64
        scaling = 4
        shape = (29, 30, 189 // scaling, 197 // scaling)
        # The acceleration voltage U in keV
        U = 300
        lamb = wavelength(U)
        # STEM semiconvergence angle in radians
        semiconv = 25e-3
        # Diameter of the primary beam in the diffraction pattern in pixels
        semiconv_pix = 78.6649 / scaling
        cy = 93 // scaling
        cx = 97 // scaling
        # random values scaled by a linear gradient across the whole stack
        input_data = (
            np.random.uniform(0, 1, np.prod(shape))
            * np.linspace(1.0, 1000.0, num=np.prod(shape))
        )
        input_data = input_data.astype(np.float64).reshape(shape)
        udf = SSB_UDF(lamb=lamb, dpix=dpix, semiconv=semiconv,
                      semiconv_pix=semiconv_pix, dtype=dtype,
                      cy=cy, cx=cx, method='subpix')
        dataset = MemoryDataSet(
            data=input_data, tileshape=(20, shape[2], shape[3]),
            num_partitions=2, sig_dims=2,
        )
        result = lt_ctx.run_udf(udf=udf, dataset=dataset)
        result_f, reference_masks = reference_ssb(
            input_data, U=U, dpix=dpix, semiconv=semiconv,
            semiconv_pix=semiconv_pix, cy=cy, cx=cx)
        # compare the masks the UDF actually computed with the reference masks
        task_data = udf.get_task_data()
        udf_masks = task_data['masks'].computed_masks
        half_y = shape[0] // 2 + 1
        # Use symmetry and reshape like generate_masks()
        reference_masks = reference_masks[:half_y].reshape(
            (half_y*shape[1], shape[2], shape[3]))
        print(np.max(np.abs(udf_masks.todense() - reference_masks)))
        print(np.max(np.abs(result['fourier'].data - result_f)))
        assert np.allclose(result['fourier'].data, result_f)
        # amplitude/phase outputs must reassemble into the Fourier result
        backwards = result['amplitude'].data**2 * np.exp(1j*result['phase'].data)
        assert np.allclose(result['fourier'].data, np.fft.fft2(backwards))
    finally:
        if backend == 'cupy':
            # always restore CPU execution so later tests are unaffected
            set_use_cpu(0)
def test_detect():
    """K2IS parameter detection on the test data yields the expected
    geometry and sync offset."""
    detected = K2ISDataSet.detect_params(K2IS_TESTDATA_PATH, InlineJobExecutor())
    assert detected["parameters"] == {
        "path": K2IS_TESTDATA_PATH,
        "nav_shape": (34, 35),
        "sig_shape": (1860, 2048),
        "sync_offset": 250,
    }
def medium_raw_float32(medium_raw_file_float32):
    """Fixture: float32 raw dataset backed by the mmap I/O backend."""
    filename, shape, dtype = medium_raw_file_float32
    dataset = RawFileDataSet(
        path=str(filename),
        nav_shape=shape[:2],
        sig_shape=shape[2:],
        dtype=dtype,
        io_backend=MMapBackend(),
    )
    yield dataset.initialize(InlineJobExecutor())
def test_threads_per_worker(default_raw, dask_executor):
    """Dask workers should be pinned to a single thread each, while the
    inline executor gets all physical cores."""
    ctx = Context(executor=dask_executor)
    inline_ctx = Context(executor=InlineJobExecutor())
    res = ctx.run_udf(dataset=default_raw, udf=ThreadsPerWorkerUDF())['num_threads']
    res_inline = inline_ctx.run_udf(dataset=default_raw, udf=ThreadsPerWorkerUDF())['num_threads']
    assert np.allclose(res, 1)
    # physical (not logical) core count for the inline executor
    assert np.allclose(res_inline, psutil.cpu_count(logical=False))
async def test_prime_cache(
    shared_state, default_raw, base_url, http_client, server_port,
    local_cluster_url, default_token,
):
    # End-to-end web API test: create a dataset via HTTP, then check that a
    # freshly connected websocket client receives the dataset (with its
    # params and diagnostics) in the INITIAL_STATE message.
    # first, connect to get the state
    await create_connection(base_url, http_client, local_cluster_url, default_token)
    # wrap the inline executor so it can be driven from async code
    executor = InlineJobExecutor()
    pool = AsyncAdapter.make_pool()
    executor = AsyncAdapter(wrapped=executor, pool=pool)
    conn_details = {
        'connection': {
            'type': 'local',
            'numWorkers': 1,
            'cudas': [],
        }
    }
    await shared_state.executor_state.set_executor(executor, conn_details)
    raw_path = default_raw._path
    # fixed UUID so the dataset URL and the INITIAL_STATE id can be compared
    uuid = "ae5d23bd-1f2a-4c57-bab2-dfc59a1219f3"
    ds_url = "{}/api/datasets/{}/?token={}".format(
        base_url, uuid, default_token,
    )
    ds_data = _get_raw_params(raw_path)
    # connect to ws endpoint:
    ws_url = f"ws://127.0.0.1:{server_port}/api/events/?token={default_token}"
    async with websockets.connect(ws_url) as ws:
        initial_msg = json.loads(await ws.recv())
        assert_msg(initial_msg, 'INITIAL_STATE')
        # create the dataset while the websocket connection is open
        async with http_client.put(ds_url, json=ds_data) as resp:
            assert resp.status == 200
            resp_json = await resp.json()
            assert_msg(resp_json, 'CREATE_DATASET')
    # reconnect: the new INITIAL_STATE must include the created dataset
    async with websockets.connect(ws_url) as ws:
        initial_msg = json.loads(await ws.recv())
        assert_msg(initial_msg, 'INITIAL_STATE')
        assert initial_msg["jobs"] == []
        assert len(initial_msg["datasets"]) == 1
        assert initial_msg["datasets"][0]["id"] == uuid
        assert initial_msg["datasets"][0]["params"] == {
            'sig_shape': [128, 128],
            "enable_direct": False,
            'dtype': 'float32',
            'path': raw_path,
            'nav_shape': [16, 16],
            'shape': [16, 16, 128, 128],
            'type': 'RAW',
            'sync_offset': 0
        }
        # diagnostics are primed (cached) as part of dataset creation
        assert len(initial_msg["datasets"][0]["diagnostics"]) == 6
def test_ssb_roi():
    """Running SSB on an ROI and on its complement must sum to the full
    reference result (linearity over scan positions)."""
    ctx = lt.Context(executor=InlineJobExecutor())
    dtype = np.float64
    scaling = 4
    shape = (29, 30, 189 // scaling, 197 // scaling)
    # The acceleration voltage U in keV
    U = 300
    lamb = wavelength(U)
    # STEM pixel size in m, here 50 STEM pixels on 0.5654 nm
    dpix = 0.5654 / 50 * 1e-9
    # STEM semiconvergence angle in radians
    semiconv = 25e-3
    # Diameter of the primary beam in the diffraction pattern in pixels
    semiconv_pix = 78.6649 / scaling
    cy = 93 // scaling
    cx = 97 // scaling

    # random values scaled by a linear gradient across the whole stack
    count = np.prod(shape)
    input_data = np.random.uniform(0, 1, count) * np.linspace(1.0, 1000.0, num=count)
    input_data = input_data.astype(np.float64).reshape(shape)

    udf = SSB_UDF(lamb=lamb, dpix=dpix, semiconv=semiconv,
                  semiconv_pix=semiconv_pix, dtype=dtype, cy=cy, cx=cx)

    dataset = MemoryDataSet(
        data=input_data, tileshape=(20, shape[2], shape[3]),
        num_partitions=2, sig_dims=2,
    )

    # a random ROI and its complement together cover every scan position
    roi_1 = np.random.choice([True, False], shape[:2])
    roi_2 = np.invert(roi_1)
    result_1 = ctx.run_udf(udf=udf, dataset=dataset, roi=roi_1)
    result_2 = ctx.run_udf(udf=udf, dataset=dataset, roi=roi_2)

    result_f, _ = reference_ssb(input_data, U=U, dpix=dpix, semiconv=semiconv,
                                semiconv_pix=semiconv_pix, cy=cy, cx=cx)

    assert np.allclose(result_1['pixels'].data + result_2['pixels'].data, result_f)
def large_raw(large_raw_file):
    """Fixture: large raw dataset with the default I/O backend."""
    filename, shape, dtype = large_raw_file
    dataset = RawFileDataSet(
        path=str(filename),
        nav_shape=shape[:2],
        sig_shape=shape[2:],
        dtype=dtype,
    )
    yield dataset.initialize(InlineJobExecutor())
def test_detection_nonempty_hdf5(hdf5_ds_1):
    """detect() on an HDF5 file fills in exactly path, ds_path, tileshape
    and type (old flat-dict API)."""
    fn = hdf5_ds_1.path
    params = detect(fn, executor=InlineJobExecutor())
    assert params != {}
    assert params["path"] == fn
    assert params["ds_path"] == "data"
    assert params["tileshape"] == (1, 8, 16, 16)
    assert params["type"] == "hdf5"
    # no extra keys beyond the four above, in this order
    assert list(params.keys()) == ["path", "ds_path", "tileshape", "type"]
def test_detection_nonempty_hdf5(hdf5_ds_1):
    """detect() on an HDF5 file (newer API): load parameters are nested
    under 'parameters', the dataset type stays at the top level."""
    fn = hdf5_ds_1.path
    params = detect(fn, executor=InlineJobExecutor())
    parameters = params["parameters"]
    assert parameters != {}
    assert parameters["path"] == fn
    assert parameters["ds_path"] == "data"
    assert params["type"] == "hdf5"
    # no extra keys beyond path and ds_path, in this order
    assert list(parameters.keys()) == ["path", "ds_path"]
def test_invalid_size():
    """A scan_size that doesn't match the raw file's size must make
    check_valid() raise a DataSetException."""
    dataset = EMPADDataSet(
        path=EMPAD_RAW,
        scan_size=(4, 5),
    ).initialize(InlineJobExecutor())
    with pytest.raises(DataSetException) as einfo:
        dataset.check_valid()
    assert einfo.match("invalid filesize")