def test_cartesian_blocks():
    """Check that the slices yielded by Blocks tile the whole array.

    Writing zeros through every yielded slice must overwrite every element
    of an array of ones, even with a block shape that does not divide the
    full shape evenly and with non-zero padding.
    """
    full_shape = (20, 20)
    arr = np.ones(full_shape)
    blocks = Blocks(full_shape, shape_block=(3, 7), padding=(1, 2))
    for _, block_slices in blocks.slices():
        arr[block_slices] = 0
    np.testing.assert_array_equal(arr, np.zeros(full_shape))
def dff(dataset: SplitDataset, baseline_stack, output_dir=None, n_jobs=20, **kwargs):
    """Calculates change over baseline.

    Splits the dataset into its blocks and computes (F_i - F) / F for each
    block in parallel, writing the results into a new split dataset.

    :param dataset: the input split dataset
    :param baseline_stack: F stack for the (F_i - F) / F calculation
    :param output_dir: destination directory (parent of the dataset root if None)
    :param n_jobs: number of parallel jobs
    :param kwargs: extra keyword arguments forwarded to the per-block worker
    :return: the finalized dff SplitDataset
    """
    source_blocks = Blocks(shape_full=dataset.shape, shape_block=dataset.shape_block)
    new_dataset = EmptySplitDataset(
        root=output_dir or dataset.root.parent,
        name="dff",
        shape_full=dataset.shape,
        shape_block=dataset.shape_block,
    )
    # One delayed job per block; each worker writes its own output file.
    jobs = (
        delayed(_dff)(
            dataset,
            block,
            str(new_dataset.root / new_dataset.files[i_block]),
            baseline_stack,
            **kwargs,
        )
        for i_block, (_, block) in enumerate(source_blocks.slices(as_tuples=True))
    )
    Parallel(n_jobs=n_jobs)(jobs)
    return new_dataset.finalize()
def downsample(
    dataset: SplitDataset,
    downsampling=(1, 1, 2, 2),
    proc_block_shape=None,
    crop=None,
    output_dir=None,
    n_jobs=20,
    method=np.sum,
):
    """Downsamples a dataset.

    Each block is reduced independently in parallel and written into a new
    split dataset whose full and block shapes are the (integer-divided)
    downsampled versions of the originals.

    :param dataset: the input split dataset
    :param downsampling: per-axis factors, e.g. (1, 1, 2, 2) keeps the first
        two axes and halves the last two
    :param proc_block_shape: block shape used for processing (dataset's own
        block shape if None)
    :param crop: per-axis (before, after) crop amounts (no crop if None)
    :param output_dir: destination directory (parent of the dataset root if None)
    :param n_jobs: number of parallel jobs
    :param method: reduction applied within each downsampling window
    :return: the finalized downsampled SplitDataset
    """
    crop = crop or tuple((0, 0) for _ in dataset.shape)
    source_blocks = Blocks(
        shape_full=dataset.shape,
        crop=crop,
        shape_block=proc_block_shape or dataset.shape_block,
    )
    # Integer division: any remainder along an axis is discarded.
    shape_full_ds = tuple(
        size // factor
        for size, factor in zip(source_blocks.shape_cropped, downsampling)
    )
    shape_block_ds = tuple(
        size // factor
        for size, factor in zip(source_blocks.shape_block, downsampling)
    )
    new_dataset = EmptySplitDataset(
        root=output_dir or dataset.root.parent,
        name="downsampled",
        shape_full=shape_full_ds,
        shape_block=shape_block_ds,
    )
    Parallel(n_jobs=n_jobs)(
        delayed(_downsample_block)(
            dataset,
            block,
            str(new_dataset.root / new_dataset.files[i_block]),
            downsampling,
            method,
        )
        for i_block, (_, block) in enumerate(source_blocks.slices(as_tuples=True))
    )
    return new_dataset.finalize()
def run_in_blocks(
    function,
    dataset: SplitDataset,
    *extra_args,
    per_block_args=None,
    output_dir=None,
    output_shape_full=None,
    output_shape_block=None,
    process_shape_block=None,
    n_jobs=20,
    output_name=None,
    **kwargs,
):
    """Runs a function over a split dataset in parallel.

    :param function: the function to be applied (e.g. delta f over f or regression)
    :param dataset: the split dataset
    :param extra_args: the other positional arguments to the function
    :param per_block_args: a dictionary or list of extra arguments, indexed by block
    :param output_dir: (optional) the output directory
    :param output_shape_full: the output shape, if it will be different
    :param output_shape_block: the output block size, if different; when None it
        is inferred by running the function once on the first block
    :param process_shape_block: the size of block to process
    :param n_jobs: number of jobs to parallelize to
    :param output_name: the name of the output dataset, the function name is
        used if left blank
    :param kwargs: extra keyword arguments to the function
    :return: the processed dataset
    """
    # TODO avoid duplication of execution on first block (when
    #  output_shape_block is None, the first block is processed once here to
    #  infer the shape and once again in the parallel loop below)
    # TODO figure out output_shape_full
    process_shape_block = process_shape_block or dataset.shape_block

    processing_blocks = Blocks(
        shape_full=dataset.shape_full, shape_block=process_shape_block
    )
    # Take only the first block; next(iter(...)) avoids materializing the
    # whole slices iterable just to read its first element.
    _, first_block = next(iter(processing_blocks.slices(as_tuples=True)))
    if output_shape_block is None:
        # Probe run on the first block to determine the output block shape.
        processed = function(
            dataset[Blocks.block_to_slices(first_block)],
            *extra_args,
            *([] if per_block_args is None else per_block_args[0]),
            **kwargs,
        )
        output_shape_block = processed.shape

    new_dataset = EmptySplitDataset(
        root=output_dir or dataset.root.parent,
        name=output_name or function.__name__,
        shape_full=output_shape_full or dataset.shape,
        shape_block=output_shape_block or process_shape_block,
        resolution=dataset.resolution,
    )

    def wrap_function(ds, *args, filename, new_block, **kwargs):
        # Load one block, apply the function, and save the result to its file.
        original = ds[Blocks.block_to_slices(new_block)]
        processed = function(original, *args, **kwargs)
        fl.save(filename, {"stack_{}D".format(processed.ndim): processed})

    Parallel(n_jobs=n_jobs)(
        delayed(wrap_function)(
            dataset,
            *extra_args,
            *([] if per_block_args is None else per_block_args[i_block]),
            new_block=block,
            filename=str(new_dataset.root / new_dataset.files[i_block]),
            **kwargs,
        )
        for i_block, (_, block) in enumerate(
            processing_blocks.slices(as_tuples=True)
        )
    )
    return new_dataset.finalize()