def _exclude_border(mask, exclude_border):
    """ Helper function to remove peaks near the borders """
    # if a scalar is provided, expand it to the mask image dimension
    exclude_border = (exclude_border,) * mask.ndim if np.isscalar(exclude_border) \
        else exclude_border

    # if the wrong size sequence is provided, raise an error
    if len(exclude_border) != mask.ndim:
        raise ValueError("exclude_border has to be boolean, int scalar "
                         "or a sequence of length: number of dimensions of the image")

    # build a filter for the border by zero-padding a center dask array of ones
    center_dim = tuple(np.subtract(mask.shape, [2 * i for i in exclude_border]))
    borders = tuple([(i,) * 2 for i in exclude_border])
    border_filter = da.pad(da.ones(center_dim), borders, 'constant')
    assert border_filter.shape == mask.shape

    # filter the input mask by the border filter
    return mask * border_filter
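# A minimal usage sketch of the helper above (sizes and values are
# illustrative): zero out everything within 2 pixels of the border of a
# 6x6 peak mask.
import numpy as np
import dask.array as da

mask = da.from_array(np.ones((6, 6), dtype=bool), chunks=3)
filtered = _exclude_border(mask, 2)
print(filtered.compute().astype(int))
# Only the central 2x2 block of ones survives; the zero-padded border
# filter removes every peak within 2 pixels of an edge.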
def _rmatvec(self, x):
    # apply forward fft
    x = da.reshape(x, self.dimsd)
    y = sqrt(1. / self.nt) * da.fft.rfft(x, n=self.nt, axis=0)
    y = y.astype(self.cdtype)
    y = y[:self.nfmax]

    # apply batched matrix mult
    y = y.rechunk((self.G.chunks[0], self.nr, self.nv))
    if self.saveGt:
        if self.conj:
            y = y.conj()
        y = da.matmul(self.GT, y)
        if self.conj:
            y = y.conj()
    else:
        if self.conj:
            y = da.matmul(y.transpose(0, 2, 1), self.G).transpose(0, 2, 1)
        else:
            y = da.matmul(y.transpose(0, 2, 1).conj(), self.G).transpose(0, 2, 1).conj()
    if not self.prescaled:
        y *= self.dr * self.dt * np.sqrt(self.nt)

    # apply inverse fft
    y = da.pad(y, ((0, self.nfft - self.nfmax), (0, 0), (0, 0)), mode='constant')
    y = y.rechunk(self.dimsdf)
    y = sqrt(self.nt) * da.fft.irfft(y, n=self.nt, axis=0)
    if self.twosided:
        y = da.fft.fftshift(y, axes=0)
    y = y.astype(self.dtype)
    y = da.real(y)
    return y.ravel()
def weight_block(block, blocksize, block_info=None):
    """ Compute trilinear blending weights for a block, accounting for its
    position in the block grid, and multiply them into the block """
    # compute fixed overlap size
    overlaps = np.array([int(round(x / 8)) for x in blocksize])

    # determine which faces need linear weighting
    core_shape = []
    pads = []
    block_index = block_info[0]['chunk-location']
    block_grid = block_info[0]['num-chunks']
    for i in range(3):
        p, bl = overlaps[i], blocksize[i]
        bi, bg = block_index[i], block_grid[i]
        pad, core = [2 * p + 1, 2 * p + 1], bl - 2 * p
        if bi == 0:
            pad[0], core = 0, core + 2 * p + 1
        if bi == bg - 1:
            pad[1], core = 0, core + 2 * p + 1
        pads.append(tuple(pad))
        core_shape.append(core)

    # create weights
    weights = da.ones(core_shape, dtype=np.float32)
    weights = da.pad(weights, pads, mode='linear_ramp', end_values=0)
    weights = weights[1:-1, 1:-1, 1:-1]
    weights = weights.reshape(weights.shape + (1,))

    # multiply data by weights and return
    return da.multiply(block, weights)
def zero_pad(arr, shape, chunks):
    """Zero pad an array with zeros

    Args:
      arr: the array to pad
      shape: the shape of the new array
      chunks: how to rechunk the new array

    Returns:
      the new padded version of the array

    >>> print(
    ...     zero_pad(
    ...         np.arange(4).reshape([1, 2, 2, 1]),
    ...         (1, 4, 5, 1),
    ...         None
    ...     )[0, ..., 0].compute()
    ... )
    [[0 0 0 0 0]
     [0 0 0 1 0]
     [0 0 2 3 0]
     [0 0 0 0 0]]

    >>> print(zero_pad(np.arange(4).reshape([2, 2]), (4, 5), None).compute())
    [[0 0 0 0 0]
     [0 0 0 1 0]
     [0 0 2 3 0]
     [0 0 0 0 0]]

    >>> zero_pad(np.arange(4).reshape([2, 2]), (4, 5, 1), None)
    Traceback (most recent call last):
    ...
    RuntimeError: length of shape is incorrect

    >>> zero_pad(np.arange(4).reshape([2, 2]), (1, 2), None)
    Traceback (most recent call last):
    ...
    RuntimeError: resize shape is too small

    >>> arr = da.from_array(np.arange(4).reshape((2, 2)), chunks=(2, 1))
    >>> out = zero_pad(arr, (4, 3), (-1, 1))
    >>> out.shape
    (4, 3)
    >>> out.chunks
    ((4,), (1, 1, 1))
    """
    if len(shape) != len(arr.shape):
        raise RuntimeError("length of shape is incorrect")

    # compare elementwise; a plain tuple comparison would be lexicographic
    if not np.all(np.array(shape) >= np.array(arr.shape)):
        raise RuntimeError("resize shape is too small")

    return pipe(
        np.array(shape) - np.array(arr.shape),
        lambda x: np.concatenate(
            ((x - (x // 2))[..., None], (x // 2)[..., None]), axis=1
        ),
        fmap(tuple),
        tuple,
        lambda x: da.pad(arr, x, "constant", constant_values=0),
        lambda x: da.rechunk(x, chunks=chunks or x.shape),
    )
def test_pad(shape, chunks, pad_width, mode, kwargs):
    np_a = np.random.random(shape)
    da_a = da.from_array(np_a, chunks=chunks)

    np_r = np.pad(np_a, pad_width, mode, **kwargs)
    da_r = da.pad(da_a, pad_width, mode, **kwargs)

    assert_eq(np_r, da_r)
def test_pad_3d_data(dtype, pad_widths, mode):
    np_a = np.arange(2 * 3 * 4).reshape(2, 3, 4).astype(dtype)
    da_a = da.from_array(np_a, chunks="auto")

    np_r = np.pad(np_a, pad_widths, mode=mode)
    da_r = da.pad(da_a, pad_widths, mode=mode)

    assert_eq(np_r, da_r)
def weight_block(block, blocksize):
    """ Apply linear-ramp blending weights to a block """
    overlaps = np.array([int(round(x / 8)) for x in blocksize])
    weights = da.ones(blocksize - 2 * overlaps, dtype=np.float32)
    pads = [(2 * p, 2 * p) for p in overlaps]
    weights = da.pad(weights, pads, mode='linear_ramp', end_values=0)
    weights = weights.reshape(weights.shape + (1,))
    return da.multiply(block, weights)
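# The blending weights both `weight_block` variants rely on, shown in
# isolation (a sketch with illustrative sizes): a core of ones padded with
# a linear ramp down to zero.
import numpy as np
import dask.array as da

core = da.ones((4,), dtype=np.float32)
weights = da.pad(core, (2, 2), mode='linear_ramp', end_values=0)
print(weights.compute())  # [0.  0.5 1.  1.  1.  1.  0.5 0. ]
# Overlapping blocks weighted with such ramps sum to ~1 in the overlap
# region, which is what makes blockwise stitching seamless.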
def _rmatvec(self, x):
    if not self.inplace:
        x = x.copy()
    if self.shape[0] == self.shape[1]:
        y = x
    elif self.shape[0] < self.shape[1]:
        y = da.pad(x, (0, self.shape[1] - self.shape[0]), mode='constant')
    else:
        y = x[:self.shape[1]]
    return y
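# Sketch of why zero-padding appears in this adjoint: for a forward
# operator that truncates (y = x[:n]), the adjoint appends zeros, so the
# dot-test <A x, y> == <x, A^T y> holds. Names below are illustrative.
import numpy as np
import dask.array as da

m, n = 7, 4
x = da.random.random(m, chunks=4)
y = da.random.random(n, chunks=4)
lhs = da.dot(x[:n], y).compute()                                   # <A x, y>
rhs = da.dot(x, da.pad(y, (0, m - n), mode='constant')).compute()  # <x, A^T y>
assert np.isclose(lhs, rhs)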
def partition(image, folder):
    # create a dask array from the image in row chunks of height windowSize
    image_da = da.from_array(image, chunks=(windowSize, image.shape[1]))
    # pad the array before and after with windowSize // 2 pixels
    image_pad = da.pad(image_da, windowSize // 2, mode='constant')
    for i in range(0, windowSize):
        row = str(i)
        block_i = image_pad[i:, :]
        block_i_da = da.rechunk(block_i, chunks=(windowSize, image_pad.shape[1]))
        block_i_da.map_blocks(block2row, dtype=int, row=row, folder=folder).compute()
def test_pad(shape, chunks, pad_width, mode, kwargs):
    np_a = np.random.random(shape)
    da_a = da.from_array(np_a, chunks=chunks)

    np_r = np.pad(np_a, pad_width, mode, **kwargs)
    da_r = da.pad(da_a, pad_width, mode, **kwargs)

    if mode == "empty":
        # empty pads lead to undefined values which may be different
        assert_eq(np_r[pad_width:-pad_width], da_r[pad_width:-pad_width])
    else:
        assert_eq(np_r, da_r)
def pad(array, pad_width, mode="constant", **kwargs): padded = da.pad(array, pad_width, mode=mode, **kwargs) # workaround for inconsistency between numpy and dask: https://github.com/dask/dask/issues/5303 if mode == "mean" and issubclass(array.dtype.type, np.integer): warnings.warn( 'dask.array.pad(mode="mean") converts integers to floats. xarray converts ' "these floats back to integers to keep the interface consistent. There is a chance that " "this introduces rounding errors. If you wish to keep the values as floats, first change " "the dtype to a float before calling pad.", UserWarning, ) return da.round(padded).astype(array.dtype) _validate_pad_output_shape(array.shape, pad_width, padded.shape) return padded
def test_pad(shape, chunks, pad_width, mode, kwargs):
    np_a = np.random.random(shape)
    da_a = da.from_array(cupy.array(np_a), chunks=chunks)

    np_r = np.pad(np_a, pad_width, mode, **kwargs)
    da_r = da.pad(da_a, pad_width, mode, **kwargs)

    assert isinstance(da_r._meta, cupy.ndarray)
    assert isinstance(da_r.compute(), cupy.ndarray)

    if mode == "empty":
        # empty pads lead to undefined values which may be different
        assert_eq(np_r[pad_width:-pad_width], da_r[pad_width:-pad_width],
                  check_type=False)
    else:
        assert_eq(np_r, da_r, check_type=False)
def test_pad_udf(kwargs):
    def udf_pad(vector, pad_width, iaxis, kwargs):
        scaler = kwargs.get("scaler", 1)
        vector[:pad_width[0]] = -scaler * pad_width[0]
        vector[-pad_width[1]:] = scaler * pad_width[1]
        return vector

    shape = (10, 11)
    chunks = (4, 5)
    pad_width = ((1, 2), (2, 3))

    np_a = np.random.random(shape)
    da_a = da.from_array(np_a, chunks=chunks)

    np_r = np.pad(np_a, pad_width, udf_pad, kwargs=kwargs)
    da_r = da.pad(da_a, pad_width, udf_pad, kwargs=kwargs)

    assert_eq(np_r, da_r)
def register_stack(data, sigma=5, fftsize=256, dE=10, min_norm=0.15):
    """Top level convenience function to register a stack of images.

    `data` should be a stack of images stacked along axis 0, in the form of
    anything convertible to a dask array by `da.asarray()`. Quick and dirty
    function; should only be used for small stacks, as not all parameters
    are exposed. In particular, strides/interpolation are unavailable.
    """
    data = da.asarray(data, chunks=(dE, -1, -1))
    sobel = crop_and_filter(data.rechunk({0: dE}), sigma=sigma,
                            finalsize=2 * fftsize)
    sobel = (sobel - sobel.mean(axis=(1, 2), keepdims=True)).persist()
    corr = dask_cross_corr(sobel)
    W, M = calculate_halfmatrices(*max_and_argmax(corr), fftsize=fftsize)
    w_diag = np.atleast_2d(np.diag(W))
    W_n = W / np.sqrt(w_diag.T * w_diag)
    nr = np.arange(data.shape[0])
    coords, weightmatrix, DX, DY, row_mask = threshold_and_mask(min_norm, W, M, nr)
    dx, dy = calc_shift_vectors(DX, DY, weightmatrix)
    shifts = np.stack(interp_shifts(coords, [dx, dy], n=data.shape[0]), axis=1)
    neededMargins = np.ceil(shifts.max(axis=0)).astype(int)
    shifts = da.from_array(shifts, chunks=(dE, -1))

    @da.as_gufunc(signature="(i,j),(2)->(i,j)", output_dtypes=data.dtype,
                  vectorize=True)
    def shift_images(image, shifts):
        """Shift `image` by `shifts` pixels."""
        return ndi.shift(image, shift=shifts, order=1)

    padded = da.pad(data.rechunk({0: dE}),
                    ((0, 0), (0, neededMargins[0]), (0, neededMargins[1])),
                    mode='constant')
    corrected = shift_images(padded.rechunk({1: -1, 2: -1}), shifts)
    return corrected, shifts
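# Hypothetical usage of `register_stack` (a sketch: assumes the helper
# functions it calls are importable, and `stack` stands in for any
# (N, ny, nx) image stack convertible by `da.asarray`).
import numpy as np

stack = np.random.rand(20, 512, 512).astype(np.float32)  # placeholder data
corrected, shifts = register_stack(stack, sigma=5, fftsize=256, dE=10)
print(shifts.shape)             # one (dx, dy) shift per frame: (20, 2)
first = corrected[0].compute()  # frames stay lazy until computed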
def rolling_window(a, axis, window, center, fill_value):
    """Dask's equivalent of np.utils.rolling_window"""
    import dask.array as da

    if not hasattr(axis, "__len__"):
        axis = [axis]
        window = [window]
        center = [center]

    orig_shape = a.shape
    depth = {d: 0 for d in range(a.ndim)}
    offset = [0] * a.ndim
    drop_size = [0] * a.ndim
    pad_size = [0] * a.ndim

    for ax, win, cent in zip(axis, window, center):
        if ax < 0:
            ax = a.ndim + ax
        depth[ax] = int(win / 2)
        # For evenly sized window, we need to crop the first point of each block.
        offset[ax] = 1 if win % 2 == 0 else 0

        if depth[ax] > min(a.chunks[ax]):
            raise ValueError(
                "For window size %d, every chunk should be larger than %d, "
                "but the smallest chunk size is %d. Rechunk your array\n"
                "with a larger chunk size or a chunk size that\n"
                "more evenly divides the shape of your array."
                % (win, depth[ax], min(a.chunks[ax])))

        # Although da.overlap pads values to boundaries of the array,
        # the size of the generated array is smaller than what we want
        # if center == False.
        if cent:
            start = int(win / 2)  # 10 -> 5, 9 -> 4
            end = win - 1 - start
        else:
            start, end = win - 1, 0
        pad_size[ax] = max(start, end) + offset[ax] - depth[ax]
        drop_size[ax] = 0
        # pad_size becomes more than 0 when the overlapped array is smaller than
        # needed. In this case, we need to enlarge the original array by padding
        # before overlapping.
        if pad_size[ax] > 0:
            if pad_size[ax] < depth[ax]:
                # overlapping requires each chunk larger than depth. If pad_size
                # is smaller than the depth, we enlarge this and truncate it
                # later.
                drop_size[ax] = depth[ax] - pad_size[ax]
                pad_size[ax] = depth[ax]

    # TODO maybe following two lines can be summarized.
    a = da.pad(a, [(p, 0) for p in pad_size], mode="constant",
               constant_values=fill_value)
    boundary = {d: fill_value for d in range(a.ndim)}

    # create overlap arrays
    ag = da.overlap.overlap(a, depth=depth, boundary=boundary)

    def func(x, window, axis):
        x = np.asarray(x)
        index = [slice(None)] * x.ndim
        for ax, win in zip(axis, window):
            x = nputils._rolling_window(x, win, ax)
            index[ax] = slice(offset[ax], None)
        return x[tuple(index)]

    chunks = list(a.chunks) + window
    new_axis = [a.ndim + i for i in range(len(axis))]
    out = ag.map_blocks(func, dtype=a.dtype, new_axis=new_axis, chunks=chunks,
                        window=window, axis=axis)

    # crop boundary.
    index = [slice(None)] * a.ndim
    for ax in axis:
        index[ax] = slice(drop_size[ax], drop_size[ax] + orig_shape[ax])
    return out[tuple(index)]
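# The overlap step at the heart of `rolling_window`, in isolation: each
# chunk is extended by `depth` cells per side along axis 0, and boundary
# chunks are filled with a constant (values here are illustrative).
import dask.array as da

a = da.arange(8, chunks=4)
g = da.overlap.overlap(a, depth={0: 2}, boundary={0: -1})
print(g.compute())
# [-1 -1  0  1  2  3  4  5  2  3  4  5  6  7 -1 -1]
# The rolling-window kernel is then applied per chunk with map_blocks, and
# the duplicated boundary cells are cropped afterwards.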
shifts

# +
# Step 9, the actual shifting of the original images
# Inferring output dtype is not supported in dask yet, so we need original.dtype here.
@da.as_gufunc(signature="(i,j),(2)->(i,j)", output_dtypes=original.dtype, vectorize=True)
def shift_images(image, shift):
    """Shift `image` by `shift` pixels."""
    return ndi.shift(image, shift=shift, order=1)


padded = da.pad(original.rechunk({0: dE}),
                ((0, 0),
                 (0, neededMargins[0]),
                 (0, neededMargins[1])),
                mode='constant')
corrected = shift_images(padded.rechunk({1: -1, 2: -1}), shifts)
# -

# Do an interactive viewer to inspect the results
interactive(lambda n: plot_stack(corrected, n, grid=True),
            n=widgets.IntSlider(corrected.shape[0] // 4, 0,
                                corrected.shape[0] - 1, 1,
                                continuous_update=False))

# ## Saving data

# Save the resulting data in a new netCDF file
xrcorrected = dataset.reindex({'x': np.arange(0, dataset.x[1] * corrected.shape[1], dataset.x[1]),
def distance_propagation(population, total_population, carrying_capacity, distance, csx, csy, **kwargs):
    """
    'distance propagation'

    Distance propagation is used to redistribute populations to distal locations based on density gradients.
    Portions of populations at each element (node, or grid cell) in the study area array (raster) are moved
    to a target element at a radius that is defined by the input distance (:math:`d`), as presented in the
    conceptual figure below.

    .. image:: images/distance_propagation_neighbourhood.png
        :align: center

    .. attention:: No dispersal will occur if the provided distance is less than the distance between
        elements (grid cells) in the model domain, as none will be included in the neighbourhood

    The density (:math:`\\rho`) of all distal elements (:math:`i`) is calculated as:

    .. math::
        \\rho(i)=\\frac{pop_T(i)}{k_T(i)}

    where,

    :math:`pop_T` is the total population (of the entire species) at each element (:math:`i`); and\n
    :math:`k_T` is the total carrying capacity for the species

    The distal element with the minimum density is chosen as a candidate for population dispersal from the
    centroid element. If the density of distal elements is homogeneous, one element is picked at random.
    The density gradient :math:`\\Delta` is then calculated using the centroid element :math:`i_0` and the
    chosen distal element :math:`i_1`:

    .. math::
        \\rho=\\frac{pop_T(i_0)/k_T(i_0)+pop_T(i_1)/k_T(i_1)}{2}

    .. math::
        \\Delta(i)=\\frac{pop_T(i)}{k_T(i)}-\\rho

    If the centroid element is above the mean :math:`[\\Delta(i_0) > 0]`, and the distal element is below
    the mean :math:`[\\Delta(i_1) < 0]`, dispersal may take place. The total population dispersed is
    calculated by taking the minimum of the population constrained by the gradient:

    .. math::
        dispersal=min\{|\\Delta(i_0)k_T(i_0)|, |\\Delta(i_1)k_T(i_1)|\}

    The population at the centroid element becomes:

    .. math::
        pop_a(i_0)=pop_a(i_0)-dispersal

    where,

    :math:`pop_a` is the age (stage) group population, which is a sub-population of the total.

    The population at the distal element becomes (a net gain due to a negative gradient):

    .. math::
        pop_a(i_1)=pop_a(i_1)+dispersal

    :param da.Array population: Sub-population to redistribute (subset of the ``total_population``)
    :param da.Array total_population: Total population
    :param da.Array carrying_capacity: Total Carrying Capacity (k)
    :param float distance: Maximum dispersal distance
    :param float csx: Cell size of the domain in the x-direction
    :param float csy: Cell size of the domain in the y-direction

    .. Attention:: Ensure the cell sizes are in the same units as the specified direction

    :return: Redistributed population
    """
    # Check the inputs
    if any([not isinstance(a, da.Array)
            for a in [population, total_population, carrying_capacity]]):
        raise DispersalError('Inputs must be dask arrays')

    if distance == 0:
        # Don't do anything
        return population

    chunks = tuple(c[0] if c else 0 for c in population.chunks)[:2]

    # Calculate the kernel indices and shape
    kernel = calculate_kernel(distance, csx, csy, True)
    if kernel is None:
        return population
    kernel, m, n = kernel
    # Dask does not like numpy types in depth
    m = int(m)
    n = int(n)

    a = da.pad(da.dstack([population, total_population, carrying_capacity]),
               ((m, m), (n, n), (0, 0)), 'constant', constant_values=0)

    # Perform the dispersal
    # args: population, total_population, carrying_capacity, kernel
    _m = -m
    if m == 0:
        _m = None
    _n = -n
    if n == 0:
        _n = None
    output = delayed(distance_propagation_task)(a, kernel, m, n)[m:_m, n:_n, 0]
    output = da.from_delayed(output, population.shape, np.float32)

    return output.rechunk(chunks)
def density_flux(population, total_population, carrying_capacity, distance, csx, csy, **kwargs):
    """
    'density-based dispersion'

    Dispersal is calculated using the following sequence of methods:

    Portions of populations at each element (node, or grid cell) in the study area array (raster) are moved
    to surrounding elements (a neighbourhood) within a radius that is defined by the input distance
    (:math:`d`), as presented in the conceptual figure below.

    .. image:: images/density_flux_neighbourhood.png
        :align: center

    .. attention:: No dispersal will occur if the provided distance is less than the distance between
        elements (grid cells) in the model domain, as none will be included in the neighbourhood

    The mean density (:math:`\\rho`) of all elements in the neighbourhood is calculated as:

    .. math::
        \\rho=\\frac{\\sum_{i=1}^{n} \\frac{pop_T(i)}{k_T(i)}}{n}

    where,

    :math:`pop_T` is the total population (of the entire species) at each element (:math:`i`); and\n
    :math:`k_T` is the total carrying capacity for the species

    The density gradient at each element (:math:`\\Delta`) with respect to the mean is calculated as:

    .. math::
        \\Delta(i)=\\frac{pop_T(i)}{k_T(i)}-\\rho

    If the centroid element is above the mean :math:`[\\Delta(i_0) > 0]`, it is able to release a portion of
    its population to elements in the neighbourhood. The eligible population to be received by surrounding
    elements is equal to the sum of populations at elements with negative density gradients, the
    :math:`candidates`:

    .. math::
        candidates=\\sum_{i=1}^{n} \\Delta(i)[\\Delta(i) < 0]k_T(i)

    The minimum of either the population above the mean at the centroid element -
    :math:`source=\\Delta(i_0)*k_T(i_0)`, or the :math:`candidates` are used to determine the total
    population that is dispersed from the centroid element to the other elements in the neighbourhood:

    .. math::
        dispersal=min\{source, candidates\}

    The population at the centroid element becomes:

    .. math::
        pop_a(i_0)=pop_a(i_0)-\\frac{pop_a(i_0)}{pop_T(i_0)}dispersal

    where,

    :math:`pop_a` is the age (stage) group population, which is a sub-population of the total.

    The populations of the candidate elements in the neighbourhood become (a net gain due to negative
    gradients):

    .. math::
        pop_a(i)=pop_a(i)-\\frac{\\Delta(i)[\\Delta(i) < 0]k_T(i)}{candidates}dispersal\\frac{pop_a(i)}{pop_T(i)}

    :param da.Array population: Sub-population to redistribute (subset of the ``total_population``)
    :param da.Array total_population: Total population
    :param da.Array carrying_capacity: Total Carrying Capacity (k)
    :param float distance: Maximum dispersal distance
    :param float csx: Cell size of the domain in the x-direction
    :param float csy: Cell size of the domain in the y-direction

    .. Attention:: Ensure the cell sizes are in the same units as the specified direction

    :Keyword Arguments:
        **mask** (*array*) --
            A weighting mask that scales dispersal based on the normalized mask value (default: None)

    :return: Redistributed population
    """
    if any([not isinstance(a, da.Array)
            for a in [population, total_population, carrying_capacity]]):
        raise DispersalError('Inputs must be dask arrays')

    if distance == 0:
        # Don't do anything
        return population

    chunks = tuple(c[0] if c else 0 for c in population.chunks)[:2]

    mask = kwargs.get('mask', None)
    if mask is None:
        mask = da.ones(shape=population.shape, dtype='float32', chunks=chunks)

    # Normalize the mask
    mask_min = da.min(mask)
    _range = da.max(mask) - mask_min
    mask = da.where(_range > 0, (mask - mask_min) / _range, 1.)

    # Calculate the kernel indices and shape
    kernel = calculate_kernel(distance, csx, csy)
    if kernel is None:
        # Not enough distance to cover a grid cell
        return population
    kernel, m, n = kernel
    m = int(m)
    n = int(n)

    a = da.pad(da.dstack([population, total_population, carrying_capacity, mask]),
               ((m, m), (n, n), (0, 0)), 'constant', constant_values=0)

    _m = -m
    if m == 0:
        _m = None
    _n = -n
    if n == 0:
        _n = None
    output = delayed(density_flux_task)(a, kernel, m, n)[m:_m, n:_n, 0]
    output = da.from_delayed(output, population.shape, np.float32)

    return output.rechunk(chunks)
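# A toy evaluation of the docstring quantities for `density_flux`, on a
# 1-D neighbourhood of three elements (numbers purely illustrative; the
# magnitude of `candidates` is used for the minimum, as in the docstring).
import numpy as np

pop_T = np.array([8.0, 2.0, 4.0])   # total population per element
k_T = np.array([10.0, 10.0, 10.0])  # carrying capacity per element
rho = np.mean(pop_T / k_T)          # mean density: ~0.467
delta = pop_T / k_T - rho           # gradients: [+0.333, -0.267, -0.067]
candidates = abs(np.sum(delta[delta < 0] * k_T[delta < 0]))  # 3.333
source = delta[0] * k_T[0]                                   # 3.333
dispersal = min(source, candidates)
# The centroid (element 0) releases `dispersal`, split among the
# negative-gradient elements in proportion to delta * k_T.
print(rho, delta.round(3), dispersal)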
def reorder_mpas_data(ds, var, client, comp, path_zarr):
    nCells = 41943042
    perm_arr = np.fromfile(
        f'/glade/work/haiyingx/mpas_655362/mc2gv.dat.{nCells}', dtype='i4')
    print(perm_arr.shape)
    [future] = client.scatter([perm_arr], broadcast=True)

    arr_shape = ds[var].data.shape
    print(var, ds[var].dims, arr_shape)
    if len(ds[var].dims) == 3:
        var_arr = da.transpose(ds[var].data, (0, 2, 1))
    else:
        var_arr = ds[var].data
    arr_size = var_arr.nbytes

    # Use Ellipsis (...) here to deal with both 2D and 3D variables
    reindex_arr = da.map_blocks(lambda x, y: x[..., y], var_arr, perm_arr, dtype='f4')

    # Only pad the last dimension
    padded_tuple = ((0, 0),) * (len(ds[var].dims) - 1) + ((0, 2046),)
    padded_arr = da.pad(reindex_arr, padded_tuple, 'constant')
    print('var', var, padded_tuple)
    # arr = padded_arr.reshape(padded_arr.shape[0], padded_arr.shape[1], -1, 2048)
    arr = padded_arr.reshape(padded_arr.shape[:-1] + (20481, 2048))
    print(padded_arr.shape[:-1])

    # persist() keeps the result in memory and speeds up later compute() calls
    pre_b = arr.mean().persist()
    print(arr.shape)

    encoding = {f'{var}': {'compressor': comp[var]}}
    ds = xr.DataArray(arr, name=f'{var}').to_dataset()
    filename = f'{path_zarr[:-4]}{var}.zarr'
    if exists(filename):
        shutil.rmtree(filename)
    ds.to_zarr(filename, encoding=encoding)

    # Read the compressed file to get mean(), and compare against the abs tolerance
    filesize = sum(p.stat().st_size for p in Path(filename).rglob('*'))
    decomp_f = xr.open_zarr(filename)
    decomp_arr = decomp_f[var].data
    print(comp[var])
    if comp[var].codec_id == 'zfpy':
        tol = comp[var].tolerance
    else:
        tol = comp[var].level
    a = da.allclose(decomp_arr, arr, rtol=0.0, atol=tol).persist()
    b = decomp_f[var].mean().persist()

    # Save metric info to csv file
    results = []
    res_dict = {}
    res_dict['var_name'] = var
    res_dict['orig_size'] = arr_size
    res_dict['recon_size'] = filesize
    res_dict['ratio'] = round(arr_size / filesize, 2)
    res_dict['abs_valid'] = a.compute()
    res_dict['orig_avg'] = f'{pre_b.compute():.7f}'
    res_dict['recon_avg'] = f'{b.compute().values:.7f}'
    results.append(res_dict)
    pd.DataFrame(results).to_csv(
        '/glade/scratch/haiyingx/Falko/hybrid/size.txt',
        index=False, sep=',', mode='a', header=False,
    )
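# The pad-then-reshape trick used above, on a small array (sizes
# illustrative): pad the last axis so its length factors into the desired
# tile shape before reshaping.
import dask.array as da

arr = da.arange(10)
padded = da.pad(arr, (0, 2), 'constant').rechunk(-1)  # 10 -> 12 elements
tiled = padded.reshape((3, 4))
print(tiled.shape, tiled.compute()[0])  # (3, 4) [0 1 2 3]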
normalised_data = (data / rel_intens[:, np.newaxis, np.newaxis]) * weight_mask
e_mask = da.from_array(np.pad(weight_mask,
                              pad_width=((0, bsize - mask.shape[0]),
                                         (0, bsize - mask.shape[1]))),
                       chunks=(-1, -1))
im_list = []
for i, x in enumerate(xedges):
    temp_im = []
    for j, y in enumerate(yedges):
        mask_index = total_mask[i, j]
        if np.count_nonzero(mask_index) > 0:
            locdata = normalised_data[mask_index]
            locdata = da.pad(locdata,
                             pad_width=((0, 0),
                                        (0, bsize - locdata.shape[1]),
                                        (0, bsize - locdata.shape[2])),
                             mode='constant')
            locdata = locdata.rechunk({0: cs, 1: -1, 2: -1})
            shifts = da.from_array([x, y] - pc.T[mask_index], chunks=(cs, -1))
            image_sum = shift_images(locdata, shifts, 1).sum(axis=0)
            image_weight = shift_images(
                da.stack([e_mask] * mask_index.sum()).rechunk({0: cs}),
                shifts, 1).sum(axis=0)
            normed = (image_sum / image_weight)
            temp_im.append(normed)
        else:
            temp_im.append(
                da.full((bsize, bsize), fill_value=np.nan, chunks=(-1, -1)))
    im_list.append(temp_im)
ims.append(
    da.block(im_list).to_zarr(os.path.join(folder, name, f'results.zarr'),
def h5ebsd2signaldict(
    scan_group: h5py.Group,
    manufacturer: str,
    version: str,
    lazy: bool = False,
) -> dict:
    """Return a dictionary with ``signal``, ``metadata`` and
    ``original_metadata`` from an h5ebsd scan.

    Parameters
    ----------
    scan_group : h5py:Group
        HDF group of scan.
    manufacturer
        Manufacturer of file. Options are "kikuchipy"/"EDAX"/"Bruker Nano".
    version
        Version of manufacturer software.
    lazy
        Read dataset lazily.

    Returns
    -------
    scan : dict
        Dictionary with patterns, ``metadata`` and ``original_metadata``.
    """
    md, omd, scan_size = h5ebsdheader2dicts(scan_group, manufacturer, version, lazy)
    md.set_item("Signal.signal_type", "EBSD")
    md.set_item("Signal.record_by", "image")

    scan = {
        "metadata": md.as_dictionary(),
        "original_metadata": omd.as_dictionary(),
        "attributes": {},
    }

    # Get data dataset
    man_pats = manufacturer_pattern_names()
    data_dset = None
    for man, pats in man_pats.items():
        if manufacturer.lower() == man.lower():
            try:
                data_dset = scan_group["EBSD/Data/" + pats]
            except KeyError:
                raise KeyError(
                    "Could not find patterns in the expected dataset "
                    f"'EBSD/Data/{pats}'"
                )
            break

    # Get data from group
    if lazy:
        if data_dset.chunks is None:
            chunks = "auto"
        else:
            chunks = data_dset.chunks
        data = da.from_array(data_dset, chunks=chunks)
    else:
        data = np.asanyarray(data_dset)

    sx, sy = scan_size.sx, scan_size.sy
    nx, ny = scan_size.nx, scan_size.ny
    try:
        data = data.reshape((ny, nx, sy, sx)).squeeze()
    except ValueError:
        warnings.warn(
            f"Pattern size ({sx} x {sy}) and scan size ({nx} x {ny}) larger "
            "than file size. Will attempt to load by zero padding incomplete "
            "frames."
        )
        # Data is stored image by image
        pw = [(0, ny * nx * sy * sx - data.size)]
        if lazy:
            data = da.pad(data.flatten(), pw, mode="constant")
        else:
            data = np.pad(data.flatten(), pw, mode="constant")
        data = data.reshape((ny, nx, sy, sx))
    scan["data"] = data

    units = ["um"] * 4
    scales = np.ones(4)

    # Calibrate scan dimension and detector dimension
    scales[0] *= scan_size.step_y
    scales[1] *= scan_size.step_x
    scales[2] *= scan_size.delta
    scales[3] *= scan_size.delta

    # Set axes names
    names = ["y", "x", "dy", "dx"]
    if data.ndim == 3:
        if ny > nx:
            names.remove("x")
            scales = np.delete(scales, 1)
        else:
            names.remove("y")
            scales = np.delete(scales, 0)
    elif data.ndim == 2:
        names = names[2:]
        scales = scales[2:]

    # Create axis objects for each axis
    axes = [
        {
            "size": data.shape[i],
            "index_in_array": i,
            "name": names[i],
            "scale": scales[i],
            "offset": 0.0,
            "units": units[i],
        }
        for i in range(data.ndim)
    ]
    scan["axes"] = axes

    return scan
def tiled_deformable_align(
    fixed, moving,
    fixed_spacing, moving_spacing,
    blocksize,
    transpose=[False] * 2,
    global_affine=None,
    local_affines=None,
    write_path=None,
    lazy=True,
    deform_kwargs={},
    # cluster_kwargs={},
):
    """ Piecewise deformable alignment of `moving` to `fixed` over a block grid """

    # get number of blocks required
    block_grid = np.ceil(np.array(fixed.shape) / blocksize)
    nblocks = np.prod(block_grid)

    # get true field shape
    original_shape = fixed.shape
    if transpose[0]:
        original_shape = original_shape[::-1]

    # get affine position field
    affine_pf = None
    if global_affine is not None or local_affines is not None:
        if local_affines is None:
            # np.empty needs integer dims; block_grid is a float array
            local_affines = np.empty(
                tuple(block_grid.astype(int)) + (3, 4), dtype=np.float32,
            )
            local_affines[..., :, :] = np.eye(4)[:3, :]
        affine_pf = transform.local_affines_to_position_field(
            original_shape, fixed_spacing, blocksize, local_affines,
            global_affine=global_affine, lazy=True,
            # cluster_kwargs=cluster_kwargs,
        )

    # distributed computations done in cluster context
    # with ClusterWrap.cluster(**cluster_kwargs) as cluster:
    #     if write_path is not None or not lazy:
    #         cluster.scale_cluster(nblocks + WORKER_BUFFER)

    # wrap images as dask arrays
    fixed_da = da.from_array(fixed)
    moving_da = da.from_array(moving)

    # in case xyz convention is flipped for input file
    if transpose[0]:
        fixed_da = fixed_da.transpose(2, 1, 0)
    if transpose[1]:
        moving_da = moving_da.transpose(2, 1, 0)

    # pad the ends to fill in the last blocks
    pads = []
    for x, y in zip(original_shape, blocksize):
        pads += [(0, y - x % y) if x % y > 0 else (0, 0)]
    fixed_da = da.pad(fixed_da, pads)
    moving_da = da.pad(moving_da, pads)

    # chunk to blocksize
    fixed_da = fixed_da.rechunk(tuple(blocksize))
    moving_da = moving_da.rechunk(tuple(blocksize))

    # wrap deformable function
    def wrapped_deformable_align(x, y):
        warp = deformable_align(
            x, y, fixed_spacing, moving_spacing, **deform_kwargs,
        )
        return warp.reshape((1, 1, 1) + warp.shape)

    # deform all chunks
    overlaps = tuple([int(round(x / 8)) for x in blocksize])
    out_blocks = [x + 2 * y for x, y in zip(blocksize, overlaps)]
    out_blocks = [1, 1, 1] + out_blocks + [3]
    warps = da.map_overlap(
        wrapped_deformable_align, fixed_da, moving_da,
        depth=overlaps, boundary=0, trim=False, align_arrays=False,
        dtype=np.float32, new_axis=[3, 4, 5, 6], chunks=out_blocks,
    )

    # stitch neighboring displacement fields
    warps = stitch.stitch_fields(warps, blocksize)

    # crop any pads
    warps = warps[:original_shape[0], :original_shape[1], :original_shape[2]]

    # compose with affine position field
    # TODO refactor transform.compose_position_fields; replace this approximation
    if affine_pf is not None:
        final_field = affine_pf + warps
    else:
        final_field = warps + transform.position_grid_dask(original_shape, blocksize)

    # if user wants to write to disk
    if write_path is not None:
        compressor = Blosc(cname='zstd', clevel=9, shuffle=Blosc.BITSHUFFLE)
        final_field_disk = zarr.open(
            write_path, 'w',
            shape=final_field.shape,
            chunks=tuple(blocksize + [3]),
            dtype=final_field.dtype,
            compressor=compressor,
        )
        da.to_zarr(final_field, final_field_disk)

    # if user wants to compute and return full field
    if not lazy:
        return final_field.compute()

    # if user wants to return compute graph w/o executing
    if lazy:
        return final_field
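# The pad-to-full-blocks pattern shared by the alignment functions here,
# in isolation (sizes illustrative): grow each axis to a multiple of the
# block size, then rechunk so every block is complete.
import dask.array as da

blocksize = (4, 4)
x = da.ones((10, 7))
pads = [(0, b - s % b) if s % b > 0 else (0, 0) for s, b in zip(x.shape, blocksize)]
x = da.pad(x, pads).rechunk(blocksize)
print(x.shape, x.chunks)  # (12, 8) ((4, 4, 4), (4, 4))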
def prepare_piecewise_deformable_align(
    fix, mov,
    fix_spacing, mov_spacing,
    blocksize,
    transpose=[False] * 2,
    global_affine=None,
    local_affines=None,
    **kwargs,
):
    """ Build the full-resolution position field for piecewise deformable alignment """

    # get number of blocks required
    block_grid = np.ceil(np.array(fix.shape) / blocksize)

    # get true field shape
    original_shape = fix.shape
    if transpose[0]:
        original_shape = original_shape[::-1]

    # compose global/local affines
    total_affines = None
    if local_affines is not None and global_affine is not None:
        total_affines = transform.compose_affines(global_affine, local_affines)
    elif global_affine is not None:
        # np.empty needs integer dims; block_grid is a float array
        total_affines = np.empty(tuple(block_grid.astype(int)) + (4, 4))
        total_affines[..., :, :] = global_affine
    elif local_affines is not None:
        total_affines = np.copy(local_affines)

    # get affine position field
    overlap = tuple([int(round(x / 8)) for x in blocksize])
    affine_pf = None
    if total_affines is not None:
        affine_pf = ds.local_affine.local_affines_to_field(
            original_shape, fix_spacing, total_affines,
            blocksize, overlap, displacement=False,
        )

    # wrap images as dask arrays
    fix_da = da.from_array(fix)
    mov_da = da.from_array(mov)

    # in case xyz convention is flipped for input file
    if transpose[0]:
        fix_da = fix_da.transpose(2, 1, 0)
    if transpose[1]:
        mov_da = mov_da.transpose(2, 1, 0)

    # pad the ends to fill in the last blocks
    pads = []
    for x, y in zip(original_shape, blocksize):
        pads += [(0, y - x % y) if x % y > 0 else (0, 0)]
    fix_da = da.pad(fix_da, pads)
    mov_da = da.pad(mov_da, pads)

    # chunk to blocksize
    fix_da = fix_da.rechunk(tuple(blocksize))
    mov_da = mov_da.rechunk(tuple(blocksize))

    # wrap deformable function
    def wrapped_deformable_align(x, y):
        return deformable_align(x, y, fix_spacing, mov_spacing, **kwargs)

    # deform all chunks
    out_blocks = [x + 2 * y for x, y in zip(blocksize, overlap)] + [3]
    warps = da.map_overlap(
        wrapped_deformable_align, fix_da, mov_da,
        depth=overlap, boundary=0, trim=False, align_arrays=False,
        dtype=np.float32, new_axis=[3], chunks=out_blocks,
    )

    # stitch neighboring displacement fields
    warps = ds.stitch.stitch_blocks(warps, blocksize, overlap)

    # crop any pads
    warps = warps[:original_shape[0], :original_shape[1], :original_shape[2]]

    # compose with affine position field
    # TODO refactor transform.compose_position_fields; replace this approximation
    if affine_pf is not None:
        final_field = affine_pf + warps
    else:
        final_field = warps + ds.local_affine.position_grid(original_shape, blocksize)

    return final_field
def pad_axis(array, dim, pad_width):
    padding = [(0, 0) if i != dim else (0, pad_width)
               for i in range(len(array.shape))]
    padded = da.pad(array, padding, "constant")
    return padded
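# Hypothetical usage of `pad_axis`: grow axis 1 of a (2, 3) array by two
# zero-filled columns.
import dask.array as da

a = da.ones((2, 3), chunks=2)
print(pad_axis(a, dim=1, pad_width=2).shape)  # (2, 5)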
def deformable_align_distributed(
    fixed, moving,
    fixed_vox, moving_vox,
    write_path,
    cc_radius,
    gradient_smoothing,
    field_smoothing,
    iterations,
    shrink_factors,
    smooth_sigmas,
    step,
    blocksize=[256] * 3,
    cluster_extra=["-P multifish"],
    transpose=False,
):
    """ Blockwise deformable registration on a dask cluster; writes the
    resulting position field to zarr and returns a reference to it """

    # distributed computations done in cluster context
    with distributed.distributedState() as ds:

        # get number of blocks required
        block_grid = np.ceil(np.array(fixed.shape) / blocksize)
        nblocks = np.prod(block_grid)

        # set up the cluster
        ds.initializeLSFCluster(
            job_extra=cluster_extra,
            cores=4, memory="64GB", ncpus=4,
            threads_per_worker=8, mem=64000,
        )
        ds.initializeClient()
        ds.scaleCluster(njobs=nblocks)

        # wrap images as dask arrays
        fixed_da = da.from_array(fixed)
        moving_da = da.from_array(moving)

        # in case xyz convention is flipped for input file
        if transpose:
            fixed_da = fixed_da.transpose(2, 1, 0)

        # pad the ends to fill in the last blocks
        orig_sh = fixed_da.shape
        pads = [(0, y - x % y) if x % y != 0 else (0, 0)
                for x, y in zip(orig_sh, blocksize)]
        fixed_da = da.pad(fixed_da, pads)
        moving_da = da.pad(moving_da, pads)
        fixed_da = fixed_da.rechunk(tuple(blocksize))
        moving_da = moving_da.rechunk(tuple(blocksize))

        # wrap deformable function to simplify passing parameters
        def my_deformable_align(x, y):
            return deformable_align(
                x, y, fixed_vox, moving_vox,
                cc_radius, gradient_smoothing, field_smoothing,
                iterations, shrink_factors, smooth_sigmas, step,
            )

        # deform all chunks
        overlaps = tuple([int(round(x / 8)) for x in blocksize])
        out_blocks = [1, 1, 1] + [x + 2 * y for x, y in zip(blocksize, overlaps)] + [3]
        warps = da.map_overlap(
            my_deformable_align, fixed_da, moving_da,
            depth=overlaps, boundary='reflect', trim=False,
            align_arrays=False, dtype=np.float32,
            new_axis=[3, 4, 5, 6], chunks=out_blocks,
        )

        # stitch neighboring displacement fields
        warps = stitch.stitch_fields(warps, blocksize)

        # crop any pads
        warps = warps[:orig_sh[0], :orig_sh[1], :orig_sh[2]]

        # convert to position field
        warps = warps + transform.position_grid_dask(orig_sh, blocksize)

        # write result to zarr file
        compressor = Blosc(cname='zstd', clevel=9, shuffle=Blosc.BITSHUFFLE)
        warps_disk = zarr.open(
            write_path, 'w',
            shape=warps.shape,
            chunks=tuple(blocksize + [3]),
            dtype=warps.dtype,
            compressor=compressor,
        )
        da.to_zarr(warps, warps_disk)

        # return reference to zarr data store
        return warps_disk
def h5ebsd2signaldict(scan_group, manufacturer, version, lazy=False):
    """Return a dictionary with signal, metadata and original metadata from
    an h5ebsd scan.

    Parameters
    ----------
    scan_group : h5py.Group
        HDF group of scan.
    manufacturer : {'KikuchiPy', 'EDAX', 'Bruker Nano'}
        Manufacturer of file.
    version : str
        Version of manufacturer software.
    lazy : bool, optional
        Read dataset lazily.

    Returns
    -------
    scan : dictionary
        Dictionary with patterns, metadata and original metadata.
    """
    md, omd, scan_size = h5ebsdheader2dicts(scan_group, manufacturer, version, lazy)
    md.set_item('Signal.signal_type', 'EBSD')
    md.set_item('Signal.record_by', 'image')

    scan = {
        'metadata': md.as_dictionary(),
        'original_metadata': omd.as_dictionary(),
        'attributes': {},
    }

    # Get data group
    man_pats = manufacturer_pattern_names()
    for man, pats in man_pats.items():
        if manufacturer.lower() == man.lower():
            data = scan_group['EBSD/Data/' + pats]

    # Get data from group
    if lazy:
        chunks = data.chunks
        if chunks is None:
            chunks = get_signal_chunks(data.shape, data.dtype, [1, 2])
        data = da.from_array(data, chunks=chunks)
        scan['attributes']['_lazy'] = True
    else:
        data = np.asanyarray(data)

    sx, sy = scan_size.sx, scan_size.sy
    nx, ny = scan_size.nx, scan_size.ny
    try:
        data = data.reshape((ny, nx, sy, sx)).squeeze()
    except ValueError:
        warnings.warn("Pattern size ({} x {}) and scan size ({} x {}) larger "
                      "than file size. Will attempt to load by zero padding "
                      "incomplete frames.".format(sx, sy, nx, ny))
        # Data is stored pattern by pattern; pad the flattened data so the
        # single pad pair applies to one axis only
        pw = [(0, ny * nx * sy * sx - data.size)]
        if lazy:
            data = da.pad(data.flatten(), pw, mode='constant')
        else:
            data = np.pad(data.flatten(), pw, mode='constant')
        data = data.reshape((ny, nx, sy, sx))
    scan['data'] = data

    units = np.repeat(u'\u03BC' + 'm', 4)
    names = ['y', 'x', 'dy', 'dx']
    scales = np.ones(4)

    # Calibrate scan dimension and detector dimension
    step_x, step_y = scan_size.step_x, scan_size.step_y
    scales[0] = scales[0] * step_x
    scales[1] = scales[1] * step_y
    detector_pixel_size = scan_size.delta
    scales[2] = scales[2] * detector_pixel_size
    scales[3] = scales[3] * detector_pixel_size

    # Create axis objects for each axis
    axes = [
        {
            'size': data.shape[i],
            'index_in_array': i,
            'name': names[i],
            'scale': scales[i],
            'offset': 0.0,
            'units': units[i],
        }
        for i in range(data.ndim)
    ]
    scan['axes'] = axes

    return scan