def test_dstack():
    """Check that ``da.dstack`` agrees with ``np.dstack`` on 3-D, 2-D and 1-D inputs."""
    np_ramp = np.arange(5)
    np_ones = np.ones(5)
    da_ramp = da.arange(5, chunks=2)
    da_ones = da.ones(5, chunks=2)

    # Inputs that are already three-dimensional.
    expected = np.dstack((np_ramp[None, None, :], np_ones[None, None, :]))
    actual = da.dstack((da_ramp[None, None, :], da_ones[None, None, :]))
    assert_eq(expected, actual)

    # Two-dimensional inputs.
    expected = np.dstack((np_ramp[None, :], np_ones[None, :]))
    actual = da.dstack((da_ramp[None, :], da_ones[None, :]))
    assert_eq(expected, actual)

    # One-dimensional inputs.
    expected = np.dstack((np_ramp, np_ones))
    actual = da.dstack((da_ramp, da_ones))
    assert_eq(expected, actual)
def concat_bands(self):
    """Replace the 'r', 'g' and 'b' arrays with a single clipped 'rgb' array.

    The three bands stored in ``self._arrays`` are stacked depth-wise, the
    result is clipped to the range [0, 1], and the individual band entries
    are removed from ``self._arrays``.
    """
    import dask.array as da

    self.logger.info('Concatenate RGB bands...')

    bands = self._arrays
    bands['rgb'] = da.dstack((bands['r'], bands['g'], bands['b']))
    bands['rgb'] = da.clip(bands['rgb'], 0, 1)

    # The separate channels are no longer needed once 'rgb' exists.
    for channel in ('r', 'g', 'b'):
        del bands[channel]
def _expand_tiepoint_array_5km(self, arr, lines, cols):
    """Expand a 5km tie-point array by repetition and pad two columns on each side.

    ``arr`` is repeated ``lines * 2`` times along axis 1, reshaped to a 2-D
    array whose width depends on ``self.level``, repeated ``cols`` times
    along axis 1, and finally extended with copies of its first two and
    last two columns.
    """
    level = self.level

    if level == 2:
        # Repeat the last column to complete L2 data
        last_column = arr[:, :, -1]
        arr = da.dstack([arr, last_column])

    arr = da.repeat(arr, lines * 2, axis=1)

    if level == 1:
        row_width = self.cscan_full_width - 1
        arr = da.repeat(arr.reshape((-1, row_width)), cols, axis=1)
    elif level == 2:
        row_width = self.cscan_full_width
        arr = da.repeat(arr.reshape((-1, row_width)), cols, axis=1)

    left_edge = arr[:, :2]
    right_edge = arr[:, -2:]
    return da.hstack((left_edge, arr, right_edge))
def test_get_sample_from_bil_info(self):
    """Test bilinear interpolation as a whole."""
    import dask.array as da
    from xarray import DataArray

    from pyresample.bilinear.xarr import XArrayResamplerBilinear

    resampler = XArrayResamplerBilinear(self.source_def,
                                        self.target_def,
                                        self.radius)
    resampler.get_bil_info()

    # --- Plain sampling of data1 -------------------------------------
    res = resampler.get_sample_from_bil_info(self.data1).compute()
    # Spot-check one valid and one invalid location
    self.assertEqual(res.values[1, 1], 1.)
    self.assertTrue(np.isnan(res.values[0, 3]))
    # The valid values must not have drifted away from 1
    self.assertAlmostEqual(np.nanmin(res.values), 1.)
    self.assertAlmostEqual(np.nanmax(res.values), 1.)
    # Dimension names are carried over from the input
    self.assertEqual(res.dims, self.data1.dims)

    # --- Sampling of data1 with a custom fill value -------------------
    res = resampler.get_sample_from_bil_info(self.data1,
                                             fill_value=-1.0).compute()
    self.assertEqual(np.nanmin(res.values), -1.)

    # --- Sampling of integer data -------------------------------------
    int_data = self.data1.astype(np.uint8)
    res = resampler.get_sample_from_bil_info(int_data,
                                             fill_value=None).compute()
    # Zero is the default fill_value for integer data.
    # NOTE(review): the previous comment said five values are filled,
    # but the assertion expects six -- confirm which one is intended.
    self.assertEqual(np.sum(res == 0), 6)

    # --- Output coordinates should have been set ----------------------
    out_coords = resampler._out_coords
    self.assertTrue(isinstance(out_coords, dict))
    self.assertTrue(np.all(out_coords['x'] == resampler.out_coords_x))
    self.assertTrue(np.all(out_coords['y'] == resampler.out_coords_y))

    # --- 3D data: two copies of data1 along a leading 'bands' axis ----
    stacked = da.dstack((self.data1, self.data1))
    data = DataArray(da.moveaxis(stacked, -1, 0), dims=('bands', 'y', 'x'))
    res = resampler.get_sample_from_bil_info(data)
    assert res.shape == (2, ) + self.target_def.shape
    assert res.dims == data.dims
def _get_output_xy_dask(target_geo_def, proj):
    """Return the flattened x and y coordinates of the target grid.

    The target longitudes/latitudes are read and masked, computed eagerly,
    run through ``proj``, and the two resulting planes are returned as
    raveled arrays ``(out_x, out_y)``.
    """
    # Read the output coordinates and mask the invalid ones
    lons, lats = target_geo_def.get_lonlats_dask()
    lons, lats = _mask_coordinates_dask(lons, lats)

    # Convert to the output projection x/y space; note that the
    # coordinates are computed before being handed to ``proj``.
    projected = proj(lons.compute(), lats.compute())
    xy_planes = da.dstack(projected)

    return da.ravel(xy_planes[:, :, 0]), da.ravel(xy_planes[:, :, 1])
def _stack_dims(data, cases, set_cases, exp):
    """Recursively stack multi-dimensional data, one level per case.

    Each recursion level fixes the value of one entry of ``cases`` (in
    order). Once every case has a value, the matching leaf is looked up
    in ``data``; on the way back up, the results for all values of a case
    are combined with ``dask.array.stack``.

    Parameters
    ----------
    data : mapping
        Indexed by the tuple returned from ``exp.case_tuple``; each value
        must expose a ``.data`` attribute.
    cases : sequence
        Case objects providing ``vals`` (iterable of values) and
        ``shortname`` (keyword name used for ``exp.case_tuple``).
    set_cases : dict
        Case values fixed so far, keyed by case shortname. It is copied,
        never modified.
    exp : object
        Provides ``case_tuple(**set_cases)`` to build the key into ``data``.

    Returns
    -------
    The leaf ``.data`` object when all cases are set, otherwise the
    stacked result of the recursive calls.
    """
    # Only dask's ``stack`` is needed here (the previous local alias
    # ``dstack`` was misleading: this is ``stack``, not ``dstack``).
    from dask.array import stack as da_stack

    depth = len(set_cases)

    # Leaf: every case has a value, so fetch the corresponding data.
    if depth >= len(cases):
        return data[exp.case_tuple(**set_cases)].data

    case = cases[depth]
    # Work on a copy so the caller's dictionary is left untouched; the
    # copy is reused across iterations because each recursive call makes
    # its own copy before modifying anything.
    child_cases = set_cases.copy()
    stacked = []
    for val in case.vals:
        child_cases[case.shortname] = val
        stacked.append(_stack_dims(data, cases, child_cases, exp))
    return da_stack(stacked)
def _calc_unitcell_vectors(self):
    """Compute the unit cell box vectors for every frame.

    Returns
    -------
    vectors : da.ndarray, shape(n_frames, 3, 3)
        Vectors defining the shape of the unit cell in each frame, or
        ``None`` when the unit cell lengths or angles are not available.
        The semantics of this array are that the shape of the unit cell
        in frame ``i`` are given by the three vectors, ``value[i, 0, :]``,
        ``value[i, 1, :]``, and ``value[i, 2, :]``.
    """
    if self.unitcell_lengths is None or self.unitcell_angles is None:
        return None

    lengths = self._unitcell_lengths
    angles = self._unitcell_angles

    # Argument order: a, b, c, alpha, beta, gamma
    v1, v2, v3 = lengths_and_angles_to_box_vectors(
        lengths[:, 0],
        lengths[:, 1],
        lengths[:, 2],
        angles[:, 0],
        angles[:, 1],
        angles[:, 2],
    )

    stacked = da.dstack((v1, v2, v3))
    return da.swapaxes(stacked, 1, 2)
def _run_proj(proj, lons, lats):
    """Call ``proj`` on the coordinates and stack its outputs depth-wise."""
    projected = proj(lons, lats)
    return da.dstack(projected)
def dstack(dsts):
    """Stack arrays depth-wise, bypassing ``da.dstack`` for a single array.

    dask.array.dstack with one array is slow, so in that case the array is
    only promoted to at least three dimensions.
    """
    if len(dsts) <= 1:
        return da.atleast_3d(dsts[0])
    return da.dstack(dsts)
def plsa_em_step_dask(
    block_rows_ndarray,
    block_cols_ndarray,
    block_vals_ndarray,
    p_w_given_z,
    p_z_given_d,
    block_row_size,
    block_col_size,
    e_step_thresh=1e-32,
):
    """Run one blocked PLSA EM step with dask and return the computed result.

    The block kernel is invoked for every (document block, word block)
    pair; its four outputs are wrapped as dask arrays, summed over the
    blocks that contribute to the same slice, normalized, and finally
    evaluated with ``compute``.

    Returns the tuple produced by ``compute(p_w_given_z, p_z_given_d)``.
    """

    def _sum_over_blocks(parts):
        # Stack the partial results along a new last axis and add them up.
        return da.dstack(parts).sum(axis=-1)

    n_d_blocks = block_rows_ndarray.shape[0]
    n_w_blocks = block_rows_ndarray.shape[1]

    n = p_z_given_d.shape[0]
    m = p_w_given_z.shape[1]
    k = p_z_given_d.shape[1]

    # Partial results, grouped by the block index they belong to.
    pwz_parts = [[] for _ in range(n_w_blocks)]
    pzd_parts = [[] for _ in range(n_d_blocks)]
    norm_pwz_parts = []
    norm_pdz_parts = [[] for _ in range(n_d_blocks)]

    for d_idx in range(n_d_blocks):
        row_start = block_row_size * d_idx
        row_end = min(row_start + block_row_size, n)
        pzd_slice = p_z_given_d[row_start:row_end, :]

        for w_idx in range(n_w_blocks):
            col_start = block_col_size * w_idx
            col_end = min(col_start + block_col_size, m)

            kernel_results = plsa_em_step_block_kernel(
                block_rows_ndarray[d_idx, w_idx],
                block_cols_ndarray[d_idx, w_idx],
                block_vals_ndarray[d_idx, w_idx],
                p_w_given_z[:, col_start:col_end],
                pzd_slice,
                e_step_thresh=e_step_thresh,
            )

            pwz_parts[w_idx].append(
                da.from_delayed(kernel_results[0],
                                (k, block_col_size),
                                dtype=np.float32))
            pzd_parts[d_idx].append(
                da.from_delayed(kernel_results[1],
                                (block_row_size, k),
                                dtype=np.float32))
            norm_pwz_parts.append(
                da.from_delayed(kernel_results[2], (k, ), dtype=np.float32))
            norm_pdz_parts[d_idx].append(
                da.from_delayed(kernel_results[3],
                                (block_row_size, ),
                                dtype=np.float32))

    p_w_given_z_blocks = [_sum_over_blocks(parts) for parts in pwz_parts]
    p_z_given_d_blocks = [_sum_over_blocks(parts) for parts in pzd_parts]
    norm_pdz_blocks = [_sum_over_blocks(parts) for parts in norm_pdz_parts]

    norm_pwz = _sum_over_blocks(norm_pwz_parts).T
    new_p_w_given_z = da.hstack(p_w_given_z_blocks) / norm_pwz

    norm_pdz = da.hstack(norm_pdz_blocks).T
    new_p_z_given_d = da.vstack(p_z_given_d_blocks) / norm_pdz

    return compute(new_p_w_given_z, new_p_z_given_d)
def density_flux(population, total_population, carrying_capacity, distance, csx, csy, **kwargs):
    """
    'density-based dispersion'

    Dispersal is calculated using the following sequence of methods:

    Portions of populations at each element (node, or grid cell) in the study area array (raster) are moved to
    surrounding elements (a neighbourhood) within a radius that is defined by the input distance (:math:`d`), as
    presented in the conceptual figure below.

        .. image:: images/density_flux_neighbourhood.png
            :align: center

    .. attention:: No dispersal will occur if the provided distance is less than the distance between elements (grid cells) in the model domain, as none will be included in the neighbourhood

    The mean density (:math:`\\rho`) of all elements in the neighbourhood is calculated as:

    .. math::
        \\rho=\\frac{\\sum_{i=1}^{n} \\frac{pop_T(i)}{k_T(i)}}{n}

    where,

    :math:`pop_T` is the total population (of the entire species) at each element (:math:`i`); and\n
    :math:`k_T` is the total carrying capacity for the species

    The density gradient at each element (:math:`\\Delta`) with respect to the mean is calculated as:

    .. math::
        \\Delta(i)=\\frac{pop_T(i)}{k_T(i)}-\\rho

    If the centroid element is above the mean :math:`[\\Delta(i_0) > 0]`, it is able to release a portion of its
    population to elements in the neighbourhood. The eligible population to be received by surrounding elements is
    equal to the sum of populations at elements with negative density gradients, the :math:`candidates`:

    .. math::
        candidates=\\sum_{i=1}^{n} \\Delta(i)[\\Delta(i) < 0]k_T(i)

    The minimum of either the population above the mean at the centroid element -
    :math:`source=\\Delta(i_0)*k_T(i_0)`, or the :math:`candidates` are used to determine the total population that
    is dispersed from the centroid element to the other elements in the neighbourhood:

    .. math::
        dispersal=min\\{source, candidates\\}

    The population at the centroid element becomes:

    .. math::
        pop_a(i_0)=pop_a(i_0)-\\frac{pop_a(i_0)}{pop_T(i_0)}dispersal

    where,

    :math:`pop_a` is the age (stage) group population, which is a sub-population of the total.

    The populations of the candidate elements in the neighbourhood become (a net gain due to negative gradients):

    .. math::
        pop_a(i)=pop_a(i)-\\frac{\\Delta(i)[\\Delta(i) < 0]k_T(i)}{candidates}dispersal\\frac{pop_a(i)}{pop_T(i)}

    :param da.Array population: Sub-population to redistribute (subset of the ``total_population``)
    :param da.Array total_population: Total population
    :param da.Array carrying_capacity: Total Carrying Capacity (k)
    :param float distance: Maximum dispersal distance
    :param float csx: Cell size of the domain in the x-direction
    :param float csy: Cell size of the domain in the y-direction

    .. Attention:: Ensure the cell sizes are in the same units as the specified direction

    :Keyword Arguments:
        **mask** (*array*) --
            A weighting mask that scales dispersal based on the normalized mask value (default: None).
            When omitted, a mask of ones with the shape of ``population`` is used.
    :return: Redistributed population. ``population`` itself is returned unchanged when ``distance`` is 0 or
        when no kernel can be built for the given distance and cell sizes.
    :raises DispersalError: If ``population``, ``total_population`` or ``carrying_capacity`` is not a dask array
    """
    # Check the inputs
    if any(
        not isinstance(a, da.Array)
        for a in (population, total_population, carrying_capacity)
    ):
        raise DispersalError('Inputs must be a dask arrays')

    if distance == 0:
        # Don't do anything
        return population

    # First chunk size of each of the first two dimensions; used for the
    # default mask and to rechunk the output.
    chunks = tuple(c[0] if c else 0 for c in population.chunks)[:2]

    mask = kwargs.get('mask', None)
    if mask is None:
        mask = da.ones(shape=population.shape, dtype='float32', chunks=chunks)

    # Normalize the mask to [0, 1]; a constant mask (zero range) becomes all ones
    mask_min = da.min(mask)
    _range = da.max(mask) - mask_min
    mask = da.where(_range > 0, (mask - mask_min) / _range, 1.)

    # Calculate the kernel indices and shape
    kernel = calculate_kernel(distance, csx, csy)
    if kernel is None:
        # Not enough distance to cover a grid cell
        return population
    kernel, m, n = kernel
    m = int(m)
    n = int(n)

    # Stack all inputs depth-wise and pad the two spatial axes with zeros
    # by the kernel half-size so the task can look past the borders.
    a = da.pad(
        da.dstack([population, total_population, carrying_capacity, mask]),
        ((m, m), (n, n), (0, 0)),
        'constant',
        constant_values=0,
    )

    # A zero pad width must slice to the end (``None``), because ``-0``
    # as a stop index would yield an empty slice.
    _m = -m if m != 0 else None
    _n = -n if n != 0 else None

    # Run the task lazily, then strip the padding and keep the first layer
    output = delayed(density_flux_task)(a, kernel, m, n)[m:_m, n:_n, 0]
    output = da.from_delayed(output, population.shape, np.float32)

    return output.rechunk(chunks)
def distance_propagation(population, total_population, carrying_capacity, distance, csx, csy, **kwargs):
    """
    'distance propagation'

    Distance propagation is used to redistribute populations to distal locations based on density gradients.
    Portions of populations at each element (node, or grid cell) in the study area array (raster) are moved to a
    target element at a radius that is defined by the input distance (:math:`d`), as presented in the conceptual
    figure below.

        .. image:: images/distance_propagation_neighbourhood.png
            :align: center

    .. attention:: No dispersal will occur if the provided distance is less than the distance between elements (grid cells) in the model domain, as none will be included in the neighbourhood

    The density (:math:`\\rho`) of all distal elements (:math:`i`) is calculated as:

    .. math::
        \\rho(i)=\\frac{pop_T(i)}{k_T(i)}

    where,

    :math:`pop_T` is the total population (of the entire species) at each element (:math:`i`); and\n
    :math:`k_T` is the total carrying capacity for the species

    The distal element with the minimum density is chosen as a candidate for population dispersal from the centroid
    element. If the density of distal elements is homogeneous, one element is picked at random. The density
    gradient :math:`\\Delta` is then calculated using the centroid element :math:`i_0` and the chosen distal
    element :math:`i_1`:

    .. math::
        \\rho=\\frac{pop_T(i_0)/k_T(i_0)+pop_T(i_1)/k_T(i_1)}{2}

    .. math::
        \\Delta(i)=\\frac{pop_T(i)}{k_T(i)}-\\rho

    If the centroid element is above the mean :math:`[\\Delta(i_0) > 0]`, and the distal element is below the mean
    :math:`[\\Delta(i_1) < 0]`, dispersal may take place. The total population dispersed is calculated by taking
    the minimum of the population constrained by the gradient:

    .. math::
        dispersal=min\\{|\\Delta(i_0)k_T(i_0)|, |\\Delta(i_1)k_T(i_1)|\\}

    The population at the centroid element becomes:

    .. math::
        pop_a(i_0)=pop_a(i_0)-dispersal

    where,

    :math:`pop_a` is the age (stage) group population, which is a sub-population of the total.

    The population at the distal element becomes (a net gain due to a negative gradient):

    .. math::
        pop_a(i_1)=pop_a(i_1)-dispersal

    :param da.Array population: Sub-population to redistribute (subset of the ``total_population``)
    :param da.Array total_population: Total population
    :param da.Array carrying_capacity: Total Carrying Capacity (n)
    :param float distance: Maximum dispersal distance
    :param float csx: Cell size of the domain in the x-direction
    :param float csy: Cell size of the domain in the y-direction

    .. Attention:: Ensure the cell sizes are in the same units as the specified direction

    :return: Redistributed population. ``population`` itself is returned unchanged when ``distance`` is 0 or
        when no kernel can be built for the given distance and cell sizes.
    :raises DispersalError: If ``population``, ``total_population`` or ``carrying_capacity`` is not a dask array
    """
    # Check the inputs
    if any(
        not isinstance(a, da.Array)
        for a in (population, total_population, carrying_capacity)
    ):
        raise DispersalError('Inputs must be a dask arrays')

    if distance == 0:
        # Don't do anything
        return population

    # First chunk size of each of the first two dimensions; used to
    # rechunk the output.
    chunks = tuple(c[0] if c else 0 for c in population.chunks)[:2]

    # Calculate the kernel indices and shape
    kernel = calculate_kernel(distance, csx, csy, True)
    if kernel is None:
        # No kernel could be built for this distance: nothing to disperse
        return population
    kernel, m, n = kernel
    m = int(m)
    n = int(n)  # Dask does not like numpy types in depth

    # Stack the inputs depth-wise and pad the two spatial axes with zeros
    # by the kernel half-size so the task can look past the borders.
    a = da.pad(
        da.dstack([population, total_population, carrying_capacity]),
        ((m, m), (n, n), (0, 0)),
        'constant',
        constant_values=0,
    )

    # A zero pad width must slice to the end (``None``), because ``-0``
    # as a stop index would yield an empty slice.
    _m = -m if m != 0 else None
    _n = -n if n != 0 else None

    # Perform the dispersal lazily, then strip the padding and keep the
    # first layer.
    # args: population, total_population, carrying_capacity, kernel
    output = delayed(distance_propagation_task)(a, kernel, m, n)[m:_m, n:_n, 0]
    output = da.from_delayed(output, population.shape, np.float32)

    return output.rechunk(chunks)
def blob_doh(image, min_sigma=1, max_sigma=30, num_sigma=10, threshold=0.01,
             overlap=.5, log_scale=False):
    """Finds blobs in the given grayscale image.

    Adapted for dask from scikit-image.feature.blob_doh

    Blobs are found using the Determinant of Hessian method [1]_. For each
    blob found, the method returns its coordinates and the standard deviation
    of the Gaussian Kernel used for the Hessian matrix whose determinant
    detected the blob. Determinant of Hessians is approximated using [2]_.

    Parameters
    ----------
    image : 2D dask array
        Input grayscale image. Blobs can either be light on dark or vice versa.
    min_sigma : float, optional
        The minimum standard deviation for Gaussian Kernel used to compute
        Hessian matrix. Keep this low to detect smaller blobs.
    max_sigma : float, optional
        The maximum standard deviation for Gaussian Kernel used to compute
        Hessian matrix. Keep this high to detect larger blobs.
    num_sigma : int, optional
        The number of intermediate values of standard deviations to consider
        between `min_sigma` and `max_sigma`.
    threshold : float, optional.
        The absolute lower bound for scale space maxima. Local maxima smaller
        than thresh are ignored. Reduce this to detect less prominent blobs.
    overlap : float, optional
        A value between 0 and 1. If the area of two blobs overlaps by a
        fraction greater than `overlap`, the smaller blob is eliminated.
    log_scale : bool, optional
        If set intermediate values of standard deviations are interpolated
        using a logarithmic scale to the base `10`. If not, linear
        interpolation is used.

    Returns
    -------
    A : (n, 3) ndarray
        A 2d array with each row representing 3 values, ``(y,x,sigma)``
        where ``(y,x)`` are coordinates of the blob and ``sigma`` is the
        standard deviation of the Gaussian kernel of the Hessian Matrix whose
        determinant detected the blob. An empty ``(0, 3)`` array is returned
        when no local maxima are found.

    Raises
    ------
    ValueError
        If `image` is not two-dimensional.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Blob_detection#The_determinant_of_the_Hessian
    .. [2] Herbert Bay, Andreas Ess, Tinne Tuytelaars, Luc Van Gool,
           "SURF: Speeded Up Robust Features"
           ftp://ftp.vision.ee.ethz.ch/publications/articles/eth_biwi_00517.pdf

    Examples
    --------
    >>> from skimage import data
    >>> import dask.array as da
    >>> from dask_image.ndfeature import blob_doh
    >>> blob_doh(da.from_array(data.coins()))
    array([[270.        , 363.        ,  30.        ],
           [265.        , 113.        ,  23.55555556],
           [262.        , 243.        ,  23.55555556],
           [260.        , 173.        ,  30.        ],
           [197.        , 153.        ,  20.33333333],
           [197.        ,  44.        ,  20.33333333],
           [195.        , 100.        ,  23.55555556],
           [193.        , 275.        ,  23.55555556],
           [192.        , 212.        ,  23.55555556],
           [185.        , 348.        ,  30.        ],
           [156.        , 302.        ,  30.        ],
           [126.        , 153.        ,  20.33333333],
           [126.        , 101.        ,  20.33333333],
           [124.        , 336.        ,  20.33333333],
           [123.        , 205.        ,  20.33333333],
           [123.        ,  44.        ,  23.55555556],
           [121.        , 271.        ,  30.        ]])

    Notes
    -----
    The radius of each blob is approximately `sigma`.
    Computation of Determinant of Hessians is independent of the standard
    deviation. Therefore detecting larger blobs won't take more time. In
    methods like :py:meth:`blob_dog` and :py:meth:`blob_log` the computation
    of Gaussians for larger `sigma` takes more time. The downside is that
    this method can't be used for detecting blobs of radius less than `3px`
    due to the box filters used in the approximation of Hessian Determinant
    and that the algorithm is currently limited to 2 dimensions.
    """
    # check that 2D limitation is met
    if image.ndim != 2:
        # Implicit literal concatenation keeps the message free of the stray
        # backslash/whitespace a continuation inside the literal produced.
        raise ValueError('Blob detection with determinant of hessian '
                         'requires 2D array')

    # get float integral image to compute determinant of hessian
    image = _daskarray_to_float(image)
    image = integral_image(image)

    # get sequence of sigmas; any truthy ``log_scale`` (including numpy
    # booleans) selects the logarithmic spacing
    if log_scale:
        start, stop = math.log(min_sigma, 10), math.log(max_sigma, 10)
        sigma_list = np.logspace(start, stop, num_sigma)
    else:
        sigma_list = np.linspace(min_sigma, max_sigma, num_sigma)

    # map hessian determinant cython function to array chunks; the overlap
    # depth is derived from the largest sigma so it is the same for every
    # scale
    depth = int(np.ceil(max_sigma * math.sqrt(image.ndim)))
    hessian_images = [
        image.map_overlap(_hessian_matrix_det,
                          depth=depth,
                          sigma=s,
                          dtype=image.dtype)
        for s in sigma_list
    ]

    # stack transformed images (sigma becomes the last axis)
    image_stack = da.dstack(hessian_images)

    # rechunk along sigma axis so that it forms a single chunk
    chunk_shape = image_stack.chunks
    new_shape = chunk_shape[:-1] + ((sum(chunk_shape[-1]), ), )
    image_stack = image_stack.rechunk(chunks=new_shape)

    # get coordinates of local maxima in transformed stack
    local_maxima = peak_local_max(image_stack,
                                  threshold=threshold,
                                  footprint=np.ones((3, ) * image_stack.ndim),
                                  exclude_border=False)

    # Catch no peaks
    if local_maxima.size == 0:
        return np.empty((0, 3))

    # Convert local_maxima to float64
    lm = local_maxima.astype(np.float64)

    # Convert the last index to its corresponding scale value
    lm[:, -1] = sigma_list[local_maxima[:, -1]]

    # prune blobs that are too close together
    return _prune_blobs(lm, overlap)