def test_dstack():
    x = np.arange(5)
    y = np.ones(5)
    a = da.arange(5, chunks=2)
    b = da.ones(5, chunks=2)

    assert_eq(np.dstack((x[None, None, :], y[None, None, :])),
              da.dstack((a[None, None, :], b[None, None, :])))
    assert_eq(np.dstack((x[None, :], y[None, :])),
              da.dstack((a[None, :], b[None, :])))
    assert_eq(np.dstack((x, y)), da.dstack((a, b)))
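
A minimal sketch (not part of the original test) of the shape semantics the assertions above exercise: da.dstack promotes 1-D and 2-D inputs to 3-D and concatenates along the third axis, mirroring np.dstack.

import dask.array as da

a = da.arange(5, chunks=2)
b = da.ones(5, chunks=2)

# 1-D inputs are treated as rows of shape (1, 5), then stacked depth-wise
print(da.dstack((a, b)).shape)                                # (1, 5, 2)
# 2-D inputs gain a trailing length-1 axis before concatenation
print(da.dstack((a[None, :], b[None, :])).shape)              # (1, 5, 2)
# 3-D inputs are concatenated directly along axis 2
print(da.dstack((a[None, None, :], b[None, None, :])).shape)  # (1, 1, 10)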
Example 2
def concat_bands(self):
    import dask.array as da
    self.logger.info('Concatenate RGB bands...')
    self._arrays['rgb'] = da.dstack((self._arrays['r'], self._arrays['g'],
                                     self._arrays['b']))
    self._arrays['rgb'] = da.clip(self._arrays['rgb'], 0, 1)
    del self._arrays['r'], self._arrays['g'], self._arrays['b']
Example 3
def _expand_tiepoint_array_5km(self, arr, lines, cols):
    if self.level == 2:  # Repeat the last column to complete L2 data
        arr = da.dstack([arr, arr[:, :, -1]])
    arr = da.repeat(arr, lines * 2, axis=1)
    if self.level == 1:
        arr = da.repeat(arr.reshape((-1, self.cscan_full_width - 1)),
                        cols,
                        axis=1)
    elif self.level == 2:
        arr = da.repeat(arr.reshape((-1, self.cscan_full_width)),
                        cols,
                        axis=1)
    return da.hstack((arr[:, :2], arr, arr[:, -2:]))
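
A detail worth isolating from the level-2 branch above: dstack-ing a 3-D array with one of its own 2-D planes works because dstack promotes the 2-D slice to (rows, cols, 1) before concatenating. A small sketch with assumed toy shapes:

import dask.array as da

arr = da.arange(24, chunks=4).reshape((2, 3, 4))
# arr[:, :, -1] is 2-D; dstack promotes it to (2, 3, 1) and appends it
out = da.dstack([arr, arr[:, :, -1]])
print(out.shape)  # (2, 3, 5)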
Example 4
    def test_get_sample_from_bil_info(self):
        """Test bilinear interpolation as a whole."""
        import dask.array as da
        from xarray import DataArray
        from pyresample.bilinear.xarr import XArrayResamplerBilinear

        resampler = XArrayResamplerBilinear(self.source_def, self.target_def,
                                            self.radius)
        resampler.get_bil_info()

        # Sample from data1
        res = resampler.get_sample_from_bil_info(self.data1)
        res = res.compute()
        # Check a couple of values
        self.assertEqual(res.values[1, 1], 1.)
        self.assertTrue(np.isnan(res.values[0, 3]))
        # Check that the values haven't gone down or up a lot
        self.assertAlmostEqual(np.nanmin(res.values), 1.)
        self.assertAlmostEqual(np.nanmax(res.values), 1.)
        # Check that dimensions are the same
        self.assertEqual(res.dims, self.data1.dims)

        # Sample from data1, custom fill value
        res = resampler.get_sample_from_bil_info(self.data1, fill_value=-1.0)
        res = res.compute()
        self.assertEqual(np.nanmin(res.values), -1.)

        # Sample from integer data
        res = resampler.get_sample_from_bil_info(self.data1.astype(np.uint8),
                                                 fill_value=None)
        res = res.compute()
        # Six values should be filled with zeros, which is the
        # default fill_value for integer data
        self.assertEqual(np.sum(res == 0), 6)

        # Output coordinates should have been set
        self.assertTrue(isinstance(resampler._out_coords, dict))
        self.assertTrue(
            np.all(resampler._out_coords['x'] == resampler.out_coords_x))
        self.assertTrue(
            np.all(resampler._out_coords['y'] == resampler.out_coords_y))

        # 3D data
        data = da.moveaxis(da.dstack((self.data1, self.data1)), -1, 0)
        data = DataArray(data, dims=('bands', 'y', 'x'))
        res = resampler.get_sample_from_bil_info(data)
        assert res.shape == (2, ) + self.target_def.shape
        assert res.dims == data.dims
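
The 3-D construction at the end of this test is a common pattern: dstack leaves the band axis last, and moveaxis brings it to the front for a ('bands', 'y', 'x') layout. A standalone sketch with made-up shapes:

import dask.array as da

data1 = da.ones((5, 7), chunks=3)
# dstack yields (5, 7, 2); moveaxis relocates the band axis to the front
data = da.moveaxis(da.dstack((data1, data1)), -1, 0)
print(data.shape)  # (2, 5, 7)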
Example 5
def _get_output_xy_dask(target_geo_def, proj):
    """Get x/y coordinates of the target grid."""
    # Read output coordinates
    out_lons, out_lats = target_geo_def.get_lonlats_dask()

    # Mask invalid coordinates
    out_lons, out_lats = _mask_coordinates_dask(out_lons, out_lats)

    # Convert coordinates to output projection x/y space
    res = da.dstack(proj(out_lons.compute(), out_lats.compute()))
    # _run_proj(proj, out_lons, out_lats)
    #,
    #                    chunks=(out_lons.chunks[0], out_lons.chunks[1], 2),
    #                    new_axis=[2])
    out_x = da.ravel(res[:, :, 0])
    out_y = da.ravel(res[:, :, 1])

    return out_x, out_y
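
The stack-then-split pattern above (bundle a coordinate pair into a (rows, cols, 2) array, then peel the planes back off) can be sketched with a stand-in transform; fake_proj below is a hypothetical placeholder for the real pyproj call:

import dask.array as da

lons = da.random.random((4, 5), chunks=2)
lats = da.random.random((4, 5), chunks=2)

def fake_proj(lons, lats):
    # hypothetical stand-in for proj(lons, lats)
    return lons * 2, lats * 3

res = da.dstack(fake_proj(lons, lats))  # shape (4, 5, 2)
out_x = da.ravel(res[:, :, 0])
out_y = da.ravel(res[:, :, 1])
print(out_x.shape, out_y.shape)         # (20,) (20,)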
Example 6
def _stack_dims(data, cases, set_cases, exp):
    """Recursive function to stack multi-dimensional data
    """
    from dask.array import stack as dstack
    from numpy import stack as nstack
    # print(set_cases)
    idx = len(set_cases)
    if idx >= len(cases):
        # print("   leaf")
        tup = exp.case_tuple(**set_cases)
        # print(tup)
        return data[tup].data
    else:
        new_set_cases = set_cases.copy()
        case = cases[idx]

        to_stack = []
        for val in case.vals:
            new_set_cases[case.shortname] = val
            x = _stack_dims(data, cases, new_set_cases, exp)
            to_stack.append(x)

        return dstack(to_stack)
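
Note that this example binds dask.array.stack, which adds a new leading axis, to the name dstack; it is not dask.array.dstack. A minimal sketch (synthetic data) of the nested stacking the recursion produces for two case dimensions:

import dask.array as da

leaves = {(c, v): da.ones((2, 2), chunks=2) * v
          for c in ('a', 'b') for v in (0, 1)}
# One new leading axis per case dimension, innermost cases stacked first
stacked = da.stack([da.stack([leaves[(c, v)] for v in (0, 1)])
                    for c in ('a', 'b')])
print(stacked.shape)  # (2, 2, 2, 2)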
Example 7
    def _calc_unitcell_vectors(self):
        """The vectors that define the shape of the unit cell in each frame

        Returns
        -------
        vectors : da.Array, shape (n_frames, 3, 3)
            Vectors defining the shape of the unit cell in each frame.
            The semantics of this array are that the shape of the unit cell
            in frame ``i`` are given by the three vectors, ``value[i, 0, :]``,
            ``value[i, 1, :]``, and ``value[i, 2, :]``.
        """
        if self.unitcell_lengths is None or self.unitcell_angles is None:
            return None

        v1, v2, v3 = lengths_and_angles_to_box_vectors(
            self._unitcell_lengths[:, 0],  # a
            self._unitcell_lengths[:, 1],  # b
            self._unitcell_lengths[:, 2],  # c
            self._unitcell_angles[:, 0],  # alpha
            self._unitcell_angles[:, 1],  # beta
            self._unitcell_angles[:, 2],  # gamma
        )
        return da.swapaxes(da.dstack((v1, v2, v3)), 1, 2)
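
A sketch with synthetic vectors (not real trajectory data) showing why the swapaxes is needed: dstack leaves the vector index on the last axis, and swapping axes 1 and 2 puts each box vector in a row, so value[i, 0, :] is v1 for frame i, as the docstring promises.

import dask.array as da

n_frames = 4
v1 = da.ones((n_frames, 3), chunks=2)
v2 = da.ones((n_frames, 3), chunks=2) * 2
v3 = da.ones((n_frames, 3), chunks=2) * 3

box = da.swapaxes(da.dstack((v1, v2, v3)), 1, 2)
print(box.shape)         # (4, 3, 3)
print(box[0].compute())  # rows are v1, v2, v3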
Example 8
def _run_proj(proj, lons, lats):
    return da.dstack(proj(lons, lats))
Example 9
def dstack(dsts):
    """dask.array.dstack with one array is slow"""
    if len(dsts) > 1:
        return da.dstack(dsts)
    else:
        return da.atleast_3d(dsts[0])
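
A quick illustrative check that the fast path agrees with the general one: for 2-D input both produce a trailing length-1 axis.

import dask.array as da

x = da.ones((3, 4), chunks=2)
print(da.atleast_3d(x).shape)  # (3, 4, 1)
print(da.dstack([x]).shape)    # (3, 4, 1)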
Example 10
def plsa_em_step_dask(
    block_rows_ndarray,
    block_cols_ndarray,
    block_vals_ndarray,
    p_w_given_z,
    p_z_given_d,
    block_row_size,
    block_col_size,
    e_step_thresh=1e-32,
):
    n_d_blocks = block_rows_ndarray.shape[0]
    n_w_blocks = block_rows_ndarray.shape[1]

    n = p_z_given_d.shape[0]
    m = p_w_given_z.shape[1]
    k = p_z_given_d.shape[1]

    result_p_w_given_z = [[] for i in range(n_w_blocks)]
    result_p_z_given_d = [[] for i in range(n_d_blocks)]
    result_norm_pwz = []
    result_norm_pdz = [[] for i in range(n_d_blocks)]

    for i in range(n_d_blocks):

        row_start = block_row_size * i
        row_end = min(row_start + block_row_size, n)

        for j in range(n_w_blocks):
            col_start = block_col_size * j
            col_end = min(col_start + block_col_size, m)

            row_block = block_rows_ndarray[i, j]
            col_block = block_cols_ndarray[i, j]
            val_block = block_vals_ndarray[i, j]

            kernel_results = plsa_em_step_block_kernel(
                row_block,
                col_block,
                val_block,
                p_w_given_z[:, col_start:col_end],
                p_z_given_d[row_start:row_end, :],
                e_step_thresh=e_step_thresh,
            )

            result_p_w_given_z[j].append(
                da.from_delayed(kernel_results[0], (k, block_col_size),
                                dtype=np.float32))
            result_p_z_given_d[i].append(
                da.from_delayed(kernel_results[1], (block_row_size, k),
                                dtype=np.float32))
            result_norm_pwz.append(
                da.from_delayed(kernel_results[2], (k, ), dtype=np.float32))

            result_norm_pdz[i].append(
                da.from_delayed(kernel_results[3], (block_row_size, ),
                                dtype=np.float32))

    p_w_given_z_blocks = [
        da.dstack(result_p_w_given_z[i]).sum(axis=-1)
        for i in range(n_w_blocks)
    ]
    p_z_given_d_blocks = [
        da.dstack(result_p_z_given_d[i]).sum(axis=-1)
        for i in range(n_d_blocks)
    ]
    norm_pdz_blocks = [
        da.dstack(result_norm_pdz[i]).sum(axis=-1) for i in range(n_d_blocks)
    ]

    p_w_given_z = (da.hstack(p_w_given_z_blocks) /
                   da.dstack(result_norm_pwz).sum(axis=-1).T)
    p_z_given_d = da.vstack(p_z_given_d_blocks) / da.hstack(norm_pdz_blocks).T

    result = compute(p_w_given_z, p_z_given_d)

    return result
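
The reduction idiom used repeatedly above, in isolation: equally shaped partial results are stacked along a new trailing axis with dstack and collapsed with a sum. A toy sketch:

import dask.array as da

partials = [da.ones((3, 4), chunks=2) * i for i in range(3)]
total = da.dstack(partials).sum(axis=-1)  # (3, 4, 3) -> (3, 4)
print(total.compute()[0, 0])              # 0 + 1 + 2 == 3.0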
Example 11
def density_flux(population, total_population, carrying_capacity, distance,
                 csx, csy, **kwargs):
    """
    'density-based dispersion'

    Dispersal is calculated using the following sequence of methods:

    Portions of populations at each element (node, or grid cell) in the study area array (raster) are moved to
    surrounding elements (a neighbourhood) within a radius that is defined by the input distance (:math:`d`), as
    presented in the conceptual figure below.

        .. image:: images/density_flux_neighbourhood.png
            :align: center

    .. attention:: No dispersal will occur if the provided distance is less than the distance between elements (grid cells) in the model domain, as none will be included in the neighbourhood

    The mean density (:math:`\\rho`) of all elements in the neighbourhood is calculated as:

    .. math::
       \\rho=\\frac{\\sum_{i=1}^{n} \\frac{pop_T(i)}{k_T(i)}}{n}

    where,

    :math:`pop_T` is the total population (of the entire species) at each element (:math:`i`); and\n
    :math:`k_T` is the total carrying capacity for the species

    The density gradient at each element (:math:`\\Delta`) with respect to the mean is calculated as:

    .. math::
        \\Delta(i)=\\frac{pop_T(i)}{k_T(i)}-\\rho

    If the centroid element is above the mean :math:`[\\Delta(i_0) > 0]`, it is able to release a portion of its
    population to elements in the neighbourhood. The eligible population to be received by surrounding elements is equal
    to the sum of populations at elements with negative density gradients, the :math:`candidates`:

    .. math::
        candidates=\\sum_{i=1}^{n} \\Delta(i)[\\Delta(i) < 0]k_T(i)

    The minimum of either the population above the mean at the centroid element - :math:`source=\\Delta(i_0)*k_T(i_0)`,
    or the :math:`candidates` are used to determine the total population that is dispersed from the centroid element to
    the other elements in the neighbourhood:

    .. math::
        dispersal=min\{source, candidates\}

    The population at the centroid element becomes:

    .. math::
        pop_a(i_0)=pop_a(i_0)-\\frac{pop_a(i_0)}{pop_T(i_0)}dispersal

    where,

    :math:`pop_a` is the age (stage) group population, which is a sub-population of the total.

    The populations of the candidate elements in the neighbourhood become (a net gain due to negative gradients):

    .. math::
        pop_a(i)=pop_a(i)-\\frac{\\Delta(i)[\\Delta(i) < 0]k_T(i)}{candidates}dispersal\\frac{pop_a(i)}{pop_T(i)}

    :param da.Array population: Sub-population to redistribute (subset of the ``total_population``)
    :param da.Array total_population: Total population
    :param da.Array carrying_capacity: Total Carrying Capacity (k)
    :param float distance: Maximum dispersal distance
    :param float csx: Cell size of the domain in the x-direction
    :param float csy: Cell size of the domain in the y-direction

    .. Attention:: Ensure the cell sizes are in the same units as the specified direction

    :Keyword Arguments:
        **mask** (*array*) --
            A weighting mask that scales dispersal based on the normalized mask value (default: None)
    :return: Redistributed population
    """
    if any([
            not isinstance(a, da.Array)
            for a in [population, total_population, carrying_capacity]
    ]):
        raise DispersalError('Inputs must be dask arrays')

    if distance == 0:
        # Don't do anything
        return population

    chunks = tuple(c[0] if c else 0 for c in population.chunks)[:2]

    mask = kwargs.get('mask', None)
    if mask is None:
        mask = da.ones(shape=population.shape, dtype='float32', chunks=chunks)

    # Normalize the mask
    mask_min = da.min(mask)
    _range = da.max(mask) - mask_min
    mask = da.where(_range > 0, (mask - mask_min) / _range, 1.)

    # Calculate the kernel indices and shape
    kernel = calculate_kernel(distance, csx, csy)
    if kernel is None:
        # Not enough distance to cover a grid cell
        return population
    kernel, m, n = kernel
    m = int(m)
    n = int(n)

    a = da.pad(da.dstack(
        [population, total_population, carrying_capacity, mask]),
               ((m, m), (n, n), (0, 0)),
               'constant',
               constant_values=0)
    _m = -m
    if m == 0:
        _m = None
    _n = -n
    if n == 0:
        _n = None
    output = delayed(density_flux_task)(a, kernel, m, n)[m:_m, n:_n, 0]
    output = da.from_delayed(output, population.shape, np.float32)

    return output.rechunk(chunks)
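
The packing-and-padding step above, in isolation: equally shaped rasters are bundled into one (rows, cols, bands) array so a single spatial pad covers all of them while the band axis stays untouched. Shapes and pad widths below are illustrative:

import dask.array as da

population = da.ones((6, 6), chunks=3)
total_population = da.ones((6, 6), chunks=3)
carrying_capacity = da.ones((6, 6), chunks=3)
m, n = 2, 1

a = da.pad(da.dstack([population, total_population, carrying_capacity]),
           ((m, m), (n, n), (0, 0)),
           'constant',
           constant_values=0)
print(a.shape)  # (10, 8, 3)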
Example 12
def distance_propagation(population, total_population, carrying_capacity,
                         distance, csx, csy, **kwargs):
    """
    'distance propagation'

    Distance propagation is used to redistribute populations to distal locations based on density gradients. Portions of
    populations at each element (node, or grid cell) in the study area array (raster) are moved to a target element
    at a radius that is defined by the input distance (:math:`d`), as presented in the conceptual
    figure below.

    .. image:: images/distance_propagation_neighbourhood.png
        :align: center

    .. attention:: No dispersal will occur if the provided distance is less than the distance between elements (grid cells) in the model domain, as none will be included in the neighbourhood

    The density (:math:`\\rho`) of all distal elements (:math:`i`) is calculated as:

    .. math::
       \\rho(i)=\\frac{pop_T(i)}{k_T(i)}

    where,

    :math:`pop_T` is the total population (of the entire species) at each element (:math:`i`); and\n
    :math:`k_T` is the total carrying capacity for the species

    The distal element with the minimum density is chosen as a candidate for population dispersal from the centroid
    element. If the density of distal elements is homogeneous, one element is picked at random. The density gradient
    :math:`\\Delta` is then calculated using the centroid element :math:`i_0` and the chosen distal element :math:`i_1`:

    .. math::
        \\rho=\\frac{pop_T(i_0)/k_T(i_0)+pop_T(i_1)/k_T(i_1)}{2}

    .. math::
        \\Delta(i)=\\frac{pop_T(i)}{k_T(i)}-\\rho

    If the centroid element is above the mean :math:`[\\Delta(i_0) > 0]`, and the distal element is below the mean
    :math:`[\\Delta(i_1) < 0]`, dispersal may take place. The total population dispersed is calculated by taking the
    minimum of the population constrained by the gradient:

    .. math::
        dispersal=min\{|\\Delta(i_0)k_T(i_0)|, |\\Delta(i_1)k_T(i_1)|\}

    The population at the centroid element becomes:

    .. math::
        pop_a(i_0)=pop_a(i_0)-dispersal

    where,

    :math:`pop_a` is the age (stage) group population, which is a sub-population of the total.

    The population at the distal element becomes (a net gain due to a negative gradient):

    .. math::
        pop_a(i_1)=pop_a(i_1)+dispersal

    :param da.Array population: Sub-population to redistribute (subset of the ``total_population``)
    :param da.Array total_population: Total population
    :param da.Array carrying_capacity: Total Carrying Capacity (k)
    :param float distance: Maximum dispersal distance
    :param float csx: Cell size of the domain in the x-direction
    :param float csy: Cell size of the domain in the y-direction

    .. Attention:: Ensure the cell sizes are in the same units as the specified direction

    :return: Redistributed population
    """
    # Check the inputs
    if any([
            not isinstance(a, da.Array)
            for a in [population, total_population, carrying_capacity]
    ]):
        raise DispersalError('Inputs must be dask arrays')

    if distance == 0:
        # Don't do anything
        return population

    chunks = tuple(c[0] if c else 0 for c in population.chunks)[:2]

    # Calculate the kernel indices and shape
    kernel = calculate_kernel(distance, csx, csy, True)
    if kernel is None:
        return population
    kernel, m, n = kernel
    m = int(m)
    n = int(n)

    # Dask does not like numpy types in depth
    a = da.pad(da.dstack([population, total_population, carrying_capacity]),
               ((m, m), (n, n), (0, 0)),
               'constant',
               constant_values=0)

    # Perform the dispersal
    # args: population, total_population, carrying_capacity, kernel
    _m = -m
    if m == 0:
        _m = None
    _n = -n
    if n == 0:
        _n = None
    output = delayed(distance_propagation_task)(a, kernel, m, n)[m:_m, n:_n, 0]
    output = da.from_delayed(output, population.shape, np.float32)

    return output.rechunk(chunks)
Example 13
def blob_doh(image,
             min_sigma=1,
             max_sigma=30,
             num_sigma=10,
             threshold=0.01,
             overlap=.5,
             log_scale=False):
    """Finds blobs in the given grayscale image.

    Adapted for dask from scikit-image.feature.blob_doh
    Blobs are found using the Determinant of Hessian method [1]_. For each blob
    found, the method returns its coordinates and the standard deviation
    of the Gaussian Kernel used for the Hessian matrix whose determinant
    detected the blob. Determinant of Hessians is approximated using [2]_.
    Parameters
    ----------
    image : 2D dask array
        Input grayscale image. Blobs can either be light on dark or vice versa.
    min_sigma : float, optional
        The minimum standard deviation for Gaussian Kernel used to compute
        Hessian matrix. Keep this low to detect smaller blobs.
    max_sigma : float, optional
        The maximum standard deviation for Gaussian Kernel used to compute
        Hessian matrix. Keep this high to detect larger blobs.
    num_sigma : int, optional
        The number of intermediate values of standard deviations to consider
        between `min_sigma` and `max_sigma`.
    threshold : float, optional
        The absolute lower bound for scale space maxima. Local maxima smaller
        than thresh are ignored. Reduce this to detect less prominent blobs.
    overlap : float, optional
        A value between 0 and 1. If the area of two blobs overlaps by a
        fraction greater than `overlap`, the smaller blob is eliminated.
    log_scale : bool, optional
        If set, intermediate values of standard deviations are interpolated
        using a logarithmic scale to the base `10`. If not, linear
        interpolation is used.
    Returns
    -------
    A : (n, 3) ndarray
        A 2d array with each row representing 3 values, ``(y,x,sigma)``
        where ``(y,x)`` are coordinates of the blob and ``sigma`` is the
        standard deviation of the Gaussian kernel of the Hessian Matrix whose
        determinant detected the blob.
    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Blob_detection#The_determinant_of_the_Hessian
    .. [2] Herbert Bay, Andreas Ess, Tinne Tuytelaars, Luc Van Gool,
           "SURF: Speeded Up Robust Features"
           ftp://ftp.vision.ee.ethz.ch/publications/articles/eth_biwi_00517.pdf
    Examples
    --------
    >>> from skimage import data
    >>> import dask.array as da
    >>> from dask_image.ndfeature import blob_doh
    >>> blob_doh(da.from_array(data.coins()))
    array([[270.        , 363.        ,  30.        ],
           [265.        , 113.        ,  23.55555556],
           [262.        , 243.        ,  23.55555556],
           [260.        , 173.        ,  30.        ],
           [197.        , 153.        ,  20.33333333],
           [197.        ,  44.        ,  20.33333333],
           [195.        , 100.        ,  23.55555556],
           [193.        , 275.        ,  23.55555556],
           [192.        , 212.        ,  23.55555556],
           [185.        , 348.        ,  30.        ],
           [156.        , 302.        ,  30.        ],
           [126.        , 153.        ,  20.33333333],
           [126.        , 101.        ,  20.33333333],
           [124.        , 336.        ,  20.33333333],
           [123.        , 205.        ,  20.33333333],
           [123.        ,  44.        ,  23.55555556],
           [121.        , 271.        ,  30.        ]])
    Notes
    -----
    The radius of each blob is approximately `sigma`.
    Computation of Determinant of Hessians is independent of the standard
    deviation. Therefore detecting larger blobs won't take more time. In
    methods like :py:meth:`blob_dog` and :py:meth:`blob_log` the computation
    of Gaussians for larger `sigma` takes more time. The downside is that
    this method can't be used for detecting blobs of radius less than `3px`
    due to the box filters used in the approximation of Hessian Determinant
    and that the algorithm is currently limited to 2 dimensions.
    """

    # check that 2D limitation is met
    if image.ndim != 2:
        raise ValueError('Blob detection with determinant of hessian '
                         'requires a 2D array')

    # get float integral image to compute determinant of hessian
    image = _daskarray_to_float(image)
    image = integral_image(image)

    # get sequence of sigmas
    if log_scale is True:
        start, stop = math.log(min_sigma, 10), math.log(max_sigma, 10)
        sigma_list = np.logspace(start, stop, num_sigma)
    else:
        sigma_list = np.linspace(min_sigma, max_sigma, num_sigma)

    # map hessian determinant cython function to array chunks
    depth = int(np.ceil(max_sigma * math.sqrt(image.ndim)))
    hessian_images = [
        image.map_overlap(_hessian_matrix_det,
                          depth=depth,
                          sigma=s,
                          dtype=image.dtype) for s in sigma_list
    ]

    # stack transformed images
    image_stack = da.dstack(hessian_images)

    # rechunk along sigma axis
    chunk_shape = image_stack.chunks
    new_shape = chunk_shape[:-1] + ((sum(chunk_shape[-1]), ), )
    image_stack = image_stack.rechunk(chunks=new_shape)

    # get coordinates of local maxima in transformed stack
    local_maxima = peak_local_max(image_stack,
                                  threshold=threshold,
                                  footprint=np.ones((3, ) * image_stack.ndim),
                                  exclude_border=False)

    # Catch no peaks
    if local_maxima.size == 0:
        return np.empty((0, 3))

    # Convert local_maxima to float64
    lm = local_maxima.astype(np.float64)

    # Convert the last index to its corresponding scale value
    lm[:, -1] = sigma_list[local_maxima[:, -1]]

    # prune blobs that are too close together
    return _prune_blobs(lm, overlap)
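
The stack-and-rechunk step in the middle of blob_doh is worth isolating: each per-sigma response arrives as a depth-1 plane, so after dstack the sigma axis is split into single-element chunks; rechunking merges it into one chunk so the 3-D peak finder can see every scale of a pixel at once. A toy sketch:

import dask.array as da

responses = [da.random.random((8, 8), chunks=4) for _ in range(5)]
stack = da.dstack(responses)
print(stack.chunks[-1])  # (1, 1, 1, 1, 1)

new_chunks = stack.chunks[:-1] + ((sum(stack.chunks[-1]),),)
stack = stack.rechunk(chunks=new_chunks)
print(stack.chunks[-1])  # (5,)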