Example #1
def test_coarsen_with_excess():
    x = da.arange(10, chunks=5)
    assert_eq(da.coarsen(np.min, x, {0: 3}, trim_excess=True), np.array([0, 5]))
    assert_eq(
        da.coarsen(np.sum, x, {0: 3}, trim_excess=True),
        np.array([0 + 1 + 2, 5 + 6 + 7]),
    )
Example #2
def test_coarsen():
    x = np.random.randint(10, size=(24, 24))
    d = da.from_array(x, chunks=(4, 8))

    assert_eq(da.chunk.coarsen(np.sum, x, {0: 2, 1: 4}),
              da.coarsen(np.sum, d, {0: 2, 1: 4}))
    assert_eq(da.chunk.coarsen(np.sum, x, {0: 2, 1: 4}),
              da.coarsen(da.sum, d, {0: 2, 1: 4}))
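As the two tests above show, da.coarsen applies the given reduction over non-overlapping blocks whose per-axis size comes from the axes dict, and trim_excess=True drops the leftover elements of each chunk that do not fill a complete block (hence the asserted [0 + 1 + 2, 5 + 6 + 7] for chunks of 5 and a block size of 3). A minimal, self-contained sketch of the basic call:

import numpy as np
import dask.array as da

x = da.arange(12, chunks=6)                        # [0, 1, ..., 11]
# Sum each non-overlapping block of 3 along axis 0.
print(da.coarsen(np.sum, x, {0: 3}).compute())     # [ 3 12 21 30]

# In 2D the axes dict gives one block size per axis: here 2x4 blocks.
m = da.ones((4, 8), chunks=(2, 4))
print(da.coarsen(np.mean, m, {0: 2, 1: 4}).compute())   # [[1. 1.]
                                                        #  [1. 1.]]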
Example #3
def coarsen(A, fun=np.mean, **kwargs):
    """Coarsen DataArray using reduction

    This function works with dask arrays.

    Parameters
    ----------
    A: DataArray
        Can be a dask array
    fun:
        reduction operator. Default is np.mean.
    **kwargs
        coarsening information along a dimension. Passed as 'dim=coarsening'
        pairs. Multidimensional coordinates are not supported.

    Returns
    -------
    y: DataArray
        Coarsened data.

    Examples
    --------

    Load data and coarsen along the x dimension
    >>> name = "/scratch/noah/Data/SAM6.10.9/OUT_2D/HOMO_2km_16384x1_64_2000m_5s.HOMO_2K.smagor_16.2Dcom_*.nc"

    >>> ds = xr.open_mfdataset(name, chunks={'time': 100})
    >>> # tb = ds.apply(lambda x: x.meanevery('x', 32))
    >>> def f(x):
    ...     return x.coarsen(x=16)
    >>> dsc = ds.apply(f)
    >>> print("saving to netcdf")
    >>> dsc.to_netcdf("2dcoarsened.nc")
    """

    # this function needs a dask array to work
    if A.chunks is None:
        A = A.chunk()

    coarse_dict = {A.get_axis_num(k): v for k, v in kwargs.items()}
    vals = da.coarsen(fun, A.data, coarse_dict)

    # coarsen dimension
    coords = {}
    for k in A.coords:
        if k in kwargs:
            c = A[k].data
            dim = da.from_array(c, chunks=(len(c), ))

            q = kwargs[k]
            dim = da.coarsen(np.mean, dim, {0: q}).compute()
            coords[k] = dim
        else:
            coords[k] = A.coords[k]

    return xr.DataArray(vals, dims=A.dims, coords=coords, attrs=A.attrs,
                        name=A.name)
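The docstring example above depends on files under a scratch path; here is a hedged, self-contained sketch of how this wrapper might be called. The DataArray and block size below are made up for illustration, and the module is assumed to import numpy, dask.array and xarray as np, da and xr.

import numpy as np
import xarray as xr

arr = xr.DataArray(np.arange(12.0), dims=["x"], coords={"x": np.arange(12.0)})
out = coarsen(arr, fun=np.mean, x=4)   # average blocks of 4 along the 'x' dimension
print(out.values)                      # [1.5 5.5 9.5]
print(out["x"].values)                 # the coordinate is block-averaged too: [1.5 5.5 9.5]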
Example #4
def coarsen(f):
    '''
    Create data pyramid.
    '''
    grid = f['resolutions']['1']['values']
    top_n = grid.shape[0]
    tile_size = 256

    max_zoom = math.ceil(math.log(top_n / tile_size) / math.log(2))
    max_width = tile_size * 2**max_zoom

    chunk_size = tile_size * 16
    curr_size = grid.shape
    dask_dset = da.from_array(grid, chunks=(chunk_size, chunk_size))

    r = f['resolutions']
    curr_resolution = 1

    while curr_resolution < 2**max_zoom:
        curr_size = tuple(np.array(curr_size) / 2)
        print('coarsening')
        curr_resolution *= 2

        print("curr_size:", curr_size)
        g = r.create_group(str(curr_resolution))
        values = g.require_dataset('values',
                                   curr_size,
                                   dtype='f4',
                                   compression='lzf',
                                   fillvalue=np.nan)

        dask_dset = dask_dset.rechunk((chunk_size, chunk_size))
        dask_dset = da.coarsen(np.nansum, dask_dset, {0: 2, 1: 2})
        da.store(dask_dset, values)
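The function above repeatedly halves the grid and writes each level into an HDF5 group. A hedged sketch of just the coarsening loop, using plain dask arrays instead of h5py and a made-up square, power-of-two grid:

import math
import numpy as np
import dask.array as da

tile_size = 256
level = da.random.random((1024, 1024), chunks=(tile_size, tile_size))
max_zoom = math.ceil(math.log(level.shape[0] / tile_size) / math.log(2))   # 2

levels = [level]
for _ in range(max_zoom):
    # nansum over 2x2 blocks halves each dimension, as in the loop above
    level = da.coarsen(np.nansum, level, {0: 2, 1: 2})
    levels.append(level)
print([lvl.shape for lvl in levels])   # [(1024, 1024), (512, 512), (256, 256)]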
Example #5
def coarsen(f, type, tile_size=256):
    '''
    Create data pyramid.
    '''
    grid = f['resolutions']['1'][type]
    top_n = grid.shape[0]
    max_zoom = math.ceil(math.log(top_n / tile_size) / math.log(2))

    chunk_size = tile_size * 16
    curr_size = grid.shape
    dask_dset = da.from_array(grid, chunks=(chunk_size, chunk_size))

    r = f['resolutions']
    curr_resolution = 1

    while curr_resolution < 2 ** max_zoom:
        curr_size = tuple(np.array(curr_size) / 2)
        print('coarsening')
        curr_resolution *= 2

        print("curr_size:", curr_size)
        group_name = '{}{}'.format(
            curr_resolution, '' if type == 'values' else '-' + type
        )
        g = r.create_group(group_name)
        values = g.require_dataset(type, curr_size, dtype='f4',
                                   compression='lzf', fillvalue=np.nan)

        dask_dset = dask_dset.rechunk((chunk_size, chunk_size))
        dask_dset = da.coarsen(np.nansum, dask_dset, {0: 2, 1: 2})
        da.store(dask_dset, values)
Example #6
def coarsen(f, tile_size=256):
    """
    Create data pyramid.
    """
    grid = f["resolutions"]["1"]["values"]
    top_n = grid.shape[0]

    max_zoom = math.ceil(math.log(top_n / tile_size) / math.log(2))
    max_width = tile_size * 2**max_zoom

    chunk_size = tile_size * 16
    curr_size = grid.shape
    dask_dset = da.from_array(grid, chunks=(chunk_size, chunk_size))

    r = f["resolutions"]
    curr_resolution = 1

    while curr_resolution < 2**max_zoom:
        curr_size = tuple(np.array(curr_size) / 2)
        print("coarsening")
        curr_resolution *= 2

        print("curr_size:", curr_size)
        g = r.create_group(str(curr_resolution))
        values = g.require_dataset("values",
                                   curr_size,
                                   dtype="f4",
                                   compression="lzf",
                                   fillvalue=np.nan)

        dask_dset = dask_dset.rechunk((chunk_size, chunk_size))
        dask_dset = da.coarsen(np.nansum, dask_dset, {0: 2, 1: 2})
        da.store(dask_dset, values)
Example #7
def test_gh3937():
    # test for github issue #3937
    x = da.from_array([1, 2, 3.0], (2, ))
    x = da.concatenate((x, [x[-1]]))
    y = x.rechunk((2, ))
    # This will produce Integral type indices that are not ints (np.int64), failing
    # the optimizer
    y = da.coarsen(np.sum, y, {0: 2})
    # How to trigger the optimizer explicitly?
    y.compute()
Example #8
def test_gh3937():
    # test for github issue #3937
    x = da.from_array([1, 2, 3.], (2,))
    x = da.concatenate((x, [x[-1]]))
    y = x.rechunk((2,))
    # This will produce Integral type indices that are not ints (np.int64), failing
    # the optimizer
    y = da.coarsen(np.sum, y, {0: 2})
    # How to trigger the optimizer explicitly?
    y.compute()
Example #9
def resample_ndimage(
        image: NDImage,
        scale: Union[float, Tuple[float, float]] = 1,
        offset: Union[float, Tuple[float, float]] = None,
        shape: Union[int, Tuple[int, int]] = None,
        chunks: Sequence[int] = None,
        spline_order: int = 1,
        aggregator: Optional[Aggregator] = np.nanmean,
        recover_nan: bool = False
) -> da.Array:
    image = da.asarray(image)
    offset = _normalize_offset(offset, image.ndim)
    scale = _normalize_scale(scale, image.ndim)
    if shape is None:
        shape = resize_shape(image.shape, scale)
    else:
        shape = _normalize_shape(shape, image)
    chunks = _normalize_chunks(chunks, shape)
    scale_y, scale_x = scale[-2], scale[-1]
    divisor_x = math.ceil(abs(scale_x))
    divisor_y = math.ceil(abs(scale_y))
    if (divisor_x >= 2 or divisor_y >= 2) and aggregator is not None:
        # Downsampling
        # ------------
        axes = {image.ndim - 2: divisor_y, image.ndim - 1: divisor_x}
        elongation = _normalize_scale((scale_y / divisor_y,
                                       scale_x / divisor_x), image.ndim)
        larger_shape = resize_shape(shape, (divisor_y, divisor_x),
                                    divisor_x=divisor_x,
                                    divisor_y=divisor_y)
        # print('Downsampling: ', scale)
        # print('  divisor:', (divisor_y, divisor_x))
        # print('  elongation:', elongation)
        # print('  shape:', shape)
        # print('  larger_shape:', larger_shape)
        divisible_chunks = _make_divisible_tiles(larger_shape,
                                                 divisor_x, divisor_y)
        image = _transform_array(image,
                                 elongation, offset,
                                 larger_shape, divisible_chunks,
                                 spline_order, recover_nan)
        image = da.coarsen(aggregator, image, axes)
        if shape != image.shape:
            image = image[..., 0:shape[-2], 0:shape[-1]]
        if chunks is not None:
            image = image.rechunk(chunks)
    else:
        # Upsampling
        # ----------
        # print('Upsampling: ', scale)
        image = _transform_array(image,
                                 scale, offset,
                                 shape, chunks,
                                 spline_order, recover_nan)
    return image
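In the downsampling branch above, the image is first resized so each axis is compatible with the integer divisors and then reduced with da.coarsen; the helpers (_transform_array, _make_divisible_tiles, resize_shape) are not shown here. A hedged sketch of the coarsen step alone, with shapes chosen so the divisors divide the chunks evenly:

import numpy as np
import dask.array as da

image = da.random.random((96, 96), chunks=(48, 48))
divisor_y = divisor_x = 2
# Aggregate each divisor_y x divisor_x block, e.g. with np.nanmean as the default aggregator.
small = da.coarsen(np.nanmean, image, {0: divisor_y, 1: divisor_x})
print(small.shape)   # (48, 48)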
Example #10
def reduce_along_axis(func, struct_array, axis=0):
    if 'dask' in inspect.getfile(make_like_reduction(func)):
        raise InputError(
            'For some weird reason the reduction function must not contain the string "dask".'
        )

    shape = struct_array.shape
    squeezed_shape = np.asarray(shape)[np.arange(len(shape)) != axis]
    return da.coarsen(make_like_reduction(func), struct_array,
                      mapping_reduction_along_axes(
                          struct_array, axes=axis)).reshape(squeezed_shape)
Example #11
def coarsen_destagger_dask(x, blocks, stagger=None, mode='wrap'):
    """


    Examples
    --------
    >>> x = da.arange(6, chunks=6)
    >>> xc = coarsen_destagger_dask(x, {0: 2}, stagger=0)
    >>> xc.compute()
    array([ 1. ,  3. ,  3.5])
    >>> x = da.from_array(x, chunks=x.shape)
    >>> xc = coarsen_destagger_dask(x, {0: 2}, stagger=0)
    >>> xc.compute()
    array([ 1. ,  3. ,  3.5])
    """
    output_numpy = False

    try:
        x._keys
    except AttributeError:
        output_numpy = True
        x = da.from_array(x, x.shape)

    xcoarse = coarsen_centered_np(x, blocks)
    # TODO refactor this code to another function
    if stagger is not None:
        blk = {key: val
               for key, val in blocks.items()
               if key != stagger}

        left_inds = np.arange(0, x.shape[stagger], blocks[stagger])
        left = da.coarsen(np.sum, da.take(x, left_inds, stagger), blk)
        n = left.shape[stagger]
        # handle boundary conditions
        if mode == 'wrap':
            bc = da.take(left, [0], axis=stagger)
        elif mode == 'clip':
            bc = da.take(left, [-1], axis=stagger)
        else:
            raise ValueError(f"Unknown boundary `mode` given: {mode}")

        right = da.take(left, np.arange(1, n), axis=stagger)
        right = da.concatenate((right, bc), axis=stagger)
        xcoarse = xcoarse + (right - left)/2

    n = np.prod(list(blocks.values()))
    ans = xcoarse/n

    if output_numpy:
        return ans.compute()
    else:
        return ans
Example #12
def rebin(a, new_shape):
    """Rebin array.

    Rebin ndarray data into a smaller ndarray of the same rank whose dimensions
    are factors of the original dimensions, e.g. an array with 6 columns and 4
    rows can be reduced to have 6, 3, 2 or 1 columns and 4, 2 or 1 rows.

    Parameters
    ----------
    a : numpy array
    new_shape : tuple
        shape after binning

    Returns
    -------
    numpy array

    Examples
    --------
    >>> a=rand(6,4); b=rebin(a,(3,2))
    >>> a=rand(6); b=rebin(a,(2,))

    Notes
    -----
    Adapted from scipy cookbook

    """
    lenShape = len(a.shape)
    # ensure the new shape is integers
    new_shape = tuple(int(ns) for ns in new_shape)
    factor = np.asarray(a.shape) // np.asarray(new_shape)
    if factor.max() < 2:
        return a.copy()
    if isinstance(a, np.ndarray):
        # most of the operations will fall here and dask is not imported
        rshape = ()
        for athing in zip(new_shape, factor):
            rshape += athing
        return a.reshape(rshape).sum(axis=tuple(2 * i + 1
                                                for i in range(lenShape)))
    else:
        import dask.array as da
        try:
            return da.coarsen(np.sum, a,
                              {i: int(f)
                               for i, f in enumerate(factor)})
        # we provide a slightly better error message in the hyperspy context
        except ValueError:
            raise ValueError("Rebinning does not allign with data dask chunks."
                             " Rebin fewer dimensions at a time to avoid this"
                             " error")
Example #13
def rebin(a, new_shape):
    """Rebin array.

    Rebin ndarray data into a smaller ndarray of the same rank whose dimensions
    are factors of the original dimensions, e.g. an array with 6 columns and 4
    rows can be reduced to have 6, 3, 2 or 1 columns and 4, 2 or 1 rows.

    Parameters
    ----------
    a : numpy array
    new_shape : tuple
        shape after binning

    Returns
    -------
    numpy array

    Examples
    --------
    >>> a=rand(6,4); b=rebin(a,(3,2))
    >>> a=rand(6); b=rebin(a,(2,))

    Notes
    -----
    Adapted from scipy cookbook

    """
    lenShape = len(a.shape)
    # ensure the new shape is integers
    new_shape = tuple(int(ns) for ns in new_shape)
    factor = np.asarray(a.shape) // np.asarray(new_shape)
    if factor.max() < 2:
        return a.copy()
    if isinstance(a, np.ndarray):
        # most of the operations will fall here and dask is not imported
        rshape = ()
        for athing in zip(new_shape, factor):
            rshape += athing
        return a.reshape(rshape).sum(axis=tuple(
            2 * i + 1 for i in range(lenShape)))
    else:
        import dask.array as da
        try:
            return da.coarsen(np.sum, a, {i: int(f) for i, f in enumerate(factor)})
        # we provide a slightly better error message in the hyperspy context
        except ValueError:
            raise ValueError("Rebinning does not allign with data dask chunks."
                             " Rebin fewer dimensions at a time to avoid this"
                             " error")
Example #14
def compute_sub_res(
    zarray: da.Array,
    pyr_level: int,
    tile_size: int,
    is_rgb: bool,
    im_dtype: np.dtype,
) -> da.Array:
    """
    Compute factor-of-2 sub-resolutions from dask array for pyramidalization using dask.

    Parameters
    ----------
    zarray: da.Array
        Dask array to be downsampled
    pyr_level: int
        Level of the pyramid: 0 = base, 1 = 2x downsampled, 2 = 4x downsampled, ...
    tile_size: int
        Size of tiles in dask array after downsampling
    is_rgb: bool
        whether dask array is RGB interleaved
    im_dtype: np.dtype
        dtype of the output da.Array

    Returns
    -------
    resampled_zarray_subres: da.Array
        Dask array (unprocessed) to be written
    """
    if is_rgb:
        resampling_axis = {0: 2**pyr_level, 1: 2**pyr_level, 2: 1}
        tiling = (tile_size, tile_size, 3)
    else:
        resampling_axis = {0: 1, 1: 2**pyr_level, 2: 2**pyr_level}
        tiling = (1, tile_size, tile_size)

    resampled_zarray_subres = da.coarsen(
        np.mean,
        zarray,
        resampling_axis,
        trim_excess=True,
    )
    resampled_zarray_subres = resampled_zarray_subres.astype(im_dtype)
    resampled_zarray_subres = resampled_zarray_subres.rechunk(tiling)

    return resampled_zarray_subres
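A hedged usage sketch for the function above, assuming a non-RGB, CYX-shaped array (the shape, tile size and dtype below are made up for illustration):

import numpy as np
import dask.array as da

zarray = da.random.randint(0, 255, size=(3, 4096, 4096),
                           chunks=(1, 512, 512)).astype(np.uint16)
level_1 = compute_sub_res(zarray, pyr_level=1, tile_size=512,
                          is_rgb=False, im_dtype=np.uint16)
print(level_1.shape, level_1.chunksize)   # (3, 2048, 2048) (1, 512, 512)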
Example #15
def coarsen(reduction, x, factor):
    """

    >>> x = np.arange(10)
    >>> coarsen(np.max, x, 2)
    array([1, 3, 5, 7, 9])

    >>> coarsen(np.min, x, 5)
    array([0, 5])
    """
    axis = {0: factor, 1: factor}
    # Ensure that shape is divisible by coarsening factor
    slops = [-(d % factor) for d in x.shape]
    slops = [slop or None for slop in slops]
    x = x[tuple(slice(0, slop) for slop in slops)]
 
    if isinstance(x, np.ndarray):
        return da.chunk.coarsen(reduction, x, axis)
    if isinstance(x, da.Array):
        return da.coarsen(reduction, x, axis)
    raise NotImplementedError()
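The slop slicing above trims each axis down to a multiple of the coarsening factor before the block reduction is applied. A small sketch of just that arithmetic on a made-up 10-element array:

import numpy as np

x = np.arange(10)
factor = 4
slops = [-(d % factor) for d in x.shape]            # [-2]
slops = [slop or None for slop in slops]            # 0 would become None (keep whole axis)
print(x[tuple(slice(0, slop) for slop in slops)])   # [0 1 2 3 4 5 6 7], length divisible by 4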
Example #16
def coarsen(reduction, x, factor):
    """

    >>> x = np.arange(10)
    >>> coarsen(np.max, x, 2)
    array([1, 3, 5, 7, 9])

    >>> coarsen(np.min, x, 5)
    array([0, 5])
    """
    axis = {0: factor, 1: factor}
    # Ensure that shape is divisible by coarsening factor
    slops = [-(d % factor) for d in x.shape]
    slops = [slop or None for slop in slops]
    x = x[tuple(slice(0, slop) for slop in slops)]

    if isinstance(x, np.ndarray):
        return da.chunk.coarsen(reduction, x, axis)
    if isinstance(x, da.Array):
        return da.coarsen(reduction, x, axis)
    raise NotImplementedError()
Example #17
def rebin(a, new_shape=None, scale=None, crop=True):
    """Rebin array.

    Rebin ndarray data into a smaller or larger array based on linear
    interpolation. Specify either a new_shape or a scale. A scale of 1 means no
    binning; a scale of less than one results in up-sampling.

    Parameters
    ----------
    a : numpy array
    new_shape : a list of floats or integer, default None
        For each dimension specify the new_shape of the np.array. This will
        then be converted into a scale.
    scale : a list of floats or integer, default None
        For each dimension specify the new:old pixel ratio, e.g. a ratio of 1
        is no binning and a ratio of 2 means that each pixel in the new
        spectrum is twice the size of the pixels in the old spectrum.
        The length of the list should match the dimension of the numpy array.
        ***Note: only one of scale or new_shape should be specified, otherwise
        the function will not run.***
    crop: bool, default True
        When binning by a non-integer number of pixels it is likely that
        the final row in each dimension contains less than the full quota to
        fill one pixel.

        e.g. a 5*5 array binned by 2.1 will produce two rows containing 2.1
        pixels and one row containing only 0.8 pixels worth. Selecting
        crop=True or crop=False determines whether or not this
        'black' line is cropped from the final binned array.

        *Please note that if crop=False is used, the final row in each
        dimension may appear black, if a fractional number of pixels are left
        over. It can be removed but has been left to preserve total counts
        before and after binning.*

    Returns
    -------
    numpy array

    Examples
    --------
    >>> a=rand(6,4); b=rebin(a,scale=(3,2))
    >>> a=rand(6); b=rebin(a,scale=(2,))

    Notes
    -----
    Fast re_bin function Adapted from scipy cookbook
    If rebin function fails with error stating that the function is 'not binned
    and therefore cannot be rebinned', add binned to axes parameters with:
    >>> s.axes_manager[axis].is_binned = True

    """
    # Series of if statements to check that only one out of new_shape or scale
    # has been given. New_shape is then converted to scale. If both or neither
    # are given the function raises an error and won't run.
    if new_shape is None and scale is None:
        raise ValueError("One of new_shape, or scale must be specified")
    elif new_shape is not None and scale is not None:
        raise ValueError(
            "Only one out of new_shape or scale should be specified")
    elif new_shape is not None:
        scale = []
        for i, _ in enumerate(a.shape):
            scale.append(a.shape[i] / new_shape[i])
    else:
        new_shape = new_shape
        scale = scale
    # check whether or not interpolation is needed.
    if _requires_linear_rebin(arr=a, scale=scale):
        _logger.debug("Using linear_bin")
        if np.issubdtype(a.dtype, np.integer):
            # The _linear_bin function below requires a float dtype
            # because of the default numpy casting rule ('same_kind').
            a = a.astype("float", casting="safe", copy=False)
        return _linear_bin(a, scale, crop)
    else:
        _logger.debug("Using standard rebin with lazy support")
        # if interpolation is not needed run fast re_bin function.
        # Adapted from scipy cookbook.
        lenShape = len(a.shape)
        new_shape = np.asarray(a.shape) // np.asarray(scale)
        # ensure the new shape is integers
        new_shape = tuple(int(ns) for ns in new_shape)
        # check the function won't bin to zero.
        for item in new_shape:
            if item == 0:
                raise ValueError(
                    "One of your dimensions collapses to zero. "
                    "Re-adjust your scale values or run code with "
                    "crop=False to avoid this error.")
        scale = np.asarray(a.shape) // np.asarray(new_shape)
        if scale.max() < 2:
            return a.copy()
        if isinstance(a, np.ndarray):
            # most of the operations will fall here and dask is not imported
            rshape = ()
            for athing in zip(new_shape, scale):
                rshape += athing
            return a.reshape(rshape).sum(axis=tuple(2 * i + 1
                                                    for i in range(lenShape)))
        else:
            import dask.array as da

            try:
                return da.coarsen(np.sum, a,
                                  {i: int(f)
                                   for i, f in enumerate(scale)})
            # we provide slightly better error message in hyperspy context
            except ValueError:
                raise ValueError(
                    "Rebinning does not align with data dask chunks. "
                    "Rebin fewer dimensions at a time to avoid this error")
Example #18
    def get_daily_percenile_fields_interpolated_to(
            self,
            lons_target,
            lats_target,
            start_year=-np.Inf,
            end_year=np.Inf,
            percentile=0.5,
            rolling_mean_window_days=None):
        target_scale_deg = (lons_target[1, 1] - lons_target[0, 0] +
                            lats_target[1, 1] - lats_target[0, 0]) / 2.0

        coarsening = int(target_scale_deg / self.characteristic_scale_deg +
                         0.5)
        print("source_scale: {}\ntarget_scale: {}\ncoarsening coefficient: {}".
              format(self.characteristic_scale_deg, target_scale_deg,
                     coarsening))

        def coarsening_func(x, axis=None):
            _mask = np.less(np.abs(x - self.missing_value), 1.0e-6)

            if np.all(_mask):
                return self.missing_value * np.ma.ones(
                    _mask.shape).mean(axis=axis)

            y = np.ma.masked_where(_mask, x)

            return y.mean(axis=axis)

        # aggregate the data
        trim_excess = True
        data = da.coarsen(coarsening_func,
                          self.data,
                          axes={
                              1: coarsening,
                              2: coarsening
                          },
                          trim_excess=trim_excess)
        lons_s = da.coarsen(np.mean,
                            da.from_array(self.lons, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()
        lats_s = da.coarsen(np.mean,
                            da.from_array(self.lats, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()

        source_grid = list(
            zip(*lat_lon.lon_lat_to_cartesian(lons_s.flatten(),
                                              lats_s.flatten())))
        print(np.shape(source_grid))
        ktree = KDTree(source_grid)

        dists, inds = ktree.query(
            list(
                zip(*lat_lon.lon_lat_to_cartesian(lons_target.flatten(),
                                                  lats_target.flatten()))))

        perc_daily, mask = self.get_daily_percenile_fields_lazy(
            data,
            start_year=start_year,
            end_year=end_year,
            percentile=percentile,
            rolling_mean_window_days=rolling_mean_window_days)

        print("perc_daily.shape=", perc_daily.shape)

        # do the interpolation for each day
        perc_daily_interpolated = []
        for perc_field in perc_daily:
            print(perc_field.shape)
            field = np.ma.masked_where(
                mask, perc_field.compute()).flatten()[inds].reshape(
                    lons_target.shape)
            perc_daily_interpolated.append(field)

        return np.array(perc_daily_interpolated)
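The method above derives an integer coarsening factor from the two grid scales and aggregates with a reduction that ignores the missing value. A hedged, self-contained sketch of that step, with made-up scales, shapes and a hypothetical missing value of -999.0:

import numpy as np
import dask.array as da

source_scale_deg, target_scale_deg = 0.1, 0.44
coarsening = int(target_scale_deg / source_scale_deg + 0.5)    # 4

missing_value = -999.0

def coarsening_func(x, axis=None):
    # mask out missing values so they do not bias the block mean
    masked = np.ma.masked_where(np.abs(x - missing_value) < 1.0e-6, x)
    return masked.mean(axis=axis)

data = da.from_array(np.random.random((8, 16, 16)), chunks=(8, 16, 16))
coarse = da.coarsen(coarsening_func, data, axes={1: coarsening, 2: coarsening},
                    trim_excess=True)
print(coarse.shape)   # (8, 4, 4)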
Example #19
def rebin(a, new_shape=None, scale=None, crop=True, dtype=None):
    """Rebin data into a smaller or larger array based on a linear
    interpolation. Specify either a new_shape or a scale. Scale of 1 means no
    binning. Scale less than one results in up-sampling.

    Parameters
    ----------
    a : numpy array
        The array to rebin.
    %s

    Returns
    -------
    numpy array

    Examples
    --------
    >>> a=rand(6,4); b=rebin(a,scale=(3,2))
    >>> a=rand(6); b=rebin(a,scale=(2,))

    Notes
    -----
    Fast ``re_bin`` function adapted from the scipy cookbook.
    If the rebin function fails with an error stating that the function is
    'not binned and therefore cannot be rebinned', add binned to the axes
    parameters with:
    >>> s.axes_manager[axis].is_binned = True

    """
    # Series of if statements to check that only one out of new_shape or scale
    # has been given. New_shape is then converted to scale. If both or neither
    # are given the function raises an error and won't run.
    if new_shape is None and scale is None:
        raise ValueError("One of new_shape, or scale must be specified")
    elif new_shape is not None and scale is not None:
        raise ValueError(
            "Only one out of new_shape or scale should be specified")
    elif new_shape is not None:
        scale = []
        for i, _ in enumerate(a.shape):
            scale.append(a.shape[i] / new_shape[i])
    else:
        new_shape = new_shape
        scale = scale
    if isinstance(dtype, str) and dtype != 'same':
        raise ValueError('`dtype` argument needs to be None, a numpy dtype or '
                         'the string "same".')

    # check whether or not interpolation is needed.
    if _requires_linear_rebin(arr=a, scale=scale):
        _logger.debug("Using linear_bin")
        return _linear_bin(a, scale, crop, dtype=dtype)
    else:
        if dtype == 'same':
            dtype = a.dtype
        _logger.debug("Using standard rebin with lazy support")
        # if interpolation is not needed run fast re_bin function.
        # Adapted from scipy cookbook.
        lenShape = len(a.shape)
        new_shape = np.asarray(a.shape) // np.asarray(scale)
        # ensure the new shape is integers
        new_shape = tuple(int(ns) for ns in new_shape)
        # check the function won't bin to zero.
        for item in new_shape:
            if item == 0:
                raise ValueError(
                    "One of your dimensions collapses to zero. "
                    "Re-adjust your scale values or run code with "
                    "crop=False to avoid this error.")
        scale = np.asarray(a.shape) // np.asarray(new_shape)
        if scale.max() < 2:
            return a.copy()

        if isinstance(a, np.ndarray):
            # most of the operations will fall here and dask is not imported
            rshape = ()
            for athing in zip(new_shape, scale):
                rshape += athing
            return a.reshape(rshape).sum(axis=tuple(2 * i + 1
                                                    for i in range(lenShape)),
                                         dtype=dtype)
        else:
            try:
                return da.coarsen(np.sum,
                                  a, {i: int(f)
                                      for i, f in enumerate(scale)},
                                  dtype=dtype)
            # we provide slightly better error message in hyperspy context
            except ValueError:
                raise ValueError(
                    "Rebinning does not align with data dask chunks. "
                    "Rebin fewer dimensions at a time to avoid this error")
Example #20
def aggregate(da, blocks, func=np.nanmean, debug=False):
    """
    Performs efficient block averaging in one or multiple dimensions.
    Only works on regular grid dimensions.

    Parameters
    ----------
    da : xarray DataArray (must be a dask array!)
    blocks : list
        List of tuples containing the dimension and interval to aggregate over
    func : function
        Aggregation function. Defaults to numpy.nanmean.

    Returns
    -------
    da_agg : xarray DataArray
        Aggregated array

    Examples
    --------
    >>> from xarrayutils import aggregate
    >>> import numpy as np
    >>> import xarray as xr
    >>> import matplotlib.pyplot as plt
    >>> %matplotlib inline
    >>> import dask.array as da

    >>> x = np.arange(-10,10)
    >>> y = np.arange(-10,10)
    >>> xx,yy = np.meshgrid(x,y)
    >>> z = xx**2-yy**2
    >>> a = xr.DataArray(da.from_array(z, chunks=(20, 20)),
                         coords={'x':x,'y':y}, dims=['y','x'])
    >>> print(a)

    <xarray.DataArray 'array-7e422c91624f207a5f7ebac426c01769' (y: 20, x: 20)>
    dask.array<array-7..., shape=(20, 20), dtype=int64, chunksize=(20, 20)>
    Coordinates:
      * y        (y) int64 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9
      * x        (x) int64 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9

    >>> blocks = [('x',2),('y',5)]
    >>> a_coarse = aggregate(a,blocks,func=np.mean)
    >>> print(a_coarse)

    <xarray.DataArray 'array-7e422c91624f207a5f7ebac426c01769' (y: 2, x: 10)>
    dask.array<coarsen..., shape=(2, 10), dtype=float64, chunksize=(2, 10)>
    Coordinates:
      * y        (y) int64 -10 0
      * x        (x) int64 -10 -8 -6 -4 -2 0 2 4 6 8
    Attributes:
        Coarsened with: <function mean at 0x111754230>
        Coarsenblocks: [('x', 2), ('y', 10)]
    """
    # Check if the input is a dask array (I might want to convert this
    # automatically in the future)
    if not isinstance(da.data, Array):
        raise RuntimeError('data array data must be a dask array')
    # Check data type of blocks
    # TODO write test
    if (not all(isinstance(n[0], str) for n in blocks)
            or not all(isinstance(n[1], int) for n in blocks)):

        print('blocks input', str(blocks))
        raise RuntimeError("block dimension must be dtype(str), \
        e.g. ('lon',4)")

    # Check if the given array has the dimension specified in blocks
    try:
        block_dict = dict((da.get_axis_num(x), y) for x, y in blocks)
    except ValueError:
        raise RuntimeError("'blocks' contains non matching dimension")

    # Check the size of the excess in each aggregated axis
    blocks = [(a[0], a[1], da.shape[da.get_axis_num(a[0])] % a[1])
              for a in blocks]

    # for now default to trimming the excess
    da_coarse = coarsen(func, da.data, block_dict, trim_excess=True)

    # for now default to only the dims
    new_coords = dict([])
    # for cc in da.coords.keys():
    warnings.warn("WARNING: only dimensions are carried over as coordinates")
    for cc in list(da.dims):
        new_coords[cc] = da.coords[cc]
        for dd in blocks:
            if dd[0] in list(da.coords[cc].dims):
                new_coords[cc] = \
                    new_coords[cc].isel(
                        **{dd[0]: slice(0, -(1 + dd[2]), dd[1])})

    attrs = {'Coarsened with': str(func), 'Coarsenblocks': str(blocks)}
    da_coarse = xr.DataArray(da_coarse,
                             dims=da.dims,
                             coords=new_coords,
                             name=da.name,
                             attrs=attrs)
    return da_coarse
Example #21
    def get_seasonal_means_with_ttest_stats_interpolated_to(
            self,
            lons_target,
            lats_target,
            season_to_monthperiod=None,
            start_year=-np.Inf,
            end_year=np.Inf,
            convert_monthly_accumulators_to_daily=False):
        """

        :param lons_target, lats_target: 2d arrays of target longitudes and latitudes
        :param season_to_monthperiod:
        :param start_year:
        :param end_year:
        :param convert_monthly_accumulators_to_daily: if True, converts monthly accumulators to daily values
        :return dict(season: [mean, std, nobs])


        # coarsen the data and coordinates to the target scale and interpolate using nearest neighbours
        """

        target_scale_deg = (lons_target[1, 1] - lons_target[0, 0] +
                            lats_target[1, 1] - lats_target[0, 0]) / 2.0

        coarsening = int(target_scale_deg / self.characteristic_scale_deg +
                         0.5)
        print("source_scale: {}\ntarget_scale: {}\ncoarsening coefficient: {}".
              format(self.characteristic_scale_deg, target_scale_deg,
                     coarsening))

        def coarsening_func(x, axis=None):
            _mask = np.less(np.abs(x - self.missing_value), 1.0e-6)

            if np.all(_mask):
                return self.missing_value * np.ma.ones(
                    _mask.shape).mean(axis=axis)

            y = np.ma.masked_where(_mask, x)

            return y.mean(axis=axis)

        # aggregate the data
        trim_excess = True
        data = da.coarsen(coarsening_func,
                          self.data,
                          axes={
                              1: coarsening,
                              2: coarsening
                          },
                          trim_excess=trim_excess)
        lons_s = da.coarsen(np.mean,
                            da.from_array(self.lons, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()
        lats_s = da.coarsen(np.mean,
                            da.from_array(self.lats, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()

        source_grid = list(
            zip(*lat_lon.lon_lat_to_cartesian(lons_s.flatten(),
                                              lats_s.flatten())))
        print(np.shape(source_grid))
        ktree = KDTree(source_grid)

        dists, inds = ktree.query(
            list(
                zip(*lat_lon.lon_lat_to_cartesian(lons_target.flatten(),
                                                  lats_target.flatten()))))

        print("data.shape = ", data.shape)
        result, mask = self.__get_seasonal_means_with_ttest_stats_dask_lazy(
            data,
            season_to_monthperiod=season_to_monthperiod,
            start_year=start_year,
            end_year=end_year,
            convert_monthly_accumulators_to_daily=
            convert_monthly_accumulators_to_daily)

        # invoke the computations and interpolate the result
        for season in result:
            print("Computing for {}".format(season))
            for i in range(len(result[season]) - 1):

                result[season][i] = np.ma.masked_where(
                    mask, result[season][i].compute()).flatten()[inds].reshape(
                        lons_target.shape)

        return result
Example #22
def test_coarsen_with_excess():
    x = da.arange(10, chunks=5)
    assert_eq(da.coarsen(np.min, x, {0: 3}, trim_excess=True),
              np.array([0, 5]))
    assert_eq(da.coarsen(np.sum, x, {0: 3}, trim_excess=True),
              np.array([0 + 1 + 2, 5 + 6 + 7]))
Example #23
def rebin(a, new_shape=None, scale=None, crop=True):
    """Rebin array.

    Rebin ndarray data into a smaller or larger array based on linear
    interpolation. Specify either a new_shape or a scale. A scale of 1 means no
    binning; a scale of less than one results in up-sampling.

    Parameters
    ----------
    a : numpy array
    new_shape : a list of floats or integer, default None
        For each dimension specify the new_shape of the np.array. This will
        then be converted into a scale.
    scale : a list of floats or integer, default None
        For each dimension specify the new:old pixel ratio, e.g. a ratio of 1
        is no binning and a ratio of 2 means that each pixel in the new
        spectrum is twice the size of the pixels in the old spectrum.
        The length of the list should match the dimension of the numpy array.
        ***Note: only one of scale or new_shape should be specified, otherwise
        the function will not run.***
    crop: bool, default True
        When binning by a non-integer number of pixels it is likely that
        the final row in each dimension contains less than the full quota to
        fill one pixel.

        e.g. a 5*5 array binned by 2.1 will produce two rows containing 2.1
        pixels and one row containing only 0.8 pixels worth. Selecting
        crop=True or crop=False determines whether or not this
        'black' line is cropped from the final binned array.

        *Please note that if crop=False is used, the final row in each
        dimension may appear black, if a fractional number of pixels are left
        over. It can be removed but has been left to preserve total counts
        before and after binning.*

    Returns
    -------
    numpy array

    Examples
    --------
    >>> a=rand(6,4); b=rebin(a,scale=(3,2))
    >>> a=rand(6); b=rebin(a,scale=(2,))

    Notes
    -----
    Fast re_bin function adapted from the scipy cookbook.
    If the rebin function fails with an error stating that the function is
    'not binned and therefore cannot be rebinned', add binned to the
    metadata with:
    >>> s.metadata.Signal.binned = True

    """
    # Series of if statements to check that only one out of new_shape or scale
    # has been given. New_shape is then converted to scale. If both or neither
    # are given the function raises an error and won't run.
    if new_shape is None and scale is None:
        raise ValueError("One of new_shape, or scale must be specified")
    elif new_shape is not None and scale is not None:
        raise ValueError("Only one out of new_shape or scale should be specified.\
                        Not both.")
    elif new_shape is not None:
        scale = []
        for i, axis in enumerate(a.shape):
            scale.append(a.shape[i] / new_shape[i])
    else:
        new_shape = new_shape
        scale = scale
    # check whether or not interpolation is needed.
    if _requires_linear_rebin(arr=a, scale=scale):
        _logger.debug("Using linear_bin")
        if np.issubdtype(a.dtype, np.integer):
            # The _linear_bin function below requires a float dtype
            # because of the default numpy casting rule ('same_kind').
            a = a.astype("float", casting="safe", copy=False)
        return _linear_bin(a, scale, crop)
    else:
        _logger.debug("Using standard rebin with lazy support")
        # if interpolation is not needed run fast re_bin function.
        # Adapted from scipy cookbook.
        lenShape = len(a.shape)
        new_shape = np.asarray(a.shape) // np.asarray(scale)
        # ensure the new shape is integers
        new_shape = tuple(int(ns) for ns in new_shape)
        # check the function won't bin to zero.
        for item in new_shape:
            if item == 0:
                raise ValueError("One of your dimensions collapses to zero.\
                Re-adjust your scale values or run code with crop=False to\
                avoid this.")
        scale = np.asarray(a.shape) // np.asarray(new_shape)
        if scale.max() < 2:
            return a.copy()
        if isinstance(a, np.ndarray):
            # most of the operations will fall here and dask is not imported
            rshape = ()
            for athing in zip(new_shape, scale):
                rshape += athing
            return a.reshape(rshape).sum(axis=tuple(
                2 * i + 1 for i in range(lenShape)))
        else:
            import dask.array as da
            try:
                return da.coarsen(np.sum, a, {i: int(f)
                                              for i, f in enumerate(scale)})
            # we provide slightly better error message in hyperspy context
            except ValueError:
                raise ValueError("Rebinning does not align with data dask chunks."
                                 " Rebin fewer dimensions at a time to avoid this"
                                 " error")
Example #24
def _create_pyramid(
    base_image_path,
    max_level=None,
    compressor=None,
):
    pyramid_path = Path(base_image_path).parent
    root = zarr.open_group(str(pyramid_path), mode="a")
    root.create_group(PYRAMID_GROUP_NAME, overwrite=True)

    # Gather metadata about store
    z = zarr.open(base_image_path)

    is_rgb = guess_rgb(z.shape)
    if is_rgb:
        # Assume last three dims are YXC
        y_size, x_size, _ = z.shape[-3:]
        y_dim = len(z.shape) - 3
        x_dim = len(z.shape) - 2
    else:
        # Assume last two dims are YX
        y_size, x_size = z.shape[-2:]
        y_dim = len(z.shape) - 2
        x_dim = len(z.shape) - 1

    # For now we are going to respect initial chunking to
    # determine pyramidal chunk sizes.
    tile_size = z.chunks[x_dim]
    chunks = z.chunks
    dtype = z.dtype

    # We want to read the image from zarr with a "good" chunksizes for computation.
    # Creating many small chunks is not ideal for dask and has large overhead.
    # https://docs.dask.org/en/latest/array-best-practices.html#select-a-good-chunk-size
    img = da.from_zarr(base_image_path, chunks="auto")

    if max_level is None:
        # create all levels up to 512 x 512
        max_level = int(
            np.ceil(np.log2(np.maximum(y_size, x_size))) - np.log2(tile_size))
    if compressor is None:
        compressor = DEFAULT_COMPRESSOR

    # Halving of the last two dims per round
    downsample_scheme = {
        y_dim: 2,
        x_dim: 2,
    }

    for i in range(1, max_level):
        img = da.coarsen(np.mean, img, downsample_scheme, trim_excess=True)

        # Edge Case: Need to pad smallest thumbnail sometimes.
        if img.shape[y_dim] < tile_size:
            img = pad_axis(img, y_dim, tile_size - img.shape[y_dim])

        if img.shape[x_dim] < tile_size:
            img = pad_axis(img, x_dim, tile_size - img.shape[x_dim])

        # Define pyramid level path
        out_path = str(pyramid_path / PYRAMID_GROUP_NAME / str(i).zfill(2))

        # Write to zarr store
        # Ensure correct dtype and chunksizes for store
        img.astype(dtype).rechunk(chunks).to_zarr(out_path,
                                                  compressor=compressor)

        # Read from last store so dask doesn't need to re-compute starting at base.
        img = da.from_zarr(out_path, chunks="auto")
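The number of pyramid levels above is derived from the image size and the tile size; a quick check of that arithmetic for a hypothetical 30000 x 50000 image with 512-pixel tiles:

import numpy as np

y_size, x_size, tile_size = 30000, 50000, 512
max_level = int(np.ceil(np.log2(max(y_size, x_size))) - np.log2(tile_size))
print(max_level)   # 7, since 50000 / 2**7 is about 391, which fits in a 512-pixel tile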
Example #25
def array_to_hitile(old_data, filename, zoom_step=8, chunks=(1e6,), agg_function=np.sum):
    '''
    Downsample a dataset so that it's compatible with HiGlass (filetype: hitile, datatype: vector)
    
    Parameters
    ----------
    old_data: np.array
        A numpy array containing the data to be downsampled
    filename: string
        The output filename where the resulting multi-resolution
        data will be stored.
    zoom_step: int
        The number of zoom levels to skip when aggregating
    '''
    import dask.array as da

    if op.exists(filename):
        os.remove(filename)
    
    f_new = h5py.File(filename, 'w')
    
    tile_size = 1024
    min_pos = 0
    max_pos = len(old_data)


    # we store every n'th zoom level
    zoom_factor = 2 ** zoom_step
    
    max_zoom = math.ceil(math.log(max_pos / tile_size) / math.log(2))
        
    meta = f_new.create_dataset('meta', (1,), dtype='f')
    meta.attrs['tile-size'] = tile_size
    meta.attrs['zoom-step'] = zoom_step
    meta.attrs['max-length'] = max_pos
    meta.attrs['max-zoom'] = max_zoom

    meta.attrs['max-width'] = tile_size * 2 ** max_zoom

    prev_length = max_pos  

    # wrap the input in a dask array so it can be rechunked and coarsened lazily
    old_data = da.from_array(old_data, chunks)
    min_data = old_data
    max_data = old_data

    for z in range(0, max_zoom, zoom_step):
        dset_length = math.ceil(max_pos / 2 ** z)
        print('z:', z, 'dset_length:', dset_length)

        values_dset = f_new.require_dataset('values_' + str(z), (len(old_data),), 
                             dtype='f', compression='gzip' )

        mins_dset = f_new.require_dataset('mins_' + str(z), (len(old_data),), 
                             dtype='f', compression='gzip' )
        maxs_dset = f_new.require_dataset('maxs_' + str(z), (len(old_data),), 
                             dtype='f', compression='gzip' )

        nan_values_dset = f_new.require_dataset('nan_values_' + str(z), (len(old_data),), 
                             dtype='f', compression='gzip')

        da.store(old_data, values_dset)
        da.store(min_data, mins_dset)
        da.store(max_data, maxs_dset)
        # f_new['values_' + str(z)][:] = old_data

        # see if we need to pad the end of the dataset
        # if so, use the previous last value
        if len(old_data) % zoom_factor != 0:
            old_data = da.concatenate(
                (old_data, [old_data[-1]] * ( zoom_factor - len(old_data) % zoom_factor )))
            min_data = da.concatenate(
                (min_data, [min_data[-1]] * ( zoom_factor - len(min_data) % zoom_factor )))
            max_data = da.concatenate(
                (max_data, [max_data[-1]] * ( zoom_factor - len(max_data) % zoom_factor )))

        # aggregate the data by summing adjacent datapoints
        sys.stdout.write('summing...')
        sys.stdout.flush()
        print("fdsdsfs:", math.ceil(len(old_data) / zoom_factor), zoom_factor)
        print("chunks:", chunks, zoom_factor, 'len:', len(old_data))

        old_data = old_data.rechunk(chunks)
        min_data = min_data.rechunk(chunks)
        max_data = max_data.rechunk(chunks)
        print('zoom_factor', zoom_factor, old_data.shape)

        old_data = da.coarsen(agg_function, old_data, {0: zoom_factor})
        min_data = da.coarsen(np.min, min_data, {0: zoom_factor})
        max_data = da.coarsen(np.max, max_data, {0: zoom_factor})
        
        # reshape( (math.ceil(len(old_data) / zoom_factor), zoom_factor)).sum(axis=1)
        sys.stdout.write(' done\n')
        sys.stdout.flush()

        '''
        if len(old_data) < 10000:
            plt.plot(old_data)
        '''

    #plt.plot(old_data) 
    f_new.close()
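Before each aggregation step, the function above pads the series with its last value until its length is a multiple of the zoom factor, so da.coarsen can divide it evenly. A hedged sketch of that pad-then-coarsen step with made-up numbers:

import numpy as np
import dask.array as da

zoom_factor = 4
data = da.from_array(np.arange(10, dtype='f4'), chunks=(10,))
pad = (-data.shape[0]) % zoom_factor                  # 2 extra values needed
if pad:
    data = da.concatenate([data, da.ones(pad, dtype=data.dtype) * data[-1]])
data = data.rechunk(data.shape[0])                    # one chunk, so the factor divides evenly
summed = da.coarsen(np.sum, data, {0: zoom_factor})
print(summed.compute())                               # [ 6. 22. 35.]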
Example #26
"""
Displays a multi-resolution pyramid built from a random dask array (a stand-in for the Allen brain reference atlas at 10 um resolution)
"""

from napari import Viewer, gui_qt
import dask.array as da
from dask.cache import Cache
import numpy as np

cache = Cache(2e9)  # Leverage two gigabytes of memory
cache.register()

base = da.random.randint(0, 255, (100, 2000, 5000), dtype='uint8')
pyramid = [base]
image = pyramid[0]
for i in range(4):
    image = da.coarsen(np.mean, image, {0: 1, 1: 2, 2: 2}, trim_excess=True)
    pyramid.append(image)
print('pyramid level shapes: ', [p.shape for p in pyramid])

with gui_qt():
    # create an empty viewer
    viewer = Viewer()
    # layer = viewer.add_image(base, name='base')
    layer = viewer.add_pyramid(pyramid, name='pyramid')
Example #27
ax.set_aspect('equal')
plt.show()

# +
fig, ax = plt.subplots(figsize=[10, 10], constrained_layout=True)
base_extent = np.array(
    [-dims[1] // 2, dims[1] // 2, -dims[2] // 2, dims[2] // 2])

ax.scatter(*cpc,
           c=cropindices,
           cmap='nipy_spectral',
           zorder=5,
           linewidths=1,
           edgecolors='black')
cfac = 4
coarse_mask = da.coarsen(np.all, da.asarray(mask), {0: cfac, 1: cfac})
cropdata = da.coarsen(np.mean, data[cropindices], {1: cfac, 2: cfac}).persist()

xlim, ylim = np.array([ax.get_xlim(), ax.get_ylim()])
vmin, vmax = da.nanmin(cropdata).compute(), da.nanmax(cropdata).compute()
for i in range(len(cropdata)):
    plt.imshow(
        np.where(coarse_mask, cropdata[i], np.nan).T,
        extent=base_extent +
        np.array([cpc[0, i], cpc[0, i], cpc[1, i], cpc[1, i]]),
        origin='lower',
        #alpha=0.5,
        cmap='gray',
        vmin=vmin,
        vmax=vmax,
    )