Example #1
def calc_image_cell_size(vis_dataset, global_dataset, pixels_per_beam=7):
    """
    Calculates the image and cell size needed for imaging a vis_dataset.
    It uses the perfectly illuminated circular-aperture approximation to determine the field of view,
    and pixels_per_beam to set the cell size.

    Parameters
    ----------
    vis_dataset : xarray.core.dataset.Dataset
        Input visibility dataset.
    global_dataset : xarray.core.dataset.Dataset
        Input global dataset (needed for antenna diameter).
    pixels_per_beam : int, optional
        Number of pixels across the synthesized beam, used to set the cell size.

    Returns
    -------
    cell : numpy.ndarray of float, units = arcseconds
        Cell size for each spatial dimension.
    imsize : numpy.ndarray
        Number of pixels for each spatial dimension.
    """
    import xarray
    import numpy as np
    import dask.array as da
    rad_to_arc = (3600 * 180) / np.pi  # Radians to arcseconds
    c = 299792458

    f_min = da.nanmin(vis_dataset.chan)
    f_max = da.nanmax(vis_dataset.chan)
    D_min = np.nanmin(global_dataset.ANT_DISH_DIAMETER)
    #D_min = min_dish_diameter

    # Calculate cell size using pixels_per_beam
    cell = rad_to_arc * np.array([
        c / (da.nanmax(vis_dataset.UVW[:, :, 0].data) * f_max), c /
        (da.nanmax(vis_dataset.UVW[:, :, 1].data) * f_max)
    ]) / pixels_per_beam

    # If cell sizes are within 20% of each other use the smaller cell size for both.
    if (cell[0] / cell[1] < 1.2) and (cell[1] / cell[0] < 1.2):
        cell[:] = np.min(cell)

    # Calculate imsize using the perfectly-illuminated circular aperture approximation
    FWHM_max = np.array((rad_to_arc * (1.02 * c / (D_min * f_min))))
    imsize = FWHM_max / cell

    # Find an image size that is (2^n)*10, for integer n, when multiplied by the gridding padding factor
    padding = 1.2

    if imsize[0] < 1:
        imsize[0] = 1

    if imsize[1] < 1:
        imsize[1] = 1

    n_power = np.ceil(np.log2(imsize / 10))
    imsize = np.ceil(((2**n_power) * 10) / padding)

    return cell, imsize
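For orientation, here is a minimal standalone sketch of the same arithmetic with hypothetical numbers (longest baseline 1500 m, 12 m dishes, a 100-115 GHz band); it only illustrates the formulas above and is not part of the original function:

import numpy as np

rad_to_arc = (3600 * 180) / np.pi       # radians to arcseconds
c = 299792458
u_max, f_max = 1500.0, 115e9            # hypothetical longest baseline component (m), highest channel frequency (Hz)
D_min, f_min = 12.0, 100e9              # hypothetical smallest dish diameter (m), lowest channel frequency (Hz)
pixels_per_beam = 7

cell = rad_to_arc * (c / (u_max * f_max)) / pixels_per_beam   # arcsec per pixel
fwhm = rad_to_arc * 1.02 * c / (D_min * f_min)                # primary-beam FWHM, arcsec
imsize = fwhm / cell                                          # pixels needed to span the field of view
print(cell, imsize)                                           # roughly 0.05 arcsec and ~1000 pixels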
Example #2
    def get_sample_from_bil_info(self, data, fill_value=None,
                                 output_shape=None):
        """Resample using pre-computed resampling LUTs."""
        del output_shape
        fill_value = _check_fill_value(fill_value, data.dtype)

        p_1, p_2, p_3, p_4 = self._slice_data(data, fill_value)
        s__, t__ = self.bilinear_s, self.bilinear_t

        res = (p_1 * (1 - s__) * (1 - t__) +
               p_2 * s__ * (1 - t__) +
               p_3 * (1 - s__) * t__ +
               p_4 * s__ * t__)

        epsilon = 1e-6
        data_min = da.nanmin(data) - epsilon
        data_max = da.nanmax(data) + epsilon

        idxs = (res > data_max) | (res < data_min)
        res = da.where(idxs, fill_value, res)
        res = da.where(np.isnan(res), fill_value, res)
        shp = self.target_geo_def.shape
        if data.ndim == 3:
            res = da.reshape(res, (res.shape[0], shp[0], shp[1]))
        else:
            res = da.reshape(res, (shp[0], shp[1]))

        # Add missing coordinates
        self._add_missing_coordinates(data)

        res = DataArray(res, dims=data.dims, coords=self.out_coords)

        return res
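The weighted sum above is standard bilinear interpolation of the four neighbouring samples p_1..p_4 with fractional offsets s and t; a scalar illustration with arbitrary values:

p_1, p_2, p_3, p_4 = 10.0, 20.0, 30.0, 40.0    # the four corner values
s, t = 0.25, 0.5                               # fractional position inside the cell
value = (p_1 * (1 - s) * (1 - t) + p_2 * s * (1 - t)
         + p_3 * (1 - s) * t + p_4 * s * t)
print(value)                                   # 22.5; s = t = 0 returns p_1, s = t = 1 returns p_4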
Example #3
    def _limit_output_values_to_input(self, data, res, fill_value):
        epsilon = 1e-6
        data_min = da.nanmin(data) - epsilon
        data_max = da.nanmax(data) + epsilon

        res = da.where(
            find_indices_outside_min_and_max(res, data_min, data_max),
            fill_value, res)

        return da.where(np.isnan(res), fill_value, res)
Example #4
def _normalize_data_dask(data, pixel_max, c, th):
    min_val = da.nanmin(data)
    max_val = da.nanmax(data)
    out = da.map_blocks(_normalize_data_cpu,
                        data,
                        min_val,
                        max_val,
                        pixel_max,
                        c,
                        th,
                        meta=np.array(()))
    return out
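_normalize_data_cpu is defined elsewhere in the original module and is not shown here. A hypothetical per-block stand-in (an assumption, not the original helper) illustrates how the lazily computed min_val/max_val reach each block:

import numpy as np

def _normalize_data_cpu(block, min_val, max_val, pixel_max, c, th):
    # Hypothetical sketch: min_val/max_val arrive as 0-d arrays produced by
    # da.nanmin/da.nanmax, so every block is rescaled with the global range;
    # the contrast step using c and th is purely illustrative.
    norm = (block - min_val) / (max_val - min_val)
    return np.clip(c * (norm - th), 0.0, 1.0) * pixel_max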
Example #5
 def _calculate_summary_statistics(self):
     data = self._lazy_data()
     _raveled = data.ravel()
     _mean, _std, _min, _q1, _q2, _q3, _max = da.compute(
         da.nanmean(data),
         da.nanstd(data),
         da.nanmin(data),
         da.percentile(_raveled, [25, ]),
         da.percentile(_raveled, [50, ]),
         da.percentile(_raveled, [75, ]),
         da.nanmax(data), )
     return _mean, _std, _min, _q1, _q2, _q3, _max
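Passing all seven reductions to a single da.compute call lets dask evaluate them over one shared graph, so the underlying chunks are read once rather than once per statistic; a minimal sketch of the same pattern:

import numpy as np
import dask.array as da

x = da.from_array(np.array([1.0, np.nan, 3.0, 4.0]), chunks=2)
lo, hi = da.compute(da.nanmin(x), da.nanmax(x))   # one evaluation for both reductions
print(lo, hi)                                     # 1.0 4.0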
Example #6
 def _calculate_summary_statistics(self):
     data = self._lazy_data()
     _raveled = data.ravel()
     _mean, _std, _min, _q1, _q2, _q3, _max = da.compute(
         da.nanmean(data),
         da.nanstd(data),
         da.nanmin(data),
         da.percentile(_raveled, [25, ]),
         da.percentile(_raveled, [50, ]),
         da.percentile(_raveled, [75, ]),
         da.nanmax(data), )
     return _mean, _std, _min, _q1, _q2, _q3, _max
Example #7
File: ds.py  Project: elaeon/ML
    def stadistics(self):
        headers = ["group", "mean", "std dev", "min", "25%", "50%", "75%", "max", "nonzero", "nonan", "unique", "dtype"]
        self.chunksize = Chunks.build_from_shape(self.shape, self.dtypes)
        table = []
        for group, (dtype, _) in self.dtypes.fields.items():
            values = dict()
            values["dtype"] = dtype
            values["group"] = group
            darray = self.data[group].da
            if dtype == np.dtype(float) or dtype == np.dtype(int):
                da_mean = da.around(darray.mean(), decimals=3)
                da_std = da.around(darray.std(), decimals=3)
                da_min = da.around(darray.min(), decimals=3)
                da_max = da.around(darray.max(), decimals=3)
                result = dask.compute([da_mean, da_std, da_min, da_max])[0]
                values["mean"] = result[0] if not np.isnan(result[0]) else da.around(da.nanmean(darray), decimals=3).compute()
                values["std dev"] = result[1] if not np.isnan(result[0]) else da.around(da.nanstd(darray), decimals=3).compute()
                values["min"] = result[2] if not np.isnan(result[0]) else da.around(da.nanmin(darray), decimals=3).compute()
                values["max"] = result[3] if not np.isnan(result[0]) else da.around(da.nanmax(darray), decimals=3).compute()
                if len(self.shape[group]) == 1:
                    da_percentile = da.around(da.percentile(darray, [25, 50, 75]), decimals=3)
                    result = da_percentile.compute()
                    values["25%"] = result[0]
                    values["50%"] = result[1]
                    values["75%"] = result[2]
                else:
                    values["25%"] = "-"
                    values["50%"] = "-"
                    values["75%"] = "-"
                values["nonzero"] = da.count_nonzero(darray).compute()
                values["nonan"] = da.count_nonzero(da.notnull(darray)).compute()
                values["unique"] = "-"
            else:
                values["mean"] = "-"
                values["std dev"] = "-"
                values["min"] = "-"
                values["max"] = "-"
                values["25%"] = "-"
                values["50%"] = "-"
                values["75%"] = "-"
                values["nonzero"] = "-"
                values["nonan"] = da.count_nonzero(da.notnull(darray)).compute()
                vunique = darray.to_dask_dataframe().fillna('').nunique().compute()
                values["unique"] = vunique

            row = []
            for column in headers:
                row.append(values[column])
            table.append(row)

        print("# rows {}".format(self.shape[0]))
        return tabulate(table, headers)
Example #8
def _run_dask_numpy_equal_interval(data, k):
    max_data = da.nanmax(data)
    min_data = da.nanmin(data)
    width = (max_data - min_data) / k
    cuts = da.arange(min_data + width, max_data + width, width)
    l_cuts = cuts.shape[0]
    if l_cuts > k:
        # handle overshooting
        cuts = cuts[0:k]
    # work around to assign cuts[-1] = max_data
    bins = da.concatenate([cuts[:k - 1], [max_data]])
    out = _bin(data, bins, np.arange(l_cuts))
    return out
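_bin is a helper from the surrounding module and is not shown here; a hypothetical stand-in built on np.digitize (an assumption, not the original implementation) could look like this:

import numpy as np
import dask.array as da

def _bin(data, bins, new_values):
    # Hypothetical sketch: the bin edges are few (k of them), so materialise them
    # and map np.digitize over the blocks of `data`. In the call above
    # `new_values` is np.arange(l_cuts), i.e. the identity mapping.
    edges = np.asarray(bins)                        # forces the tiny dask array of edges
    idx = data.map_blocks(np.digitize, edges, right=True, dtype=np.int64)
    return da.where(da.isnan(data), np.nan, idx)    # keep NaN cells as NaN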
Example #9
def test_nan():
    x = np.array([[1, np.nan, 3, 4], [5, 6, 7, np.nan], [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert_eq(np.nansum(x), da.nansum(d))
    assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert_eq(np.nanvar(x), da.nanvar(d))
    assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    assert_eq(np.nanprod(x), da.nanprod(d))
Example #10
def test_nan():
    x = np.array([[1, np.nan, 3, 4], [5, 6, 7, np.nan], [9, 10, 11, 12]])
    d = da.from_array(x, blockshape=(2, 2))

    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
Example #11
 def _calculate_summary_statistics(self, rechunk=True):
     if rechunk is True:
         # Use dask's auto rechunking instead of HyperSpy's, which should be
         # better for these operations
         rechunk = "dask_auto"
     data = self._lazy_data(rechunk=rechunk)
     _raveled = data.ravel()
     _mean, _std, _min, _q1, _q2, _q3, _max = da.compute(
         da.nanmean(data),
         da.nanstd(data),
         da.nanmin(data),
         da.percentile(_raveled, [25, ]),
         da.percentile(_raveled, [50, ]),
         da.percentile(_raveled, [75, ]),
         da.nanmax(data), )
     return _mean, _std, _min, _q1, _q2, _q3, _max
Example #12
 def _calculate_summary_statistics(self, rechunk=True):
     if rechunk is True:
         # Use dask's auto rechunking instead of HyperSpy's, which should be
         # better for these operations
         rechunk = "dask_auto"
     data = self._lazy_data(rechunk=rechunk)
     _raveled = data.ravel()
     _mean, _std, _min, _q1, _q2, _q3, _max = da.compute(
         da.nanmean(data),
         da.nanstd(data),
         da.nanmin(data),
         da.percentile(_raveled, [25, ]),
         da.percentile(_raveled, [50, ]),
         da.percentile(_raveled, [75, ]),
         da.nanmax(data), )
     return _mean, _std, _min, _q1, _q2, _q3, _max
Example #13
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert_eq(np.nansum(x), da.nansum(d))
    assert_eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert_eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert_eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert_eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert_eq(np.nanvar(x), da.nanvar(d))
    assert_eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert_eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert_eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    assert_eq(nanprod(x), da.nanprod(d))
Example #14
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, blockshape=(2, 2))

    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
Example #15
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, chunks=(2,))

    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
Example #16
def test_reductions():
    x = np.arange(5).astype('f4')
    a = da.from_array(x, blockshape=(2, ))

    assert eq(da.all(a), np.all(x))
    assert eq(da.any(a), np.any(x))
    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.max(a), np.max(x))
    assert eq(da.mean(a), np.mean(x))
    assert eq(da.min(a), np.min(x))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.nanmax(a), np.nanmax(x))
    assert eq(da.nanmin(a), np.nanmin(x))
    assert eq(da.nansum(a), np.nansum(x))
    assert eq(da.nanvar(a), np.nanvar(x))
    assert eq(da.nanstd(a), np.nanstd(x))
Example #17
def _concatenate_chunks(chunks):
    """Concatenate chunks to full output array."""
    # Form the full array
    col, res = [], []
    prev_y = 0
    for y, x in sorted(chunks):
        if len(chunks[(y, x)]) > 1:
            chunk = da.nanmax(da.stack(chunks[(y, x)], axis=-1), axis=-1)
        else:
            chunk = chunks[(y, x)][0]
        if y == prev_y:
            col.append(chunk)
            continue
        res.append(da.concatenate(col, axis=1))
        col = [chunk]
        prev_y = y
    res.append(da.concatenate(col, axis=1))

    res = da.concatenate(res, axis=2)

    return res
Example #18
def parallel_gradient_search(data, src_x, src_y, dst_x, dst_y, **kwargs):
    """Run gradient search in parallel in input area coordinates."""
    if data.ndim not in [2, 3]:
        raise NotImplementedError(
            'Gradient search resampling only supports 2D or 3D arrays.')
    if data.ndim == 2:
        data = data[np.newaxis, :, :]
    # TODO: Make sure the data is uniformly chunked.
    src_gradient_xl, src_gradient_xp = np.gradient(src_x, axis=[0, 1])
    src_gradient_yl, src_gradient_yp = np.gradient(src_y, axis=[0, 1])
    arrays = reshape_arrays_in_stacked_chunks(
        (src_x, src_y, src_gradient_xl, src_gradient_xp, src_gradient_yl,
         src_gradient_yp), src_x.chunks)
    # TODO: rechunk and reformat the data array
    src_x, src_y, src_gradient_xl, src_gradient_xp, src_gradient_yl, src_gradient_yp = arrays
    data = reshape_to_stacked_3d(data)
    res = da.blockwise(_gradient_resample_data,
                       'bmnz',
                       data.astype(np.float64),
                       'bijz',
                       src_x,
                       'ijz',
                       src_y,
                       'ijz',
                       src_gradient_xl,
                       'ijz',
                       src_gradient_xp,
                       'ijz',
                       src_gradient_yl,
                       'ijz',
                       src_gradient_yp,
                       'ijz',
                       dst_x,
                       'mn',
                       dst_y,
                       'mn',
                       dtype=np.float64,
                       method=kwargs.get('method', 'bilinear'))
    return da.nanmax(res, axis=-1).squeeze()
Example #19
data_dask = da.from_array(data, chunks=(1, 444, 922))

# Yields an unevaluated dask array
data_dask.min()
# dask.array<amin-aggregate, shape=(), dtype=float64, chunksize=()>
# Force computation
data_dask.min().compute()
# nan
# The plain min() returns nan because the data contain NaN values

# Dask has NaN-aware computations
da.nanmin(data_dask).compute()
# -22.329354809176536

lo = da.nanmin(data_dask).compute()
hi = da.nanmax(data_dask).compute()
print(lo, hi)
# -22.3293548092 47.7625806255

# Visualizing the temperature maps -

# Number of images
N_months = data_dask.shape[0]

import matplotlib.pyplot as plt

fig, panels = plt.subplots(nrows=4, ncols=3)
for month, panel in zip(range(N_months), panels.flatten()):
    im = panel.imshow(data_dask[month, :, :], origin='lower', vmin=lo, vmax=hi)
    panel.set_title('2008-{:02d}'.format(month + 1))
    panel.axis('off')
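This snippet assumes `data` is a monthly temperature stack (roughly shape (12, 444, 922)) loaded earlier in the tutorial; a self-contained stand-in with synthetic data exercises the same NaN-aware pattern:

import numpy as np
import dask.array as da

rng = np.random.default_rng(0)
data = rng.normal(15, 10, size=(12, 444, 922))   # synthetic monthly grids
data[:, :50, :50] = np.nan                       # mimic missing pixels

data_dask = da.from_array(data, chunks=(1, 444, 922))
print(data_dask.min().compute())                 # nan: the plain reduction propagates NaNs
print(da.nanmin(data_dask).compute(),
      da.nanmax(data_dask).compute())            # NaN-aware extrema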
Example #20
fig, ax = plt.subplots(figsize=[10, 10], constrained_layout=True)
base_extent = np.array(
    [-dims[1] // 2, dims[1] // 2, -dims[2] // 2, dims[2] // 2])

ax.scatter(*cpc,
           c=cropindices,
           cmap='nipy_spectral',
           zorder=5,
           linewidths=1,
           edgecolors='black')
cfac = 4
coarse_mask = da.coarsen(np.all, da.asarray(mask), {0: cfac, 1: cfac})
cropdata = da.coarsen(np.mean, data[cropindices], {1: cfac, 2: cfac}).persist()

xlim, ylim = np.array([ax.get_xlim(), ax.get_ylim()])
vmin, vmax = da.nanmin(cropdata).compute(), da.nanmax(cropdata).compute()
for i in range(len(cropdata)):
    plt.imshow(
        np.where(coarse_mask, cropdata[i], np.nan).T,
        extent=base_extent +
        np.array([cpc[0, i], cpc[0, i], cpc[1, i], cpc[1, i]]),
        origin='lower',
        #alpha=0.5,
        cmap='gray',
        vmin=vmin,
        vmax=vmax,
    )
    plt.annotate(str(cropindices[i]),
                 +cpc[:, i],
                 bbox=dict(facecolor='white', alpha=0.4, edgecolor='none'))
plt.colorbar()
Example #21
        
        will fetch the data at the URL: http://localhost/tiles/0/1
        
        This assumes that the data is in CoverageJSON format, and does the work of fetching the data, parsing it, and extracting the actual
        data as a numpy array.
    """
    for axis, tile_index in zip(axis_names, tile_indices):
        url_template = url_template.replace('{' + axis + '}', str(tile_index))
    # Debug line: uncomment to see which tiles are fetched.
    # Note that printed output may be interleaved due to multithreading
    #print 'fetching tile from',url_template
    tile_data = json.loads(get_data(url_template))
    tile_values = np.array(tile_data['values'], dtype=float).reshape(tile_data['shape'])
    return tile_values

if __name__ == '__main__':
    # Usage example.
    arrs = get_dask_arrays('http://godiva.rdg.ac.uk/coverage/sst-tiled.json')
    print "Created dask array"
    sst = arrs['analysed_sst-yx_tiling']
    print 'Shape:',sst.shape
    print "Got array, calculating means:"
    print 'Northern Eighth', da.nanmean(sst[0,:450,:]).compute()
    print 'Equatorial Quarter', da.nanmean(sst[0,1350:2250,:]).compute()
    print 'Southern Eighth', da.nanmean(sst[0,3150:,:]).compute()
    # Note that even though we defined c100, each tile is still fetched for each calculation.
    # That's because we've used a naive fetch method, with no caching
    c100 = sst[0,1700:1900,3500:3700]
    print 'Central 100 points', da.nanmean(c100).compute()
    print 'Central 100 points Min/Max', da.nanmin(c100).compute(), da.nanmax(c100).compute()
Example #22
    def get_sample_from_bil_info(self, data, fill_value=np.nan,
                                 output_shape=None):
        if fill_value is None:
            fill_value = np.nan
        # FIXME: can this be made into a dask construct?
        cols, lines = np.meshgrid(np.arange(data['x'].size),
                                  np.arange(data['y'].size))
        cols = da.ravel(cols)
        lines = da.ravel(lines)
        try:
            self.valid_input_index = self.valid_input_index.compute()
        except AttributeError:
            pass
        vii = self.valid_input_index.squeeze()
        try:
            self.index_array = self.index_array.compute()
        except AttributeError:
            pass

        # ia contains reduced (valid) indices of the source array, and has the
        # shape of the destination array
        ia = self.index_array
        rlines = lines[vii][ia]
        rcols = cols[vii][ia]

        slices = []
        mask_slices = []
        mask_2d_added = False
        coords = {}
        try:
            # FIXME: Use same chunk size as input data
            coord_x, coord_y = self.target_geo_def.get_proj_vectors_dask()
        except AttributeError:
            coord_x, coord_y = None, None

        for _, dim in enumerate(data.dims):
            if dim == 'y':
                slices.append(rlines)
                if not mask_2d_added:
                    mask_slices.append(ia >= self.target_geo_def.size)
                    mask_2d_added = True
                if coord_y is not None:
                    coords[dim] = coord_y
            elif dim == 'x':
                slices.append(rcols)
                if not mask_2d_added:
                    mask_slices.append(ia >= self.target_geo_def.size)
                    mask_2d_added = True
                if coord_x is not None:
                    coords[dim] = coord_x
            else:
                slices.append(slice(None))
                mask_slices.append(slice(None))
                try:
                    coords[dim] = data.coords[dim]
                except KeyError:
                    pass

        res = data.values[slices]
        res[mask_slices] = fill_value

        try:
            p_1 = res[:, :, 0]
            p_2 = res[:, :, 1]
            p_3 = res[:, :, 2]
            p_4 = res[:, :, 3]
        except IndexError:
            p_1 = res[:, 0]
            p_2 = res[:, 1]
            p_3 = res[:, 2]
            p_4 = res[:, 3]

        s__, t__ = self.bilinear_s, self.bilinear_t

        res = (p_1 * (1 - s__) * (1 - t__) +
               p_2 * s__ * (1 - t__) +
               p_3 * (1 - s__) * t__ +
               p_4 * s__ * t__)

        epsilon = 1e-6
        data_min = da.nanmin(data) - epsilon
        data_max = da.nanmax(data) + epsilon

        idxs = (res > data_max) | (res < data_min)
        res = da.where(idxs, fill_value, res)
        shp = self.target_geo_def.shape
        if data.ndim == 3:
            res = da.reshape(res, (res.shape[0], shp[0], shp[1]))
        else:
            res = da.reshape(res, (shp[0], shp[1]))
        res = DataArray(da.from_array(res, chunks=CHUNK_SIZE),
                        dims=data.dims, coords=coords)

        return res
Example #23
    def get_sample_from_bil_info(self, data, fill_value=np.nan,
                                 output_shape=None):
        if fill_value is None:
            fill_value = np.nan
        # FIXME: can this be made into a dask construct?
        cols, lines = np.meshgrid(np.arange(data['x'].size),
                                  np.arange(data['y'].size))
        cols = da.ravel(cols)
        lines = da.ravel(lines)
        try:
            self.valid_input_index = self.valid_input_index.compute()
        except AttributeError:
            pass
        vii = self.valid_input_index.squeeze()
        try:
            self.index_array = self.index_array.compute()
        except AttributeError:
            pass

        # ia contains reduced (valid) indices of the source array, and has the
        # shape of the destination array
        ia = self.index_array
        rlines = lines[vii][ia]
        rcols = cols[vii][ia]

        slices = []
        mask_slices = []
        mask_2d_added = False
        coords = {}
        try:
            # FIXME: Use same chunk size as input data
            coord_x, coord_y = self.target_geo_def.get_proj_vectors_dask()
        except AttributeError:
            coord_x, coord_y = None, None

        for _, dim in enumerate(data.dims):
            if dim == 'y':
                slices.append(rlines)
                if not mask_2d_added:
                    mask_slices.append(ia >= self.target_geo_def.size)
                    mask_2d_added = True
                if coord_y is not None:
                    coords[dim] = coord_y
            elif dim == 'x':
                slices.append(rcols)
                if not mask_2d_added:
                    mask_slices.append(ia >= self.target_geo_def.size)
                    mask_2d_added = True
                if coord_x is not None:
                    coords[dim] = coord_x
            else:
                slices.append(slice(None))
                mask_slices.append(slice(None))
                try:
                    coords[dim] = data.coords[dim]
                except KeyError:
                    pass

        res = data.values[slices]
        res[mask_slices] = fill_value

        try:
            p_1 = res[:, :, 0]
            p_2 = res[:, :, 1]
            p_3 = res[:, :, 2]
            p_4 = res[:, :, 3]
        except IndexError:
            p_1 = res[:, 0]
            p_2 = res[:, 1]
            p_3 = res[:, 2]
            p_4 = res[:, 3]

        s__, t__ = self.bilinear_s, self.bilinear_t

        res = (p_1 * (1 - s__) * (1 - t__) +
               p_2 * s__ * (1 - t__) +
               p_3 * (1 - s__) * t__ +
               p_4 * s__ * t__)

        epsilon = 1e-6
        data_min = da.nanmin(data) - epsilon
        data_max = da.nanmax(data) + epsilon

        idxs = (res > data_max) | (res < data_min)
        res = da.where(idxs, fill_value, res)
        shp = self.target_geo_def.shape
        if data.ndim == 3:
            res = da.reshape(res, (res.shape[0], shp[0], shp[1]))
        else:
            res = da.reshape(res, (shp[0], shp[1]))
        res = DataArray(da.from_array(res, chunks=CHUNK_SIZE),
                        dims=data.dims, coords=coords)

        return res
Example #24
def composite(src_fps, save_loc, save_nam, method="mean", dt="default"):
    """Creates a composite from multiple rasters. Individual rasters have to be
    of the same size (extents, pixel size, data type). Multiple compositing
    methods are available, including mean, min, max and median.

    Parameters
    ----------
    src_fps : list(str)
        List of paths to source files.
    save_loc : str
        Path to save folder.
    save_nam : str
        Name of the file to be saved.
    method : str
        Compositing method, either "mean", "min", "max" or "median".
    dt : str, optional
        Orbit direction, either "DES" or "ASC" (required for generating
        previews).

    Returns
    -------
    out_pth : str
        Absolute path to the product.
    """
    # Make sure save location exists
    os.makedirs(save_loc, exist_ok=True)

    # Save TIFF metadata for output
    with rasterio.open(src_fps[0]) as rst:
        out_meta = rst.profile.copy()

    # Lazily load files into DASK ARRAYS
    print(f"#\n# Preparing Dask arrays...")
    chunks = {'band': 1, 'x': 1024, 'y': 1024}
    lazy_arrays = [xr.open_rasterio(fp, chunks=chunks) for fp in src_fps]
    stacked = da.concatenate(lazy_arrays, axis=0)
    stacked[stacked == 0] = np.nan
    # Calculate composite for selected method with dask
    print(f"# Compositing ({method}) using Dask...")
    if method == 'mean':
        comp_out = da.nanmean(stacked, axis=0, keepdims=True).compute()
    elif method == 'median':
        comp_out = da.nanmedian(stacked, axis=0, keepdims=True).compute()
    elif method == 'max':
        comp_out = da.nanmax(stacked, axis=0, keepdims=True).compute()
    elif method == 'min':
        comp_out = da.nanmin(stacked, axis=0, keepdims=True).compute()
    else:
        raise Exception('{} is not a valid compositing '
                        'method!'.format(method))

    # ----------------------------------------------------------------------------
    # SAVE RESULTS TO FILES
    # ----------------------------------------------------------------------------
    # Save composite to GeoTIFF
    tif_time = time.time()
    print("#\n# Saving composite image to TIFF...")

    out_nam = save_nam + ".tif"
    out_pth = os.path.join(save_loc, out_nam)
    out_meta.update(bigtiff="yes", compress='lzw')

    with rasterio.open(out_pth, "w", **out_meta) as dest:
        dest.write(comp_out)

    tif_time = time.time() - tif_time
    print(f"#  Time (TIFF): {tif_time:.2f} seconds")

    # # Save preview file as JPEG
    # jpg_time = time.time()
    # print("#\n# Saving preview image to JPEG...")
    # # Pickle array for passing it to plot_preview()
    # spt = os.path.join(save_loc, "temp_array.p")
    # with open(spt, "wb") as pf:
    #     pickle.dump(comp_out, pf)
    # comp_out = None
    # try:
    #     plot_preview(spt, dt, out_pth[:-3] + "jpg")
    # except MemoryError as me:
    #     print("#  Memory error occurred, could not save to JPEG")
    #     print(me)
    # finally:
    #     # delete pickle
    #     os.remove(spt)
    # jpg_time = time.time() - jpg_time
    # print(f"#  Time (JPEG): {jpg_time:.2f} seconds")

    return out_pth
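The heart of the compositing step is a NaN-aware reduction along the stacked raster axis; a minimal standalone sketch with two tiny synthetic rasters:

import numpy as np
import dask.array as da

a = da.from_array(np.array([[1.0, np.nan], [3.0, 4.0]]), chunks=(2, 2))
b = da.from_array(np.array([[2.0, 5.0], [np.nan, 0.0]]), chunks=(2, 2))
stacked = da.stack([a, b], axis=0)                  # rasters along axis 0
print(da.nanmean(stacked, axis=0).compute())        # [[1.5 5.] [3. 2.]] -- NaNs ignored per pixel
print(da.nanmax(stacked, axis=0).compute())         # [[2. 5.] [3. 4.]]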
Example #25
def composite(src_fps, save_loc, save_nam,
              method="median", comp_mask="all_bad", bbox=None):

    # Prepare save location
    save_dir = os.path.join(save_loc, save_nam)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    # Get extents
    main_extents = output_image_extent(src_fps, bbox)

    # Obtain properties of output array (same for all bands/images)
    out_extents = main_extents['bounds']
    out_w = main_extents['width']
    out_h = main_extents['height']
    nr_bands = main_extents['bandsCount']

    # Initiate arrays for storing number of available & good observations
    nobs = np.zeros((out_h, out_w), dtype=np.int8)
    nok = nobs.copy()

    # Create temp dir if it doesn't exist
    sav_dir = '.\\tmp'
    if not os.path.exists(sav_dir):
        os.mkdir(sav_dir)

    # MAIN LOOP FOR COMPOSITING
    tTim_A = time.time()
    tmp_sav_pth = []
    for band in range(nr_bands):
        print("#\n# Creating composite for Band {}".format(band+1))
        comp_stack = []
        # Loop all images
        for i, fp in enumerate(src_fps):
            str_time = time.time()

            # Open data set
            src = rasterio.open(fp)

            # Save copy of profile for writing tiff at the end
            if band == 0 and i == 0:
                out_meta = src.profile.copy()

            print("#   Processing Image {}.".format(i+1))

            # Skip Reading the image if bbox is out of bounds
            xL, yD, xR, yU = [xy for xy in src.bounds]
            xL_out, yD_out, xR_out, yU_out = out_extents
            chk_bbox = (xL > xR_out or yD > yU_out or
                        xR < xL_out or yU < yD_out)
            if chk_bbox:
                print('#   Image {} not included (out of bounds).'.format(i))
                break

            # Calculate offset for reading and slicing
            win, sl_x, sl_y = image_offset(out_extents, src)

            # ------------------------------
            # Read image and store to pickle
            # ------------------------------
            # Set offset Window for reading of TIF subset
            offset = win

            # Initiate array for output
            comp_band = np.full((out_h, out_w), np.nan, dtype=np.float32)

            # Read image and save to pickle
            print("#     Reading the image.")
            if band == 0:
                tmp_read = src.read(window=offset)
                for nc in range(1, nr_bands):
                    img_nam = ('img' + str(i+1).zfill(2) + "_b"
                               + str(nc+1).zfill(2) + '.p')
                    img_pth = os.path.join(sav_dir, img_nam)
                    pickle.dump(tmp_read[nc], open(img_pth, "wb"))
                tmp_read = tmp_read[0]
            else:
                img_nam = ('img' + str(i+1).zfill(2) + "_b"
                           + str(band+1).zfill(2) + '.p')
                img_pth = os.path.join(sav_dir, img_nam)
                tmp_read = pickle.load(open(img_pth, "rb"))

            # Read the image into the array
            comp_band[sl_y[0]:sl_y[1], sl_x[0]:sl_x[1]] = tmp_read
            tmp_read = None
            src.close()

            # ------------------------------
            # determine bad pixels from mask
            # ------------------------------
            print("#     Determining bad pixels.")
            if band == 0:

                # Get index of mask
                idx_bad = get_mask_idx(fp, offset, comp_mask, dilate=-1)

                # Get index of background
                idx_bck = get_mask_idx(fp, offset, "background")

                # Update nok and nobs
                nobs[sl_y[0]:sl_y[1], sl_x[0]:sl_x[1]] += 1
                nok[sl_y[0]:sl_y[1], sl_x[0]:sl_x[1]] += 1

                nok[idx_bad[0][0]+sl_y[0], idx_bad[0][1]+sl_x[0]] += -1
                nobs[idx_bck[0][0]+sl_y[0], idx_bck[0][1]+sl_x[0]] += -1

                # Save index to pickle for later use
                idx_nam = 'idxBad_' + str(i+1).zfill(2) + '.p'
                idx_pth = os.path.join(sav_dir, idx_nam)
                pickle.dump(idx_bad, open(idx_pth, "wb"))
                idx_bck = None

            else:
                # Read from Pickle
                idx_nam = 'idxBad_' + str(i+1).zfill(2) + '.p'
                idx_pth = os.path.join(sav_dir, idx_nam)
                idx_bad = pickle.load(open(idx_pth, "rb"))

            # Apply mask to image
            if idx_bad[1] > 0:
                comp_band[idx_bad[0][0]+sl_y[0],
                          idx_bad[0][1]+sl_x[0]] = np.nan
                idx_bad = None

            # Stack comp_band array into Dask Array
            comp_stack.append(da.from_array(comp_band, chunks=(1024, 1024)))

            # Close the array to save memory
            comp_band = None

            end_time = time.time()
            print('#   --- Time: %s seconds ---' % (end_time-str_time))

        # Stack all images into 1 array
        stacked = da.stack(comp_stack, axis=0)

        # Calculate composite for selected method with dask
        print("# Compositing Band {}".format(band+1))
        str_time = time.time()
        if method == 'mean':
            comp_out = da.nanmean(stacked, axis=0, keepdims=True).compute()
        elif method == 'median':
            comp_out = da.nanmedian(stacked, axis=0, keepdims=True).compute()
        elif method == 'max':
            comp_out = da.nanmax(stacked, axis=0, keepdims=True).compute()
        elif method == 'min':
            comp_out = da.nanmin(stacked, axis=0, keepdims=True).compute()
        else:
            raise Exception('{} is not a valid compositing '
                            'method!'.format(method))
        end_time = time.time()
        print('# --- Time: %s seconds ---' % (end_time-str_time))

        # After one band is resolved, save to temp file and release memory by
        # deleting the array
        if nr_bands > 1:

            print('# Saving temporary composite file for this band.')

            # Create file name and save using pickle
            sav_fil = 'b_' + str(band+1).zfill(2) + '.p'
            sav_pth = os.path.join(sav_dir, sav_fil)
            pickle.dump(comp_out, open(sav_pth, "wb"))

            # Add to savePth list with filenames
            tmp_sav_pth.append(sav_pth)

            #  Clean up workspace
            comp_out = None

        tTim_B = time.time()
        print('--- Total time: %s seconds --- \n' % (tTim_B - tTim_A))

    # ----------------------------------------------------------------------------
    # OUT OF THE COMPOSITE LOOP, RESTORE SAVED FILES AND BUILD TIF
    # ----------------------------------------------------------------------------
    if nr_bands > 1:

        print("# Restoring saved bands.")
        str_time = time.time()

        # Initiate output array
        comp_out = np.full((nr_bands, out_h, out_w), np.nan, dtype=np.float32)

        for bnd, pth in enumerate(tmp_sav_pth):
            comp_out[bnd, :, :] = pickle.load(open(pth, "rb"))

        # Remove temporary folder
        rmtree(sav_dir, ignore_errors=True)
        end_time = time.time()
        print('--- Time: %s seconds ---' % (end_time-str_time))

    # ----------------------------------------------------------------------------
    # SAVE RESULTS TO TIF
    # ----------------------------------------------------------------------------
    print("# Saving composite image to TIFF.")
    str_time = time.time()

    # Save composite
    out_nam = save_nam + "_composite.tif"
    out_pth = os.path.join(save_dir, out_nam)

    out_px = out_meta["transform"][0]
    out_py = out_meta["transform"][4]
    out_trans = Affine(out_px, 0.0, xL_out, 0.0, out_py, yU_out)

    out_meta.update(
        height=comp_out.shape[1], width=comp_out.shape[2],
        transform=out_trans, bigtiff="yes"
        )

    with rasterio.open(out_pth, "w", **out_meta) as dest:
        dest.write(comp_out)

    # Save nok mask
    out_nam = save_nam + "_nok.tif"
    out_pth = os.path.join(save_dir, out_nam)
    nok_meta = out_meta.copy()
    nok_meta.update(
        count=1,
        dtype="int8"
        )

    with rasterio.open(out_pth, "w", **nok_meta) as dest:
        dest.write(np.expand_dims(nok, axis=0))

    # Save nobs mask
    out_nam = save_nam + "_nobs.tif"
    out_pth = os.path.join(save_dir, out_nam)
    with rasterio.open(out_pth, "w", **nok_meta) as dest:
        dest.write(np.expand_dims(nobs, axis=0))

    end_time = time.time()
    print('--- Time: %s seconds ---' % (end_time-str_time))

    tTim_B = time.time()
    print('\n--- Total time: %s seconds --- \n' % (tTim_B - tTim_A))
Example #26
def scale_varr_da(varr, scale=(0, 1)):
    return ((varr - darr.nanmin(varr)) * (scale[1] - scale[0]) /
            (darr.nanmax(varr) - darr.nanmin(varr))) + scale[0]
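scale_varr_da linearly rescales an array onto the interval given by `scale`, using the NaN-aware global extrema; a small usage sketch, assuming `darr` is the `dask.array` module as the alias suggests:

import numpy as np
import dask.array as darr

varr = darr.from_array(np.array([2.0, np.nan, 6.0, 10.0]), chunks=2)
print(scale_varr_da(varr, scale=(0, 1)).compute())   # [0.  nan  0.5  1.]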
Example #27
def _apply(func,
           datasets,
           chunk=CHUNK,
           pad=None,
           relabel=False,
           stack=False,
           compute=True,
           out=None,
           normalize=False,
           **kwargs):
    """
    Applies a function to a given set of datasets. Wraps a standard
    function call of the form:

        func(*datasets, **kwargs)

    Named parameters give extra functionality.

    Parameters
    ----------
    func: callable
        Function to be mapped across datasets.
    datasets: list of numpy array-like
        Input datasets.
    chunk: boolean
        If `True` then input datasets will be assumed to be `Dask.Array`s and
        the function will be mapped across array blocks.
    pad: None, int or iterable
        The padding to apply (only if `chunk = True`). If `pad != None` then
        `dask.array.ghost.map_overlap` will be used to map the function across
        overlapping blocks, otherwise `dask.array.map_blocks` will be used.
    relabel: boolean
        Some of the labelling functions will yield local labelling if `chunk=True`.
        If `func` is a labelling function, set `relabel = True` to map the result
        for global consistency. See `survos2.improc.utils.dask_relabel_chunks` for
        more details.
    stack: boolean
        If `True`, the input datasets are stacked along a new leading axis and
        processed as a single array.
    normalize: boolean
        If `True` (and `relabel` is `False`), the result is divided by the maximum
        of its absolute values.
    compute: boolean
        If `True` the result will be computed and returned in numpy array form,
        otherwise a `dask.delayed` will be returned if `chunk = True`.
    out: None or numpy array-like
        if `out != None` then the result will be stored in there.
    **kwargs: other keyword arguments
        Arguments to be passed to `func`.

    Returns
    -------
    result: numpy array-like
        The computed result if `compute = True` or `chunk = False`, the result
        of the lazy wrapping otherwise.
    """
    if stack and len(datasets) > 1:
        dataset = da.stack(datasets, axis=0)
        dataset = da.rechunk(dataset,
                             chunks=(dataset.shape[0], ) + dataset.chunks[1:])
        datasets = [dataset]

    if chunk == True:
        kwargs.setdefault('dtype', out.dtype if out is not None else datasets[0].dtype)
        kwargs.setdefault('drop_axis', 0 if stack else None)
        if pad is None or pad == False:
            result = da.map_blocks(func, *datasets, **kwargs)
        elif len(datasets) == 1:
            if np.isscalar(pad):
                pad = [pad] * datasets[0].ndim

            if stack:
                pad[0] = 0  # don't pad feature channel
                depth = {i: d for i, d in enumerate(pad)}
                trim = {i: d for i, d in enumerate(pad[1:])}
            else:
                depth = trim = {i: d for i, d in enumerate(pad)}

            g = da.ghost.ghost(datasets[0], depth=depth, boundary='reflect')
            r = g.map_blocks(func, **kwargs)
            result = da.ghost.trim_internal(r, trim)
        else:
            raise ValueError('`pad` only works with a single dataset')

        rchunks = result.chunks

        if not relabel and normalize:
            result = result / da.nanmax(da.fabs(result))

        if out is not None:
            result.store(out, compute=True)
        elif compute:
            result = result.compute()

        if relabel:
            if out is not None:
                result = dask_relabel_chunks(da.from_array(out,
                                                           chunks=rchunks))
                result.store(out, compute=True)
            else:
                result = dask_relabel_chunks(
                    da.from_array(result, chunks=rchunks))
                if compute:
                    result = result.compute()
    else:
        result = func(*datasets, **kwargs)
        if out is not None:
            out[...] = result

    if out is None:
        return result
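A hypothetical usage sketch of the chunked path (CHUNK and dask_relabel_chunks come from the surrounding module and are not shown; the choice of scipy's gaussian_filter is only illustrative):

import numpy as np
import dask.array as da
from scipy.ndimage import gaussian_filter

vol = da.from_array(np.random.rand(64, 64, 64), chunks=32)
# Map a blockwise Gaussian filter over the volume and rescale the result by its
# largest absolute value (normalize=True); extra kwargs are forwarded to func.
smoothed = _apply(gaussian_filter, [vol], chunk=True, normalize=True, sigma=2.0)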