Example #1
def reshape(a, nstep, mstep, chunksize, aligned=0):
    shape = a.shape
    dtype = a.dtype

    # Optionally prepend `aligned` fill values along the last axis
    if aligned != 0:
        newshape = shape[:-1] + (aligned, )
        fill = da.full(newshape,
                       com.get_fill(dtype),
                       dtype=dtype,
                       chunks=chunksize)
        a = da.concatenate((fill, a), axis=-1)
        shape = a.shape

    # Pad the last axis so its length is a multiple of mstep
    fill_len = (mstep - shape[-1] % mstep) % mstep
    if fill_len != 0:
        newshape = shape[:-1] + (fill_len, )
        fill = da.full(newshape,
                       com.get_fill(dtype),
                       dtype=dtype,
                       chunks=chunksize)
        a = da.concatenate((a, fill), axis=-1)

    # Fold the padded last axis into an (nstep, mstep) block structure
    newshape = shape[:-1] + (nstep, mstep)
    a = a.reshape(newshape).rechunk(chunksize)
    return a
Example #2
def test_wrap_consistent_names():
    assert (sorted(ones(10, dtype='i4', chunks=(4,)).dask) ==
            sorted(ones(10, dtype='i4', chunks=(4,)).dask))
    assert (sorted(ones(10, dtype='i4', chunks=(4,)).dask) !=
            sorted(ones(10, chunks=(4,)).dask))
    assert (sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask) ==
            sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask))
    assert (sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='i2').dask) !=
            sorted(da.full((3, 3), 100, chunks=(2, 2)).dask))
Example #3
File: test_wrap.py  Project: ankravch/dask
def test_wrap_consistent_names():
    assert sorted(ones(10, dtype='i4', chunks=(4,)).dask) ==\
           sorted(ones(10, dtype='i4', chunks=(4,)).dask)
    assert sorted(ones(10, dtype='i4', chunks=(4,)).dask) !=\
           sorted(ones(10, chunks=(4,)).dask)
    assert sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask) ==\
           sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask)
    assert sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask) !=\
           sorted(da.full((3, 3), 100, chunks=(2, 2)).dask)
Example #4
def test_ms_create_and_update(Dataset, tmp_path, chunks):
    """ Test that we can update and append at the same time """
    filename = str(tmp_path / "create-and-update.ms")

    rs = np.random.RandomState(42)

    # Create a dataset of 10 rows with DATA and DATA_DESC_ID
    dims = ("row", "chan", "corr")
    row, chan, corr = tuple(sum(chunks[d]) for d in dims)
    ms_datasets = []
    np_data = (rs.normal(size=(row, chan, corr)) +
               1j * rs.normal(size=(row, chan, corr))).astype(np.complex64)

    data_chunks = tuple((chunks['row'], chan, corr))
    dask_data = da.from_array(np_data, chunks=data_chunks)
    # Create dask ddid column
    dask_ddid = da.full(row, 0, chunks=chunks['row'], dtype=np.int32)
    dataset = Dataset({
        'DATA': (dims, dask_data),
        'DATA_DESC_ID': (("row", ), dask_ddid),
    })
    ms_datasets.append(dataset)

    # Write it
    writes = xds_to_table(ms_datasets, filename, ["DATA", "DATA_DESC_ID"])
    dask.compute(writes)

    ms_datasets = xds_from_ms(filename)

    # Now add another dataset (different DDID), with no ROWID
    np_data = (rs.normal(size=(row, chan, corr)) +
               1j * rs.normal(size=(row, chan, corr))).astype(np.complex64)
    data_chunks = tuple((chunks['row'], chan, corr))
    dask_data = da.from_array(np_data, chunks=data_chunks)
    # Create dask ddid column
    dask_ddid = da.full(row, 1, chunks=chunks['row'], dtype=np.int32)
    dataset = Dataset({
        'DATA': (dims, dask_data),
        'DATA_DESC_ID': (("row", ), dask_ddid),
    })
    ms_datasets.append(dataset)

    # Write it
    writes = xds_to_table(ms_datasets, filename, ["DATA", "DATA_DESC_ID"])
    dask.compute(writes)

    # Rows have been added and additional data is present
    with pt.table(filename, ack=False, readonly=True) as T:
        first_data_desc_id = da.full(row,
                                     ms_datasets[0].DATA_DESC_ID,
                                     chunks=chunks['row'])
        ds_data = da.concatenate(
            [ms_datasets[0].DATA.data, ms_datasets[1].DATA.data])
        ds_ddid = da.concatenate(
            [first_data_desc_id, ms_datasets[1].DATA_DESC_ID.data])
        assert_array_equal(T.getcol("DATA"), ds_data)
        assert_array_equal(T.getcol("DATA_DESC_ID"), ds_ddid)
Example #5
def test_wrap_consistent_names():
    assert sorted(ones(10, dtype="i4", chunks=(4, )).dask) == sorted(
        ones(10, dtype="i4", chunks=(4, )).dask)
    assert sorted(ones(10, dtype="i4", chunks=(4, )).dask) != sorted(
        ones(10, chunks=(4, )).dask)
    assert sorted(da.full(
        (3, 3), 100, chunks=(2, 2), dtype="f8").dask) == sorted(
            da.full((3, 3), 100, chunks=(2, 2), dtype="f8").dask)
    assert sorted(da.full(
        (3, 3), 100, chunks=(2, 2), dtype="i2").dask) != sorted(
            da.full((3, 3), 100, chunks=(2, 2)).dask)
Example #6
def test_lettered_tiles_no_valid_data(self):
    """Test creating a lettered grid with no valid data."""
    from satpy.writers.awips_tiled import AWIPSTiledWriter
    from xarray import DataArray
    from pyresample.geometry import AreaDefinition
    from pyresample.utils import proj4_str_to_dict
    w = AWIPSTiledWriter(base_dir=self.base_dir, compress=True)
    area_def = AreaDefinition(
        'test',
        'test',
        'test',
        proj4_str_to_dict('+proj=lcc +datum=WGS84 +ellps=WGS84 +lon_0=-95. '
                          '+lat_0=25 +lat_1=25 +units=m +no_defs'),
        1000,
        2000,
        (-1000000., -1500000., 1000000., 1500000.),
    )
    now = datetime(2018, 1, 1, 12, 0, 0)
    ds = DataArray(
        da.full((2000, 1000), np.nan, chunks=500, dtype=np.float32),
        attrs=dict(
            name='test_ds',
            platform_name='PLAT',
            sensor='SENSOR',
            units='1',
            area=area_def,
            start_time=now,
            end_time=now + timedelta(minutes=20))
    )
    w.save_datasets([ds], sector_id='LCC', source_name="TESTS", tile_count=(3, 3), lettered_grid=True)
    # No files created - all NaNs should result in no tiles being created
    all_files = glob(os.path.join(self.base_dir, 'TESTS_AII*.nc'))
    assert not all_files
Example #7
def test_get_padding_area_float():
    """Test padding area generator for floats."""
    shape = (10, 10)
    dtype = np.float64
    res = get_padding_area(shape, dtype)
    expected = da.full(shape, np.nan, dtype=dtype, chunks=CHUNK_SIZE)
    np.testing.assert_array_equal(res, expected)
Example #8
def test_full():
    a = da.full((3, 3), 100, chunks=(2, 2), dtype='i8')

    assert (a.compute() == 100).all()
    assert a.dtype == a.compute(scheduler='sync').dtype == 'i8'

    assert a.name.startswith('full-')
Example #9
def test_get_padding_area_int():
    """Test padding area generator for integers."""
    shape = (10, 10)
    dtype = np.int64
    res = get_padding_area(shape, dtype)
    expected = da.full(shape, 0, dtype=dtype, chunks=CHUNK_SIZE)
    np.testing.assert_array_equal(res, expected)
Example #10
def test_full():
    a = da.full((3, 3), 100, chunks=(2, 2), dtype="i8")

    assert (a.compute() == 100).all()
    assert a.dtype == a.compute(scheduler="sync").dtype == "i8"

    assert a.name.startswith("full_like-")
Example #11
    def _ir_calibrate(self, radiance, measured, root):
        """IR channel calibration."""
        coef = self[measured + "/radiance_unit_conversion_coefficient"]
        wl_c = self[root + "/central_wavelength_actual"]

        a = self[measured + "/radiance_to_bt_conversion_coefficient_a"]
        b = self[measured + "/radiance_to_bt_conversion_coefficient_b"]

        c1 = self[measured + "/radiance_to_bt_conversion_constant_c1"]
        c2 = self[measured + "/radiance_to_bt_conversion_constant_c2"]

        for v in (coef, wl_c, a, b, c1, c2):
            if v == v.attrs.get("FillValue",
                                default_fillvals.get(v.dtype.str[1:])):
                logger.error("{:s} set to fill value, cannot produce "
                             "brightness temperatures for {:s}.".format(
                                 v.attrs.get(
                                     "long_name",
                                     "at least one necessary coefficient"),
                                 root))
                return xr.DataArray(da.full(shape=radiance.shape,
                                            chunks=radiance.chunks,
                                            fill_value=np.nan),
                                    dims=radiance.dims,
                                    coords=radiance.coords,
                                    attrs=radiance.attrs)

        Lv = radiance * coef
        vc = 1e6 / wl_c  # from wl in um to wn in m^-1
        nom = c2 * vc
        denom = a * np.log(1 + (c1 * vc**3) / Lv)

        res = nom / denom - b / a
        res.attrs["units"] = "K"
        return res
Example #12
File: test_wrap.py  Project: m-rossi/dask
def test_full_detects_da_dtype():
    x = da.from_array(100)
    with pytest.warns(FutureWarning, match="not implemented by Dask array") as record:
        # This should not raise a NotImplementedError due to the dtype being detected as object.
        a = da.full(shape=(3, 3), fill_value=x)
        assert a.dtype == x.dtype
        assert_eq(a, np.full(shape=(3, 3), fill_value=100))
    assert len(record) == 1
Example #13
def test_inlined_array():
    A = da.ones((10, 10), chunks=(2, 2), dtype=np.float64)
    B = da.full((10, 10), np.float64(2), chunks=(2, 2))
    C = A + B
    E = C + 1

    D = inlined_array(C)
    assert len(C.__dask_graph__().layers) == 3
    assert D.name == C.name
    assert D.name in D.__dask_graph__().layers
    assert A.name not in D.__dask_graph__().layers
    assert B.name not in D.__dask_graph__().layers
    graph_keys = set(flatten(D.__dask_graph__().keys()))
    assert graph_keys == set(flatten(D.__dask_keys__()))
    assert_array_equal(D, C)

    D = inlined_array(C, [A, B])
    assert len(D.__dask_graph__().layers) == 1
    assert D.name == C.name
    assert D.name in D.__dask_graph__().layers
    assert A.name not in D.__dask_graph__().layers
    assert B.name not in D.__dask_graph__().layers
    graph_keys = set(flatten(D.__dask_graph__().keys()))
    assert graph_keys == set(flatten(D.__dask_keys__()))
    assert_array_equal(D, C)

    D = inlined_array(C, [A])
    assert len(D.__dask_graph__().layers) == 2
    assert D.name == C.name
    assert D.name in D.__dask_graph__().layers
    assert A.name not in D.__dask_graph__().layers
    assert B.name in D.__dask_graph__().layers
    graph_keys = set(flatten(D.__dask_graph__().keys()))
    assert graph_keys == set(flatten([a.__dask_keys__() for a in [D, B]]))
    assert_array_equal(D, C)

    D = inlined_array(C, [B])
    assert len(D.__dask_graph__().layers) == 2
    assert D.name == C.name
    assert D.name in D.__dask_graph__().layers
    assert A.name in D.__dask_graph__().layers
    assert B.name not in D.__dask_graph__().layers
    graph_keys = set(flatten(D.__dask_graph__().keys()))
    assert graph_keys == set(flatten([a.__dask_keys__() for a in [D, A]]))
    assert_array_equal(D, C)

    D = inlined_array(E, [A])
    assert len(D.__dask_graph__().layers) == 3
    assert D.name == E.name
    assert D.name in D.__dask_graph__().layers
    assert B.name in D.__dask_graph__().layers
    assert A.name not in D.__dask_graph__().layers
    assert C.name in D.__dask_graph__().layers
    graph_keys = set(flatten(D.__dask_graph__().keys()))
    assert graph_keys == set(flatten([a.__dask_keys__() for a in [D, B, C]]))
    assert_array_equal(D, E)
Example #14
def _pad_dask_pieces_after(self, pieces, dask_pieces, chunks):
    """Pad the dask pieces after."""
    last_x = max(arr.coords['x'][-1] for arr in pieces)
    if last_x < self._image_shape[1] - 1:
        missing_x = np.arange(last_x + 1, self._image_shape[1])
        missing_y = pieces[-1].coords['y']
        new_piece = da.full((len(missing_y), len(missing_x)),
                            np.nan,
                            chunks=chunks)
        dask_pieces.append(new_piece)
Example #15
def _pad_dask_pieces_before(pieces, dask_pieces, chunks):
    """Pad the dask pieces before."""
    first_x = min(arr.coords['x'][0] for arr in pieces)
    if first_x > 0:
        missing_x = np.arange(first_x)
        missing_y = pieces[0].coords['y']
        new_piece = da.full((len(missing_y), len(missing_x)),
                            np.nan,
                            chunks=chunks)
        dask_pieces.insert(0, new_piece)
Example #16
def get_padding_area(shape, dtype):
    """Create a padding area filled with no data."""
    if np.issubdtype(dtype, np.floating):
        init_value = np.nan
    else:
        init_value = 0

    padding_area = da.full(shape, init_value, dtype=dtype, chunks=CHUNK_SIZE)

    return padding_area
Example #17
    def setup(self):
        CHUNK_SIZE = 10
        NCHUNKS = 9000
        SIZE = CHUNK_SIZE * NCHUNKS

        base = [
            da.full((SIZE, ), i, dtype=np.int8, chunks=CHUNK_SIZE)
            for i in range(4)
        ]
        self.base = base
Example #18
def test_resample_area_to_area_2d_fill_value(self):
    """Resample area to area, 2d, use fill value."""
    data = xr.DataArray(da.full(self.src_area.shape,
                                np.nan,
                                dtype=np.float64),
                        dims=['y', 'x'])
    res = self.resampler.compute(
        data, method='bil',
        fill_value=2.0).compute(scheduler='single-threaded')
    assert res.shape == self.dst_area.shape
    assert np.allclose(res, 2.0)
Example #19
def test_lettered_tiles_no_valid_data(self):
    """Test creating a lettered grid with no valid data."""
    from satpy.writers.awips_tiled import AWIPSTiledWriter
    w = AWIPSTiledWriter(base_dir=self.base_dir, compress=True)
    data = da.full((2000, 1000), np.nan, chunks=500, dtype=np.float32)
    area_def = self._get_test_area(shape=(2000, 1000),
                                   extents=(-1000000., -1500000., 1000000., 1500000.))
    ds = self._get_test_lcc_data(data, area_def)
    w.save_datasets([ds], sector_id='LCC', source_name="TESTS", tile_count=(3, 3), lettered_grid=True)
    # No files created - all NaNs should result in no tiles being created
    all_files = glob(os.path.join(self.base_dir, 'TESTS_AII*.nc'))
    assert not all_files
Example #20
def _multimodel_mask_cubes(cubes, shape):
    """Apply common mask to all cubes in-place."""
    # Create mask
    mask = da.full(shape, False, dtype=bool)
    for cube in cubes:
        new_mask = da.ma.getmaskarray(cube.core_data())
        mask |= new_mask

    # Apply common mask
    for cube in cubes:
        cube.data = da.ma.masked_array(cube.core_data(), mask=mask)

    return cubes
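The mask-union idiom above starts from an all-False array built with da.full and ORs in each cube's mask. A minimal stand-alone sketch of the same idiom, using plain dask arrays rather than iris cubes (the values are illustrative only):

import dask.array as da

# Two masked arrays with masked values at different positions
a = da.ma.masked_less(da.arange(4, chunks=2), 1)     # masks the 0
b = da.ma.masked_greater(da.arange(4, chunks=2), 2)  # masks the 3

# Accumulate the union of the individual masks into a boolean da.full array
mask = da.full((4,), False, dtype=bool)
for arr in (a, b):
    mask |= da.ma.getmaskarray(arr)

print(mask.compute())  # [ True False False  True]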
Example #21
def average_spw(spw_ds, chan_bin_size):
    """
    Parameters
    ----------
    spw_ds : list of Datasets
        list of Datasets, each describing a single Spectral Window
    chan_bin_size : int
        Number of channels in an averaging bin

    Returns
    -------
    spw_ds : list of Datasets
        list of Datasets, each describing an averaged Spectral Window
    """

    new_spw_ds = []

    for r, spw in enumerate(spw_ds):
        # Get the dataset variables as a mutable dictionary
        dv = dict(spw.data_vars)

        # Extract arrays we wish to average
        chan_freq = dv['CHAN_FREQ'].data[0]
        chan_width = dv['CHAN_WIDTH'].data[0]
        effective_bw = dv['EFFECTIVE_BW'].data[0]
        resolution = dv['RESOLUTION'].data[0]

        # Construct channel metadata
        chan_arrays = (chan_freq, chan_width, effective_bw, resolution)
        chan_meta = chan_metadata((), chan_arrays, chan_bin_size)
        # Average channel based data
        avg = dask_chan_avg(chan_meta,
                            chan_freq=chan_freq,
                            chan_width=chan_width,
                            effective_bw=effective_bw,
                            resolution=resolution,
                            chan_bin_size=chan_bin_size)

        num_chan = da.full((1, ), avg.chan_freq.shape[0], dtype=np.int32)

        # These columns change, re-create them
        dv['NUM_CHAN'] = (("row", ), num_chan)
        dv['CHAN_FREQ'] = (("row", "chan"), avg.chan_freq[None, :])
        dv['CHAN_WIDTH'] = (("row", "chan"), avg.chan_width[None, :])
        dv['EFFECTIVE_BW'] = (("row", "chan"), avg.effective_bw[None, :])
        dv['RESOLUTION'] = (("row", "chan"), avg.resolution[None, :])

        # But re-use all the others
        new_spw_ds.append(Dataset(dv))

    return new_spw_ds
Example #22
    def data(self):
        """
        Get the buffer contents in shape that corresponds to the
        original dataset shape, using a lazy Dask array.

        Copied largely from BufferWrapper with modifications to ensure
        Dask arrays are correctly unpacked into the result array.

        #TODO consider if this needs to be cached to avoid creating
        multiple copies in the task graph ?

        If a ROI is set, embed the result into a new
        array; unset values have NaN value for floating point types,
        False for boolean, 0 for integer types and structs,
        '' for string types and None for objects.
        """
        if isinstance(self._data, DaskInplaceWrapper):
            self._data = self._data.data
        if self._contiguous_cache:
            raise RuntimeError("Cache is not empty, has to be flushed")
        if self._roi is None or self._kind != 'nav':
            return self._data.reshape(
                self._shape_for_kind(self._kind, self._ds_shape))
        shape = self._shape_for_kind(self._kind, self._ds_shape)
        if shape == self._data.shape:
            # preallocated and already wrapped
            return self._data
        # Integer types and "void" (structs and such)
        if self.dtype.kind in ('i', 'u', 'V'):
            fill = 0
        # Bytes and Unicode strings
        elif self.dtype.kind in ('S', 'U'):
            fill = ''
        else:
            # 'b' (boolean): False
            # 'f', 'c': NaN
            # 'm', 'M' (datetime, timedelta): NaT
            # 'O' (object): None
            fill = None

        flat_chunking = tuple(p.slice.shape[0] for p in self._ds_partitions)
        flat_chunking = (flat_chunking, ) + self._extra_chunking
        flat_shape = (prod(self._ds_shape.nav), ) + self._extra_shape
        flat_wrapper = da.full(flat_shape,
                               fill,
                               dtype=self._dtype,
                               chunks=flat_chunking)
        flat_wrapper[self._roi, ...] = self._data
        wrapper = flat_wrapper.reshape(self._ds_shape.nav + self._extra_shape)
        return wrapper
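The ROI branch above follows a fill-and-embed pattern: allocate a flat array of fill values with da.full, assign the computed data at the ROI positions, then reshape to the navigation shape. A minimal stand-alone sketch of that pattern, with illustrative shapes and chunking, assuming a Dask version that supports boolean-mask assignment (which the code above relies on):

import numpy as np
import dask.array as da

# 16 flat navigation positions, 4 of which are selected by the ROI
roi = np.zeros(16, dtype=bool)
roi[[1, 5, 9, 13]] = True

# Computed results exist only for the ROI positions
data = da.arange(4, chunks=2, dtype=np.float64)

# Embed into a NaN-filled array, then reshape to a 4x4 navigation grid
flat = da.full((16,), np.nan, dtype=np.float64, chunks=4)
flat[roi] = data
wrapped = flat.reshape((4, 4))
print(wrapped.compute())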
Example #23
def test_array_creation_blockwise_fusion():
    """
    Check that certain array creation routines work with blockwise and can be
    fused with other blockwise operations.
    """
    x = da.ones(3, chunks=(3,))
    y = da.zeros(3, chunks=(3,))
    z = da.full(3, fill_value=2, chunks=(3,))
    a = x + y + z
    dsk1 = a.__dask_graph__()
    assert len(dsk1) == 5
    dsk2 = optimize_blockwise(dsk1)
    assert len(dsk2) == 1
    assert_eq(a, np.full(3, 3))
Example #24
def full(shape, *args, **kwargs):
    try:
        array_used_to_infer_type = kwargs.pop('as_type_of')
    except KeyError:
        msg = 'as_type_of is mandatory: this is an array used to infer the '
        msg += 'type of the generated array'
        raise ValueError(msg)

    if isinstance(array_used_to_infer_type, da.Array):
        return da.full(shape, *args, **kwargs)
    elif isinstance(array_used_to_infer_type, np.ndarray):
        return np.full(shape, *args, **kwargs)
    else:
        msg = 'Not implemented for types other than dask array or numpy ndarray'
        raise NotImplementedError(msg)
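A brief usage sketch of the wrapper above, assuming the full() function just defined is in scope (shapes and values are illustrative):

import numpy as np
import dask.array as da

# The reference array is a dask array, so this dispatches to da.full
d = full((4, 4), 7, as_type_of=da.zeros(2), chunks=(2, 2), dtype='i4')

# The reference array is a numpy ndarray, so this dispatches to np.full
n = full((4, 4), 7, as_type_of=np.zeros(2), dtype='i4')

# Omitting as_type_of raises the ValueError described above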
Example #25
def apply_common_mask(cfg, input_data):
    """Apply common mask to all datasets."""
    if not cfg.get('apply_common_mask'):
        return input_data
    logger.info("Applying common mask to all cubes")
    shapes = {data['cube'].shape for data in input_data}
    if len(shapes) > 1:
        raise ValueError(
            f"Expected cubes with identical shapes when 'apply_common_mask' "
            f"is set to 'True', got shapes {shapes}")
    common_mask = da.full(list(shapes)[0], False)
    for data in input_data:
        common_mask |= da.ma.getmaskarray(data['cube'].core_data())
    for data in input_data:
        data['cube'].data = da.ma.masked_array(data['cube'].core_data(),
                                               mask=common_mask)
    return input_data
Example #26
def parallel_gradient_search(data, src_x, src_y, dst_x, dst_y, src_gradient_xl,
                             src_gradient_xp, src_gradient_yl, src_gradient_yp,
                             dst_mosaic_locations, dst_slices, **kwargs):
    """Run gradient search in parallel in input area coordinates."""
    method = kwargs.get('method', 'bilinear')
    # Determine the number of bands
    bands = np.array([arr.shape[0] for arr in data if arr is not None])
    num_bands = np.max(bands)
    if np.any(bands != num_bands):
        raise ValueError(
            "All source data chunks have to have the same number of bands")
    chunks = {}
    is_pad = False
    # Collect co-located target chunks
    for i, arr in enumerate(data):
        if arr is None:
            is_pad = True
            res = da.full((num_bands, dst_slices[i][1] - dst_slices[i][0],
                           dst_slices[i][3] - dst_slices[i][2]), np.nan)
        else:
            is_pad = False
            res = dask.delayed(_gradient_resample_data)(arr.astype(np.float64),
                                                        src_x[i],
                                                        src_y[i],
                                                        src_gradient_xl[i],
                                                        src_gradient_xp[i],
                                                        src_gradient_yl[i],
                                                        src_gradient_yp[i],
                                                        dst_x[i],
                                                        dst_y[i],
                                                        method=method)
            res = da.from_delayed(res, (num_bands, ) + dst_x[i].shape,
                                  dtype=np.float64)
        if dst_mosaic_locations[i] in chunks:
            if not is_pad:
                chunks[dst_mosaic_locations[i]].append(res)
        else:
            chunks[dst_mosaic_locations[i]] = [
                res,
            ]

    return _concatenate_chunks(chunks)
Example #27
def _multimodel_mask_products(products, shape):
    """Apply common mask to all cubes of products in-place."""
    # Create mask and get products used for mask
    mask = da.full(shape, False, dtype=bool)
    used_products = set()
    for product in products:
        for cube in product.cubes:
            new_mask = da.ma.getmaskarray(cube.core_data())
            mask |= new_mask
            if da.any(new_mask):
                used_products.add(product)

    # Apply common mask and update provenance information
    for product in products:
        for cube in product.cubes:
            cube.data = da.ma.masked_array(cube.core_data(), mask=mask)
        for other_product in used_products:
            if other_product.filename != product.filename:
                product.wasderivedfrom(other_product)

    return products
Example #28
    def _reshape_to_target_area(self, res, ndim):
        if ndim == 3:
            dim_multiplier = res.shape[0]
        else:
            dim_multiplier = 1
            res = da.reshape(res, (1, res.size))
        if res.size != dim_multiplier * self._target_geo_def.size:
            out = []
            for i in range(dim_multiplier):
                tmp = da.full(self._target_geo_def.size, np.nan)
                tmp[self._valid_output_indices] = res[i, :]
                out.append(tmp)
            res = da.stack(out)

        shp = self._target_geo_def.shape
        if ndim == 3:
            res = da.reshape(res, (res.shape[0], shp[0], shp[1]))
        else:
            res = da.reshape(res, (shp[0], shp[1]))

        return res
Example #29
File: convert.py  Project: ska-sa/dask-ms
    def _expand_group_columns(self, datasets, args):
        if not args.group_columns:
            return datasets

        new_datasets = []

        for ds in datasets:
            # Remove grouping attribute and recreate grouping columns
            new_group_vars = {}
            row_chunks = ds.chunks["row"]
            row_dims = ds.dims["row"]
            attrs = ds.attrs

            for column in args.group_columns:
                value = attrs.pop(column)
                group_column = da.full(row_dims, value, chunks=row_chunks)
                new_group_vars[column] = (("row",), group_column)

            new_ds = ds.assign_attrs(attrs).assign(**new_group_vars)
            new_datasets.append(new_ds)

        return new_datasets
Example #30
    def _vis_calibrate(self, radiance, measured):
        """VIS channel calibration."""
        # radiance to reflectance taken as in mipp/xrit/MSG.py
        # again FCI User Guide is not clear on how to do this

        cesilab = measured + "/channel_effective_solar_irradiance"
        cesi = self[cesilab]
        if cesi == cesi.attrs.get("FillValue",
                                  default_fillvals.get(cesi.dtype.str[1:])):
            logger.error(
                "channel effective solar irradiance set to fill value, "
                "cannot produce reflectance for {:s}.".format(measured))
            return xr.DataArray(da.full(shape=radiance.shape,
                                        chunks=radiance.chunks,
                                        fill_value=np.nan),
                                dims=radiance.dims,
                                coords=radiance.coords,
                                attrs=radiance.attrs)

        sirr = float(cesi)
        res = radiance / sirr * 100
        res.attrs["units"] = "%"
        return res
Example #31
def shift(arr, num, axis, fill_value=0):
    """
    Shift N-dim array.
    """
    if not num:
        return arr.copy()

    fill_shape = arr.shape[:axis] + (abs(num), ) + arr.shape[axis + 1:]
    filled = da.full(shape=fill_shape, fill_value=fill_value)

    kept_slice = [slice(None)] * arr.ndim
    if num > 0:
        kept_slice[axis] = slice(0, -num)
        kept = arr[tuple(kept_slice)]
        result = da.concatenate([filled, kept], axis=axis)
    else:
        kept_slice[axis] = slice(-num, None)
        kept = arr[tuple(kept_slice)]
        result = da.concatenate([kept, filled], axis=axis)

    return result
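A quick check of the helper above, assuming the shift() function just defined is in scope:

import dask.array as da

x = da.arange(6, chunks=3)
# Shift right by 2 along axis 0; vacated positions take the fill value 0
print(shift(x, 2, axis=0).compute())   # [0 0 0 1 2 3]
# A negative shift moves left and appends the fill values at the end
print(shift(x, -2, axis=0).compute())  # [2 3 4 5 0 0]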
Example #32
def test_full():
    d = da.full((3, 4), 2, chunks=((2, 1), (2, 2)))
    assert d.chunks == ((2, 1), (2, 2))
    assert eq(d, np.full((3, 4), 2))
Example #33
def rolling_window(a, axis, window, center, fill_value):
    """ Dask's equivalence to np.utils.rolling_window """
    orig_shape = a.shape
    # inputs for ghost
    if axis < 0:
        axis = a.ndim + axis
    depth = {d: 0 for d in range(a.ndim)}
    depth[axis] = int(window / 2)
    # For evenly sized window, we need to crop the first point of each block.
    offset = 1 if window % 2 == 0 else 0

    if depth[axis] > min(a.chunks[axis]):
        raise ValueError(
            "For window size %d, every chunk should be larger than %d, "
            "but the smallest chunk size is %d. Rechunk your array\n"
            "with a larger chunk size or a chunk size that\n"
            "more evenly divides the shape of your array." %
            (window, depth[axis], min(a.chunks[axis])))

    # Although dask.ghost pads values to boundaries of the array,
    # the size of the generated array is smaller than what we want
    # if center == False.
    if center:
        start = int(window / 2)  # 10 -> 5,  9 -> 4
        end = window - 1 - start
    else:
        start, end = window - 1, 0
    pad_size = max(start, end) + offset - depth[axis]
    drop_size = 0
    # pad_size becomes more than 0 when the ghosted array is smaller than
    # needed. In this case, we need to enlarge the original array by padding
    # before ghosting.
    if pad_size > 0:
        if pad_size < depth[axis]:
            # Ghosting requires each chunk larger than depth. If pad_size is
            # smaller than the depth, we enlarge this and truncate it later.
            drop_size = depth[axis] - pad_size
            pad_size = depth[axis]
        shape = list(a.shape)
        shape[axis] = pad_size
        chunks = list(a.chunks)
        chunks[axis] = (pad_size, )
        fill_array = da.full(shape, fill_value, dtype=a.dtype, chunks=chunks)
        a = da.concatenate([fill_array, a], axis=axis)

    boundary = {d: fill_value for d in range(a.ndim)}

    # create ghosted arrays
    ag = da.ghost.ghost(a, depth=depth, boundary=boundary)

    # apply rolling func
    def func(x, window, axis=-1):
        x = np.asarray(x)
        rolling = nputils._rolling_window(x, window, axis)
        return rolling[(slice(None), ) * axis + (slice(offset, None), )]

    chunks = list(a.chunks)
    chunks.append(window)
    out = ag.map_blocks(func, dtype=a.dtype, new_axis=a.ndim, chunks=chunks,
                        window=window, axis=axis)

    # crop boundary.
    index = (slice(None),) * axis + (slice(drop_size,
                                           drop_size + orig_shape[axis]), )
    return out[index]
Example #34
File: test_wrap.py  Project: floriango/dask
def test_full():
    a = da.full((3, 3), 100, chunks=(2, 2), dtype='i8')

    assert (a.compute() == 100).all()
    assert a.dtype == a.compute(scheduler='sync').dtype == 'i8'
Example #35
File: test_wrap.py  Project: ankravch/dask
def test_full():
    a = da.full((3, 3), 100, chunks=(2, 2), dtype='i8')

    assert (a.compute() == 100).all()
    assert a._dtype == a.compute(get=dask.get).dtype == 'i8'
Example #36
def test_full():
    d = da.full((3, 4), 2, blockdims=((2, 1), (2, 2)))
    assert d.blockdims == ((2, 1), (2, 2))
    assert eq(d, np.full((3, 4), 2))