Example #1
    def _slice_padded(self, _bounds):
        pads = (max(-_bounds[0], 0), max(-_bounds[1], 0),
                max(_bounds[2]-self.shape[2], 0), max(_bounds[3]-self.shape[1], 0))
        bounds = (max(_bounds[0], 0),
                  max(_bounds[1], 0),
                  max(min(_bounds[2], self.shape[2]), 0),
                  max(min(_bounds[3], self.shape[1]), 0))
        result = self[:, bounds[1]:bounds[3], bounds[0]:bounds[2]]
        if pads[0] > 0:
            dims = (result.shape[0], result.shape[1], pads[0])
            result = da.concatenate([da.zeros(dims, chunks=dims, dtype=result.dtype),
                                     result], axis=2)
        if pads[2] > 0:
            dims = (result.shape[0], result.shape[1], pads[2])
            result = da.concatenate([result,
                                     da.zeros(dims, chunks=dims, dtype=result.dtype)], axis=2)
        if pads[1] > 0:
            dims = (result.shape[0], pads[1], result.shape[2])
            result = da.concatenate([da.zeros(dims, chunks=dims, dtype=result.dtype),
                                     result], axis=1)
        if pads[3] > 0:
            dims = (result.shape[0], pads[3], result.shape[2])
            result = da.concatenate([result,
                                     da.zeros(dims, chunks=dims, dtype=result.dtype)], axis=1)

        return (result, _bounds[0], _bounds[1])
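The bounds/pads arithmetic above can be checked in isolation. A minimal sketch, assuming a 3-band (bands, y, x) array and bounds given as (xmin, ymin, xmax, ymax); the array and values here are illustrative, not from the original class:

import dask.array as da

image = da.ones((3, 4, 6), chunks=(3, 4, 6))  # stand-in for `self`
_bounds = (-2, 1, 5, 6)  # partly outside the 4x6 (y, x) extent

# zero padding needed on each side (left, top, right, bottom)
pads = (max(-_bounds[0], 0), max(-_bounds[1], 0),
        max(_bounds[2] - image.shape[2], 0), max(_bounds[3] - image.shape[1], 0))
# requested bounds clamped to the array extent
bounds = (max(_bounds[0], 0), max(_bounds[1], 0),
          max(min(_bounds[2], image.shape[2]), 0),
          max(min(_bounds[3], image.shape[1]), 0))

result = image[:, bounds[1]:bounds[3], bounds[0]:bounds[2]]
print(pads, bounds, result.shape)  # (2, 0, 0, 2) (0, 1, 5, 4) (3, 3, 5)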
Example #2
    def test_corrected_green(self):
        """Test adjusting the 'green' band."""
        import xarray as xr
        import dask.array as da
        import numpy as np
        from satpy.composites.ahi import GreenCorrector
        from pyresample.geometry import AreaDefinition
        rows = 5
        cols = 10
        area = AreaDefinition(
            'test', 'test', 'test',
            {'proj': 'eqc', 'lon_0': 0.0,
             'lat_0': 0.0},
            cols, rows,
            (-20037508.34, -10018754.17, 20037508.34, 10018754.17))

        comp = GreenCorrector('green', prerequisites=(0.51, 0.85),
                              standard_name='toa_bidirectional_reflectance')
        c01 = xr.DataArray(da.zeros((rows, cols), chunks=25) + 0.25,
                           dims=('y', 'x'),
                           attrs={'name': 'C01', 'area': area})
        c02 = xr.DataArray(da.zeros((rows, cols), chunks=25) + 0.30,
                           dims=('y', 'x'),
                           attrs={'name': 'C02', 'area': area})
        res = comp((c01, c02))
        self.assertIsInstance(res, xr.DataArray)
        self.assertIsInstance(res.data, da.Array)
        self.assertEqual(res.attrs['name'], 'green')
        self.assertEqual(res.attrs['standard_name'],
                         'toa_bidirectional_reflectance')
        data = res.compute()
        np.testing.assert_allclose(data, 0.2575)
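The expected value is the weighted blend of the two inputs: assuming satpy's default GreenCorrector fractions of (0.85, 0.15), the arithmetic is 0.85 * 0.25 + 0.15 * 0.30 = 0.2125 + 0.045 = 0.2575, matching the assertion.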
Example #3
def test_setitem_with_different_chunks_preserves_shape(params):
    """ Reproducer for https://github.com/dask/dask/issues/3730.

    Mutating based on an array with different chunks can cause new chunks to be
    used.  We need to ensure those new chunk sizes are applied to the mutated
    array, otherwise the array won't generate the correct keys.
    """
    array_size, chunk_size1, chunk_size2 = params
    x = da.zeros(array_size, chunks=chunk_size1)
    mask = da.zeros(array_size, chunks=chunk_size2)
    x[mask] = 1
    result = x.compute()
    assert x.shape == result.shape
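The `params` fixture comes from a pytest parametrization that this excerpt omits; a plausible sketch (the size/chunk triples are illustrative, not the original values):

import pytest

@pytest.mark.parametrize(
    "params",
    [
        (10, 4, 3),  # (array_size, chunk_size1, chunk_size2)
        (5, 2, 3),
    ],
)
def test_setitem_with_different_chunks_preserves_shape(params):
    ...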
Example #4
    def test_3d_ewa(self, ll2cr, fornav):
        """Test EWA with a 3D dataset."""
        import numpy as np
        import dask.array as da
        import xarray as xr
        from satpy.resample import resample_dataset
        from pyresample.geometry import SwathDefinition, AreaDefinition
        from pyresample.utils import proj4_str_to_dict
        lons = xr.DataArray(da.zeros((10, 10), chunks=5))
        lats = xr.DataArray(da.zeros((10, 10), chunks=5))
        ll2cr.return_value = (100,
                              np.zeros((10, 10), dtype=np.float32),
                              np.zeros((10, 10), dtype=np.float32))
        fornav.return_value = ([100 * 200] * 3,
                               [np.zeros((200, 100), dtype=np.float32)] * 3)
        sgd = SwathDefinition(lons, lats)
        proj_dict = proj4_str_to_dict('+proj=lcc +datum=WGS84 +ellps=WGS84 '
                                      '+lon_0=-95. +lat_0=25 +lat_1=25 '
                                      '+units=m +no_defs')
        tgd = AreaDefinition(
            'test',
            'test',
            'test',
            proj_dict,
            x_size=100,
            y_size=200,
            area_extent=(-1000., -1500., 1000., 1500.),
        )
        input_data = xr.DataArray(
            da.zeros((3, 10, 10), chunks=5, dtype=np.float32),
            dims=('bands', 'y', 'x'), attrs={'area': sgd, 'test': 'test'})

        new_data = resample_dataset(input_data, tgd, resampler='ewa')
        self.assertTupleEqual(new_data.shape, (3, 200, 100))
        self.assertEqual(new_data.dtype, np.float32)
        self.assertEqual(new_data.attrs['test'], 'test')
        self.assertIs(new_data.attrs['area'], tgd)
        # make sure we can actually compute everything
        new_data.compute()
        previous_calls = ll2cr.call_count

        # resample a different dataset and make sure cache is used
        input_data = xr.DataArray(
            da.zeros((3, 10, 10), chunks=5, dtype=np.float32),
            dims=('bands', 'y', 'x'), attrs={'area': sgd, 'test': 'test'})
        new_data = resample_dataset(input_data, tgd, resampler='ewa')
        self.assertEqual(ll2cr.call_count, previous_calls)
        new_data.compute()
Example #5
def test_index_with_int_dask_array_nanchunks(chunks):
    # Slice by array with nan-sized chunks
    a = da.arange(-2, 3, chunks=chunks)
    assert_eq(a[a.nonzero()], np.array([-2, -1,  1,  2]))
    # Edge case: the nan-sized chunks resolve to size 0
    a = da.zeros(5, chunks=chunks)
    assert_eq(a[a.nonzero()], np.array([]))
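`chunks` is likewise supplied by a parametrization not shown here; illustrative values that exercise both aligned and unaligned chunking of the length-5 array:

import pytest

@pytest.mark.parametrize("chunks", [1, 2, 3, 5])
def test_index_with_int_dask_array_nanchunks(chunks):
    ...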
Example #6
def test_expand_without_dims(self):
    from satpy.resample import NativeResampler
    import numpy as np
    import dask.array as da
    from xarray import DataArray
    from pyresample.geometry import AreaDefinition
    from pyresample.utils import proj4_str_to_dict
    ds1 = DataArray(da.zeros((100, 50), chunks=85))
    proj_dict = proj4_str_to_dict('+proj=lcc +datum=WGS84 +ellps=WGS84 '
                                  '+lon_0=-95. +lat_0=25 +lat_1=25 '
                                  '+units=m +no_defs')
    target = AreaDefinition(
        'test',
        'test',
        'test',
        proj_dict,
        x_size=100,
        y_size=200,
        area_extent=(-1000., -1500., 1000., 1500.),
    )
    # source geo def doesn't actually matter
    resampler = NativeResampler(None, target)
    new_arr = resampler.resample(ds1)
    self.assertEqual(new_arr.shape, (200, 100))
    new_arr2 = resampler.resample(ds1.compute())
    self.assertTrue(np.all(new_arr == new_arr2))
Example #7
def test_0d_array():
    x = da.mean(da.ones(4, chunks=4), axis=0).compute()
    y = np.mean(np.ones(4))
    assert type(x) == type(y)

    x = da.sum(da.zeros(4, chunks=1)).compute()
    y = np.sum(np.zeros(4))
    assert type(x) == type(y)
Example #8
def test_fuse_roots():
    x = da.ones(10, chunks=(2,))
    y = da.zeros(10, chunks=(2,))
    z = (x + 1) + (2 * y ** 2)
    (zz,) = dask.optimize(z)
    # assert len(zz.dask) == 5
    assert sum(map(dask.istask, zz.dask.values())) == 5  # there are some aliases
    assert_eq(zz, z)
Example #9
def test_get_signal_chunk_slice_not_square(sig_chunks, index, expected):
    data = da.zeros((2, 2, 10, 20), chunks=(2, 2, *sig_chunks[::-1]))
    if expected == 'error':
        with pytest.raises(ValueError):
            chunk_slice = get_signal_chunk_slice(index, data.chunks)
    else:
        chunk_slice = get_signal_chunk_slice(index, data.chunks)
        assert chunk_slice == expected
Example #10
def execute_between_time(op, data, lower, upper, **kwargs):
    # TODO - Can this be done better?
    indexer = ((data.dt.time.astype(str) >= lower)
               & (data.dt.time.astype(str) <= upper)).to_dask_array(True)

    result = da.zeros(len(data), dtype=np.bool_)
    result[indexer] = True
    return dd.from_array(result)
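A standalone sketch of the boolean-scatter pattern the function relies on, with dummy data in place of the time-of-day indexer:

import numpy as np
import dask.array as da

indexer = da.from_array(np.array([True, False, True, False]), chunks=2)
result = da.zeros(4, dtype=np.bool_)
result[indexer] = True  # scatter True wherever the mask is True
print(result.compute())  # [ True False  True False]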
Example #11
def test_0d_array():
    x = da.mean(da.ones(4, chunks=4), axis=0).compute()
    y = np.mean(np.ones(4))
    assert type(x) == type(y)

    x = da.sum(da.zeros(4, chunks=1)).compute()
    y = np.sum(np.zeros(4))
    assert type(x) == type(y)
Example #12
def test_map_inplace_data_changing(self):
    s = _lazy_signals.LazySignal2D(
        da.zeros((6, 6, 8, 8), chunks=(2, 2, 4, 4)))
    s.__call__()
    assert len(s._cache_dask_chunk.shape) == 4
    s.map(np.sum, axis=1, ragged=False, inplace=True)
    s.__call__()
    assert len(s._cache_dask_chunk.shape) == 3
Example #13
    def test_simulated_green(self):
        """Test creating a fake 'green' band."""
        import dask.array as da
        import numpy as np
        import xarray as xr
        from pyresample.geometry import AreaDefinition

        from satpy.composites.abi import SimulatedGreen
        rows = 5
        cols = 10
        area = AreaDefinition('test', 'test', 'test', {
            'proj': 'eqc',
            'lon_0': 0.0,
            'lat_0': 0.0
        }, cols, rows, (-20037508.34, -10018754.17, 20037508.34, 10018754.17))

        comp = SimulatedGreen('green',
                              prerequisites=('C01', 'C02', 'C03'),
                              standard_name='toa_bidirectional_reflectance')
        c01 = xr.DataArray(da.zeros((rows, cols), chunks=25) + 0.25,
                           dims=('y', 'x'),
                           attrs={
                               'name': 'C01',
                               'area': area
                           })
        c02 = xr.DataArray(da.zeros((rows, cols), chunks=25) + 0.30,
                           dims=('y', 'x'),
                           attrs={
                               'name': 'C02',
                               'area': area
                           })
        c03 = xr.DataArray(da.zeros((rows, cols), chunks=25) + 0.35,
                           dims=('y', 'x'),
                           attrs={
                               'name': 'C03',
                               'area': area
                           })
        res = comp((c01, c02, c03))
        self.assertIsInstance(res, xr.DataArray)
        self.assertIsInstance(res.data, da.Array)
        self.assertEqual(res.attrs['name'], 'green')
        self.assertEqual(res.attrs['standard_name'],
                         'toa_bidirectional_reflectance')
        data = res.compute()
        np.testing.assert_allclose(data, 0.28025)
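The expected value again follows from the compositor's weighting: assuming satpy's default SimulatedGreen fractions of (0.465, 0.465, 0.07), the arithmetic is 0.465 * 0.25 + 0.465 * 0.30 + 0.07 * 0.35 = 0.28025.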
Example #14
def const_features_for_single_grid_single_file(grid_indx, wind_grid_indx, data):
    client = Client()
    dims = data['no2'].shape
    ntime = dims[0] - 1
    nvel = dims[2]
    data_dict = dict()
    data_hours = da.array(data['hour'][1:])
    data_dict['hour'] = da.repeat(data_hours[:, :], nvel, axis=1)
    data_dict['date'] = da.zeros((ntime, nvel)) + da.mean(data['date'][:])
    cum_ic_flash = da.array(data['IC_FLASHCOUNT'][:, grid_indx, :])
    cum_cg_flash = da.array(data['CG_FLASHCOUNT'][:, grid_indx, :])
    data_dict['IC_FLASHCOUNT'] = da.repeat(cum_ic_flash[1:, :] - cum_ic_flash[:-1, :], nvel, axis=1)
    data_dict['CG_FLASHCOUNT'] = da.repeat(cum_cg_flash[1:, :] - cum_cg_flash[:-1, :], nvel, axis=1)
    e_no_lower = da.array(data['E_NO'])[1:, grid_indx, :]
    e_no_upper = da.zeros((ntime, nvel - e_no_lower.shape[1]))
    data_dict['E_NO'] = da.concatenate([e_no_lower, e_no_upper], axis=1)
    data_dict['U'] = (data['U'][1:, wind_grid_indx[0][0], :] + data['U'][1:, wind_grid_indx[0][1], :])/2
    data_dict['V'] = (data['V'][1:, wind_grid_indx[1][0], :] + data['V'][1:, wind_grid_indx[1][1], :])/2

    match_vars = ['no2', 'pres', 'temp', 'CLDFRA']
    print('Variables read directly from wrf: {}'.format(match_vars[:]))
    for var in match_vars:
        data_dict[var] = da.array(data[var])[1:, grid_indx, :]

    reduce_dim_vars = ['elev', 'W']
    print('Variables averaged vertically: {}'.format(reduce_dim_vars[:]))
    for var in reduce_dim_vars:
        this_value = da.array(data[var])[1:, grid_indx, :]
        data_dict[var] = (this_value[:, 1:] + this_value[:, :-1]) / 2

    add_dim_vars = ['COSZEN', 'PBLH', 'LAI', 'HGT', 'SWDOWN', 'GLW']
    print('Variables with added vertical layers: {}'.format(add_dim_vars[:]))

    for var in add_dim_vars:
        this_value = da.array(data[var])[1:, grid_indx, :]
        data_dict[var] = da.repeat(this_value, nvel, axis=1)

    print('Keys of dict: {}'.format(data_dict.keys()))
    save_arr = []
    for var in data_dict.keys():
        data_dict[var] = data_dict[var].flatten()
        save_arr.append(data_dict[var])
    save_arr = da.array(save_arr).compute()
    return save_arr
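The recurring trick above is `da.repeat` along axis 1, copying a per-timestep value across all vertical levels; a minimal sketch with made-up numbers:

import numpy as np
import dask.array as da

hours = da.from_array(np.array([[0], [6], [12]]), chunks=(3, 1))  # (ntime, 1)
print(da.repeat(hours, 4, axis=1).compute())
# [[ 0  0  0  0]
#  [ 6  6  6  6]
#  [12 12 12 12]]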
Example #15
    def test_convert_proj4_string(self):
        import xarray as xr
        import dask.array as da
        from satpy.writers.mitiff import MITIFFWriter
        from pyresample.geometry import AreaDefinition
        checks = [{
            'epsg':
            '+init=EPSG:32631',
            'proj4': (' Proj string: +proj=etmerc +lat_0=0 +lon_0=3 +k=0.9996 '
                      '+ellps=WGS84 +datum=WGS84 +units=km +x_0=501020.000000 '
                      '+y_0=1515.000000\n')
        }, {
            'epsg':
            '+init=EPSG:32632',
            'proj4': (' Proj string: +proj=etmerc +lat_0=0 +lon_0=9 +k=0.9996 '
                      '+ellps=WGS84 +datum=WGS84 +units=km +x_0=501020.000000 '
                      '+y_0=1515.000000\n')
        }, {
            'epsg':
            '+init=EPSG:32633',
            'proj4':
            (' Proj string: +proj=etmerc +lat_0=0 +lon_0=15 +k=0.9996 '
             '+ellps=WGS84 +datum=WGS84 +units=km +x_0=501020.000000 '
             '+y_0=1515.000000\n')
        }, {
            'epsg':
            '+init=EPSG:32634',
            'proj4':
            (' Proj string: +proj=etmerc +lat_0=0 +lon_0=21 +k=0.9996 '
             '+ellps=WGS84 +datum=WGS84 +units=km +x_0=501020.000000 '
             '+y_0=1515.000000\n')
        }, {
            'epsg':
            '+init=EPSG:32635',
            'proj4':
            (' Proj string: +proj=etmerc +lat_0=0 +lon_0=27 +k=0.9996 '
             '+ellps=WGS84 +datum=WGS84 +units=km +x_0=501020.000000 '
             '+y_0=1515.000000\n')
        }]
        for check in checks:
            area_def = AreaDefinition(
                'test',
                'test',
                'test',
                check['epsg'],
                100,
                200,
                (-1000., -1500., 1000., 1500.),
            )

            ds1 = xr.DataArray(da.zeros((10, 20), chunks=20),
                               dims=('y', 'x'),
                               attrs={'area': area_def})

            w = MITIFFWriter(filename='dummy.tif', base_dir=self.base_dir)
            proj4_string = w._add_proj4_string(ds1, ds1)
            self.assertEqual(proj4_string, check['proj4'])
Example #16
def _get_test_datasets_2d():
    """Create a single 2D test dataset."""
    ds1 = xr.DataArray(da.zeros((100, 200), chunks=50),
                       dims=('y', 'x'),
                       attrs={
                           'name': 'test',
                           'start_time': datetime.utcnow()
                       })
    return [ds1]
Example #17
    def test_rotate_diffraction_keep_shape(self):
        shape = (7, 5, 4, 15)
        s = Diffraction2D(np.zeros(shape))
        s_rot = s.rotate_diffraction(angle=45)
        assert s.axes_manager.shape == s_rot.axes_manager.shape

        s_lazy = LazyDiffraction2D(da.zeros(shape, chunks=(1, 1, 1, 1)))
        s_rot_lazy = s_lazy.rotate_diffraction(angle=45)
        assert s_lazy.axes_manager.shape == s_rot_lazy.axes_manager.shape
Example #18
    def test_2d_input_2d_output(self, shape):
        dask_array = da.zeros(shape, chunks=(10, 10, 20, 20))
        s = hs.signals.Signal2D(dask_array).as_lazy()

        def a_function(image):
            return np.zeros((2, 3))

        s_out = s.map(a_function, inplace=False, lazy_output=True)
        assert s.data.shape[:-2] + (2, 3) == s_out.data.shape
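`shape` is pytest-parametrized outside this excerpt; a plausible (illustrative) parametrization compatible with the (10, 10, 20, 20) chunking:

import pytest

@pytest.mark.parametrize("shape", [(20, 20, 40, 40), (10, 10, 20, 20)])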
Example #19
def pad_chunks(darray, chunklen):
    """Zero-pad a 1-D dask array so its length is a multiple of chunklen."""
    # -n % chunklen is already 0 when n is a multiple of chunklen, so an
    # aligned array is returned unchanged instead of gaining a spurious
    # full chunk of zeros
    padlen = -darray.shape[0] % chunklen
    if padlen == 0:
        return darray
    pad = da.zeros((padlen,), dtype=np.complex64)
    return da.concatenate([darray, pad], axis=0)
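A quick check of the helper, using the corrected modulo above:

import numpy as np
import dask.array as da

x = da.arange(10, chunks=4).astype(np.complex64)
print(pad_chunks(x, 4).shape)      # (12,): two zeros appended
print(pad_chunks(x[:8], 4).shape)  # (8,): already aligned, returned unchanged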
Example #20
def _create_test_dataset(name, shape=DEFAULT_SHAPE, area=None):
    """Create a test DataArray object."""
    import xarray as xr
    import dask.array as da
    import numpy as np

    return xr.DataArray(
        da.zeros(shape, dtype=np.float32, chunks=shape), dims=('y', 'x'),
        attrs={'name': name, 'area': area})
Example #21
def test_chunking_saving_lazy_specify(self, tmp_path, file):
    filename = tmp_path / file
    s = Signal2D(da.zeros((50, 100, 100))).as_lazy()
    # specify chunks
    chunks = (50, 10, 10)
    s.data = s.data.rechunk([50, 25, 25])
    s.save(filename, chunks=chunks)
    s1 = load(filename, lazy=True)
    assert tuple([c[0] for c in s1.data.chunks]) == chunks
Example #22
def common_test_setup(self, shape_3d=(0, 2), data_chunks=None):
    # Construct a basic testcase with all-lazy mesh_cube and submesh_cubes
    # full-mesh cube shape is 'shape_3d'
    # data_chunks sets chunking of source cube, (else all-1-chunk)
    n_outer, n_z = shape_3d
    n_mesh = 20
    mesh = sample_mesh(n_nodes=20, n_edges=0, n_faces=n_mesh)
    mesh_cube = sample_mesh_cube(n_z=n_z, mesh=mesh)
    # Fix index-coord name to the expected default for recombine_submeshes.
    mesh_cube.coord("i_mesh_face").rename("i_mesh_index")
    if n_outer:
        # Crudely merge a set of copies to build an outer dimension.
        mesh_cube.add_aux_coord(AuxCoord([0], long_name="outer"))
        meshcubes_2d = []
        for i_outer in range(n_outer):
            cube = mesh_cube.copy()
            cube.coord("outer").points = np.array([i_outer])
            meshcubes_2d.append(cube)
        mesh_cube = CubeList(meshcubes_2d).merge_cube()

    if not data_chunks:
        data_chunks = mesh_cube.shape[:-1] + (-1, )
    mesh_cube.data = da.zeros(mesh_cube.shape, chunks=data_chunks)

    n_regions = 4  # it doesn't divide neatly
    region_len = n_mesh // n_regions
    i_points = np.arange(n_mesh)
    region_inds = [
        np.where((i_points // region_len) == i_region)
        for i_region in range(n_regions)
    ]
    # Disturb slightly to ensure some gaps + some overlaps
    region_inds = [list(indarr[0]) for indarr in region_inds]
    region_inds[2] = region_inds[2][:-2]  # missing points
    region_inds[3] += region_inds[1][:2]  # duplicates
    self.mesh_cube = mesh_cube
    self.region_inds = region_inds
    self.region_cubes = [mesh_cube[..., inds] for inds in region_inds]
    for i_cube, cube in enumerate(self.region_cubes):
        for i_z in range(n_z):
            # Set data='z' ; don't vary over other dimensions.
            cube.data[..., i_z, :] = i_cube + 1000 * i_z + 1
            cube.data = cube.lazy_data()

    # Also construct an array to match the expected result (2d cases only).
    # basic layer showing region allocation (large -ve values for missing)
    expected = np.array([1.0, 1, 1, 1, 1] +
                        [4, 4]  # points in #1 overlapped by #3
                        + [2, 2, 2] + [3, 3, 3] +
                        [-99999, -99999]  # missing points
                        + [4, 4, 4, 4, 4])
    # second layer should be same but +1000.
    # NOTE: only correct if shape_3d=None; no current need to generalise this.
    expected = np.stack([expected, expected + 1000])
    # convert to masked array with missing points.
    expected = np.ma.masked_less(expected, 0)
    self.expected_result = expected
Example #23
def test_get_signal_chunk_slice(sig_chunks, index, expected):
    ndim = 1 + len(index)
    data = da.zeros([20]*ndim, chunks=(10, *sig_chunks[::-1]))
    if expected == 'error':
        with pytest.raises(ValueError):
            chunk_slice = get_signal_chunk_slice(index, data.chunks)
    else:
        chunk_slice = get_signal_chunk_slice(index, data.chunks)
        assert chunk_slice == expected
Example #24
def get_dbz(daskArray, use_varint=False, use_liqskin=False, omp_threads=1):
    t = fetch_variable(daskArray, "T")
    p = fetch_variable(daskArray, "P")
    pb = fetch_variable(daskArray, "PB")
    qv = fetch_variable(daskArray, "QVAPOR")
    qr = fetch_variable(daskArray, "QRAIN")

    dtype = t.dtype

    try:
        qs = fetch_variable(daskArray, "QSNOW")
    except KeyError:
        qs = da.zeros(qv.shape, dtype=qv.dtype)

    try:
        qgraup = fetch_variable(daskArray, "QGRAUP")
    except KeyError:
        qgraup = da.zeros(qv.shape, dtype=qv.dtype)

    full_t = map_blocks(wrapped_add, t, Constants.T_BASE, dtype=t.dtype)
    full_p = map_blocks(wrapped_add, p, pb, dtype=p.dtype)
    tk = map_blocks(tk_wrap, full_p, full_t, omp_threads, dtype=p.dtype)

    sn0 = 1 if qs.any() else 0
    ivarint = 1 if use_varint else 0
    iliqskin = 1 if use_liqskin else 0

    del t
    del p
    del pb

    dbz = map_blocks(dbz_wrap,
                     full_p,
                     tk,
                     qv,
                     qr,
                     qs,
                     qgraup,
                     sn0,
                     ivarint,
                     iliqskin,
                     omp_threads,
                     dtype=dtype)
    return dbz
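The try/except blocks above fall back to an all-zero field when an optional hydrometeor variable is missing; the pattern in isolation (a plain dict stands in for the WRF file handle):

import dask.array as da

variables = {"QVAPOR": da.ones((2, 3), chunks=(2, 3))}
try:
    qs = variables["QSNOW"]
except KeyError:
    # missing variable: substitute zeros with matching shape and dtype
    qs = da.zeros(variables["QVAPOR"].shape, dtype=variables["QVAPOR"].dtype)
print(qs.shape, float(qs.sum().compute()))  # (2, 3) 0.0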
Example #25
    def test_hncc_dnb(self):
        """Test the 'hncc_dnb' compositor."""
        import dask.array as da
        import numpy as np
        import xarray as xr
        from pyresample.geometry import AreaDefinition

        from satpy.composites.viirs import NCCZinke
        rows = 5
        cols = 10
        area = AreaDefinition(
            'test', 'test', 'test',
            {'proj': 'eqc', 'lon_0': 0.0,
             'lat_0': 0.0},
            cols, rows,
            (-20037508.34, -10018754.17, 20037508.34, 10018754.17))

        comp = NCCZinke('hncc_dnb', prerequisites=('dnb',),
                        standard_name='toa_outgoing_radiance_per_'
                                      'unit_wavelength')
        dnb = np.zeros((rows, cols)) + 0.25
        dnb[3, :] += 0.25
        dnb[4:, :] += 0.5
        dnb = da.from_array(dnb, chunks=25)
        c01 = xr.DataArray(dnb,
                           dims=('y', 'x'),
                           attrs={'name': 'DNB', 'area': area})
        sza = np.zeros((rows, cols)) + 70.0
        sza[:, 3] += 20.0
        sza[:, 4:] += 45.0
        sza = da.from_array(sza, chunks=25)
        c02 = xr.DataArray(sza,
                           dims=('y', 'x'),
                           attrs={'name': 'solar_zenith_angle', 'area': area})
        lza = np.zeros((rows, cols)) + 70.0
        lza[:, 3] += 20.0
        lza[:, 4:] += 45.0
        lza = da.from_array(lza, chunks=25)
        c03 = xr.DataArray(lza,
                           dims=('y', 'x'),
                           attrs={'name': 'lunar_zenith_angle', 'area': area})
        mif = xr.DataArray(da.zeros((5,), chunks=5) + 0.1,
                           dims=('y',),
                           attrs={'name': 'moon_illumination_fraction', 'area': area})
        res = comp((c01, c02, c03, mif))
        self.assertIsInstance(res, xr.DataArray)
        self.assertIsInstance(res.data, da.Array)
        self.assertEqual(res.attrs['name'], 'hncc_dnb')
        self.assertEqual(res.attrs['standard_name'],
                         'ncc_radiance')
        data = res.compute()
        unique = np.unique(data)
        np.testing.assert_allclose(
            unique, [3.48479712e-04, 6.96955799e-04, 1.04543189e-03, 4.75394738e-03,
                     9.50784532e-03, 1.42617433e-02, 1.50001560e+03, 3.00001560e+03,
                     4.50001560e+03])
Example #26
    def parse(self, request: Metamorphing,
              settings: dict) -> List[Tuple[models.Model, str]]:

        rescale = True
        array = request.representation.array

        if "z" in array.dims:
            array = array.max(dim="z")
        if "t" in array.dims:
            array = array.sel(t=0)

        if "c" in array.dims:
            # Check if we have to convert to monoimage
            if array.c.size == 1:
                array = array.sel(c=0)

                if rescale:
                    self.progress("Rescaling")
                    min, max = array.min(), array.max()
                    image = np.interp(array, (min, max),
                                      (0, 255)).astype(np.uint8)
                else:
                    image = (array * 255).astype(np.uint8)

                from matplotlib import cm
                mapped = cm.viridis(image)

                finalarray = (mapped * 255).astype(np.uint8)

            else:
                if array.c.size >= 3:
                    array = array.sel(c=[0, 1, 2]).data
                elif array.c.size == 2:
                    # Two Channel Image will be displayed with a Dark Channel
                    array = da.concatenate(
                        [array.sel(c=[0, 1]).data,
                         da.zeros((array.x.size, array.y.size, 1))],
                        axis=2)

                if rescale:
                    self.progress("Rescaling")
                    min, max = array.min(), array.max()
                    image = np.interp(array.compute(), (min, max),
                                      (0, 255)).astype(np.uint8)
                else:
                    image = (array * 255).astype(np.uint8)

                finalarray = image

        else:
            raise NotImplementedError(
                "Image Does not provide the channel Argument")

        display = Display.objects.from_xarray_and_request(finalarray, request)
        return [(display, "create")]
Example #27
def test_cube_arg(self):
    """Check that an input lazy cube will be realised before return."""
    cube = Cube(da.zeros((1, 1), chunks=(1, 1)), long_name="dummy")
    self.assertTrue(cube.has_lazy_data())
    result = inputcube_nolazy(cube)
    self.coerce_patch.assert_called_with(improver.utilities.load.load_cube,
                                         cube,
                                         no_lazy_load=True)
    self.assertFalse(cube.has_lazy_data())
    self.assertEqual(result, "return")
Example #28
def _get_test_datasets_3d():
    """Create a single 3D test dataset."""
    ds1 = xr.DataArray(da.zeros((3, 100, 200), chunks=50),
                       dims=('bands', 'y', 'x'),
                       coords={'bands': ['R', 'G', 'B']},
                       attrs={
                           'name': 'test',
                           'start_time': datetime.utcnow()
                       })
    return [ds1]
Example #29
def test_changed_data_trigger(self):
    s = _lazy_signals.LazySignal2D(
        da.zeros((6, 6, 8, 8), chunks=(2, 2, 4, 4)))
    position = s.axes_manager._getitem_tuple
    s._get_cache_dask_chunk(position)
    assert s._cache_dask_chunk is not None
    assert s._cache_dask_chunk_slice is not None
    s.events.data_changed.trigger(None)
    assert s._cache_dask_chunk is None
    assert s._cache_dask_chunk_slice is None
Example #30
def test_notifications_error_with_threading(make_napari_viewer):
    """Test notifications of `threading` threads, using a dask example."""
    random_image = da.random.random(size=(50, 50))
    with notification_manager:
        viewer = make_napari_viewer()
        viewer.add_image(random_image)
        result = da.divide(random_image, da.zeros((50, 50)))
        viewer.add_image(result)
        assert len(notification_manager.records) >= 1
        notification_manager.records = []
Example #31
    def __init__(self, parameters: Parameter = None):

        self._type = 'acoustic'
        self._ndim = 2
        self._nx = parameters['number-of-cells'][0]
        self._nz = parameters['number-of-cells'][1]

        self._sxx = da.zeros((self._nx, self._nz), dtype=DTYPE)
        self._vx = da.zeros_like(self._sxx)
        self._vz = da.zeros_like(self._sxx)
Example #32
    def setup(self):
        A = 400
        B = 800

        a = da.ones((A, B, 2), chunks=1)
        b = da.zeros((A, B, 1), chunks=1)
        c = a + b
        g = c.__dask_graph__()
        layer = g.layers[c.name]
        self.layer = layer
Example #33
    def _slice_padded(self, _bounds):
        pads = (max(-_bounds[0], 0), max(-_bounds[1], 0),
                max(_bounds[2] - self.shape[2], 0),
                max(_bounds[3] - self.shape[1], 0))
        bounds = (max(_bounds[0], 0),
                  max(_bounds[1], 0),
                  max(min(_bounds[2], self.shape[2]), 0),
                  max(min(_bounds[3], self.shape[1]), 0))

        # NOTE: image is a dask array that implements daskmeta interface (via op)
        result = self[:, bounds[1]:bounds[3], bounds[0]:bounds[2]]
        if pads[0] > 0:
            dims = (result.shape[0], result.shape[1], pads[0])
            result = da.concatenate(
                [da.zeros(dims, chunks=dims, dtype=result.dtype), result],
                axis=2)
        if pads[2] > 0:
            dims = (result.shape[0], result.shape[1], pads[2])
            result = da.concatenate(
                [result,
                 da.zeros(dims, chunks=dims, dtype=result.dtype)],
                axis=2)
        if pads[1] > 0:
            dims = (result.shape[0], pads[1], result.shape[2])
            result = da.concatenate(
                [da.zeros(dims, chunks=dims, dtype=result.dtype), result],
                axis=1)
        if pads[3] > 0:
            dims = (result.shape[0], pads[3], result.shape[2])
            result = da.concatenate(
                [result,
                 da.zeros(dims, chunks=dims, dtype=result.dtype)],
                axis=1)

        image = super(DaskImage, self.__class__).__new__(
            self.__class__, result.dask, result.name, result.chunks,
            result.dtype, result.shape)

        image.__geo_transform__ = self.__geo_transform__ + (_bounds[0], _bounds[1])
        return image
Example #34
def test_atop_stacked_new_axes_same_dim(concatenate):
    def f(x):
        return x[..., None] * np.ones((1, 7))

    x = da.ones(5, chunks=2)
    y = da.zeros(5, chunks=2)
    a = atop(f, 'aq', x, 'a', new_axes={'q': 7}, concatenate=concatenate, dtype=x.dtype)
    b = atop(f, 'aq', y, 'a', new_axes={'q': 7}, concatenate=concatenate, dtype=x.dtype)
    c = a + b
    assert c.chunks == ((2, 2, 1), (7,))
    assert_eq(c, np.ones((5, 7)))
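`atop` is the older name for what current dask exposes as `da.blockwise`; assuming a recent dask, the same graph can be written as:

import numpy as np
import dask.array as da

def f(x):
    return x[..., None] * np.ones((1, 7))

x = da.ones(5, chunks=2)
# 'a' is the existing axis; new_axes adds axis 'q' of size 7 produced inside f
a = da.blockwise(f, 'aq', x, 'a', new_axes={'q': 7},
                 concatenate=True, dtype=x.dtype)
print(a.chunks)  # ((2, 2, 1), (7,))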
Example #35
def abi_l1b_c01_data_array(goes_east_conus_area_def) -> xr.DataArray:
    return xr.DataArray(
        da.zeros((3000, 5000), chunks=4096),
        dims=("y", "x"),
        attrs={
            "area": goes_east_conus_area_def,
            "platform_name": "goes16",
            "sensor": "abi",
            "name": "C01",
        },
    )
Example #36
def _get_test_datasets(self):
    import xarray as xr
    import dask.array as da
    from datetime import datetime
    ds1 = xr.DataArray(da.zeros((100, 200), chunks=50),
                       dims=('y', 'x'),
                       attrs={
                           'name': 'test',
                           'start_time': datetime.utcnow()
                       })
    return [ds1]
Example #37
def _get_test_datasets(self):
    import xarray as xr
    import dask.array as da
    from datetime import datetime
    ds1 = xr.DataArray(
        da.zeros((100, 200), chunks=50),
        dims=('y', 'x'),
        attrs={'name': 'test',
               'start_time': datetime.utcnow()}
    )
    return [ds1]
Example #38
def test_mixed_output_type():
    y = da.random.random((10, 10), chunks=(5, 5))
    y[y < 0.4] = 0

    y = da.ma.masked_equal(y, 0)
    x = da.zeros((10, 1), chunks=(5, 1))

    z = da.concatenate([x, y], axis=1)
    assert z.shape == (10, 11)
    zz = z.compute()
    assert isinstance(zz, np.ma.masked_array)
Example #39
def test_mixed_output_type():
    y = da.random.random((10, 10), chunks=(5, 5))
    y[y < 0.4] = 0

    y = da.ma.masked_equal(y, 0)
    x = da.zeros((10, 1), chunks=(5, 1))

    z = da.concatenate([x, y], axis=1)
    assert z.shape == (10, 11)
    zz = z.compute()
    assert isinstance(zz, np.ma.masked_array)
Example #40
def test_atop_stacked_new_axes_same_dim(concatenate):
    def f(x):
        return x[..., None] * np.ones((1, 7))

    x = da.ones(5, chunks=2)
    y = da.zeros(5, chunks=2)
    a = atop(f, 'aq', x, 'a', new_axes={'q': 7}, concatenate=concatenate, dtype=x.dtype)
    b = atop(f, 'aq', y, 'a', new_axes={'q': 7}, concatenate=concatenate, dtype=x.dtype)
    c = a + b
    assert c.chunks == ((2, 2, 1), (7,))
    assert_eq(c, np.ones((5, 7)))
Example #41
def test_mixed_output_type():
    y = da.random.random((10, 10), chunks=(5, 5))
    y[y < 0.8] = 0
    y = y.map_blocks(sparse.COO.from_numpy)

    x = da.zeros((10, 1), chunks=(5, 1))

    z = da.concatenate([x, y], axis=1)

    assert z.shape == (10, 11)

    zz = z.compute()
    assert isinstance(zz, sparse.COO)
    assert zz.nnz == y.compute().nnz
Example #42
    def test_hncc_dnb(self):
        """Test the 'hncc_dnb' compositor."""
        import xarray as xr
        import dask.array as da
        import numpy as np
        from satpy.composites.viirs import NCCZinke
        from pyresample.geometry import AreaDefinition
        rows = 5
        cols = 10
        area = AreaDefinition(
            'test', 'test', 'test',
            {'proj': 'eqc', 'lon_0': 0.0,
             'lat_0': 0.0},
            cols, rows,
            (-20037508.34, -10018754.17, 20037508.34, 10018754.17))

        comp = NCCZinke('hncc_dnb', prerequisites=('dnb',),
                        standard_name='toa_outgoing_radiance_per_'
                                      'unit_wavelength')
        dnb = np.zeros((rows, cols)) + 0.25
        dnb[3, :] += 0.25
        dnb[4:, :] += 0.5
        dnb = da.from_array(dnb, chunks=25)
        c01 = xr.DataArray(dnb,
                           dims=('y', 'x'),
                           attrs={'name': 'DNB', 'area': area})
        sza = np.zeros((rows, cols)) + 70.0
        sza[3, :] += 20.0
        sza[4:, :] += 45.0
        sza = da.from_array(sza, chunks=25)
        c02 = xr.DataArray(sza,
                           dims=('y', 'x'),
                           attrs={'name': 'solar_zenith_angle', 'area': area})
        lza = da.from_array(sza, chunks=25)
        c03 = xr.DataArray(lza,
                           dims=('y', 'x'),
                           attrs={'name': 'lunar_zenith_angle', 'area': area})
        mif = xr.DataArray(da.zeros((5,), chunks=5) + 0.1,
                           dims=('y',),
                           attrs={'name': 'moon_illumination_fraction', 'area': area})
        res = comp((c01, c02, c03, mif))
        self.assertIsInstance(res, xr.DataArray)
        self.assertIsInstance(res.data, da.Array)
        self.assertEqual(res.attrs['name'], 'hncc_dnb')
        self.assertEqual(res.attrs['standard_name'],
                         'ncc_radiance')
        data = res.compute()
        unique = np.unique(data)
        np.testing.assert_allclose(
            unique, [3.484797e-04, 9.507845e-03, 4.500016e+03])
Example #43
def test_rechunk_bad_keys():
    x = da.zeros((2, 3, 4), chunks=1)
    assert x.rechunk({-1: 4}).chunks == ((1, 1), (1, 1, 1), (4,))
    assert x.rechunk({-x.ndim: 2}).chunks == ((2,), (1, 1, 1), (1, 1, 1, 1))

    with pytest.raises(TypeError) as info:
        x.rechunk({'blah': 4})

    assert 'blah' in str(info.value)

    with pytest.raises(ValueError) as info:
        x.rechunk({100: 4})

    assert '100' in str(info.value)

    with pytest.raises(ValueError) as info:
        x.rechunk({-100: 4})

    assert '-100' in str(info.value)
Example #44
    def test_expand_reduce(self):
        from satpy.resample import NativeResampler
        import numpy as np
        import dask.array as da
        d_arr = da.zeros((6, 20), chunks=4)
        new_arr = NativeResampler.expand_reduce(d_arr, {0: 2., 1: 2.})
        self.assertEqual(new_arr.shape, (12, 40))
        new_arr = NativeResampler.expand_reduce(d_arr, {0: .5, 1: .5})
        self.assertEqual(new_arr.shape, (3, 10))
        self.assertRaises(ValueError, NativeResampler.expand_reduce,
                          d_arr, {0: 1. / 3, 1: 1.})
        new_arr = NativeResampler.expand_reduce(d_arr, {0: 1., 1: 1.})
        self.assertEqual(new_arr.shape, (6, 20))
        self.assertIs(new_arr, d_arr)
        self.assertRaises(ValueError, NativeResampler.expand_reduce,
                          d_arr, {0: 0.333323423, 1: 1.})
        self.assertRaises(ValueError, NativeResampler.expand_reduce,
                          d_arr, {0: 1.333323423, 1: 1.})

        n_arr = np.zeros((6, 20))
        new_arr = NativeResampler.expand_reduce(n_arr, {0: 2., 1: 1.0})
        self.assertTrue(np.all(new_arr.compute()[::2, :] == n_arr))
Example #45
def test_slicing_consistent_names_after_normalization():
    x = da.zeros(10, chunks=(5,))
    assert same_keys(x[0:], x[:10])
    assert same_keys(x[0:], x[0:10])
    assert same_keys(x[0:], x[0:10:1])
    assert same_keys(x[:], x[0:10:1])
Example #46
def test_slice_list_then_None():
    x = da.zeros(shape=(5, 5), chunks=(3, 3))
    y = x[[2, 1]][None]

    assert_eq(y, np.zeros((1, 2, 5)))
Example #47
    def get_reflectance(self, sun_zenith, sat_zenith, azidiff, bandname, redband=None):
        """Get the reflectance from the three sun-sat angles"""
        # Get wavelength in nm for band:
        if isinstance(bandname, float):
            LOG.warning('A wavelength is provided instead of band name - ' +
                        'disregard the relative spectral responses and assume ' +
                        'it is the effective wavelength: %f (micro meter)', bandname)
            wvl = bandname * 1000.0
        else:
            wvl = self.get_effective_wavelength(bandname)
            wvl = wvl * 1000.0

        rayl, wvl_coord, azid_coord, satz_sec_coord, sunz_sec_coord = self.get_reflectance_lut()

        # force dask arrays
        compute = False
        if HAVE_DASK and not isinstance(sun_zenith, Array):
            compute = True
            sun_zenith = from_array(sun_zenith, chunks=sun_zenith.shape)
            sat_zenith = from_array(sat_zenith, chunks=sat_zenith.shape)
            azidiff = from_array(azidiff, chunks=azidiff.shape)
            if redband is not None:
                redband = from_array(redband, chunks=redband.shape)

        clip_angle = rad2deg(arccos(1. / sunz_sec_coord.max()))
        sun_zenith = clip(sun_zenith, 0, clip_angle)
        sunzsec = 1. / cos(deg2rad(sun_zenith))
        clip_angle = rad2deg(arccos(1. / satz_sec_coord.max()))
        sat_zenith = clip(sat_zenith, 0, clip_angle)
        satzsec = 1. / cos(deg2rad(sat_zenith))
        shape = sun_zenith.shape

        if not(wvl_coord.min() < wvl < wvl_coord.max()):
            LOG.warning(
                "Effective wavelength for band %s outside 400-800 nm range!",
                str(bandname))
            LOG.info(
                "Set the rayleigh/aerosol reflectance contribution to zero!")
            if HAVE_DASK:
                chunks = sun_zenith.chunks if redband is None else redband.chunks
                res = zeros(shape, chunks=chunks)
                return res.compute() if compute else res
            else:
                return zeros(shape)

        idx = np.searchsorted(wvl_coord, wvl)
        wvl1 = wvl_coord[idx - 1]
        wvl2 = wvl_coord[idx]

        fac = (wvl2 - wvl) / (wvl2 - wvl1)
        raylwvl = fac * rayl[idx - 1, :, :, :] + (1 - fac) * rayl[idx, :, :, :]
        tic = time.time()

        smin = [sunz_sec_coord[0], azid_coord[0], satz_sec_coord[0]]
        smax = [sunz_sec_coord[-1], azid_coord[-1], satz_sec_coord[-1]]
        orders = [
            len(sunz_sec_coord), len(azid_coord), len(satz_sec_coord)]
        f_3d_grid = atleast_2d(raylwvl.ravel())

        if HAVE_DASK and isinstance(smin[0], Array):
            # compute all of these at the same time before passing to the interpolator
            # otherwise they are computed separately
            smin, smax, orders, f_3d_grid = da.compute(smin, smax, orders, f_3d_grid)
        minterp = MultilinearInterpolator(smin, smax, orders)
        minterp.set_values(f_3d_grid)

        if HAVE_DASK:
            ipn = map_blocks(self._do_interp, minterp, sunzsec, azidiff,
                             satzsec, dtype=raylwvl.dtype, chunks=azidiff.chunks)
        else:
            ipn = self._do_interp(minterp, sunzsec, azidiff, satzsec)

        LOG.debug("Time - Interpolation: {0:f}".format(time.time() - tic))

        ipn *= 100
        res = ipn
        if redband is not None:
            res = where(redband < 20., res,
                        (1 - (redband - 20) / 80) * res)

        res = clip(res, 0, 100)
        if compute:
            res = res.compute()
        return res
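Worked example of the wavelength interpolation factor used above: for an effective wavelength bracketed by two LUT wavelengths,

wvl, wvl1, wvl2 = 500.0, 480.0, 520.0  # illustrative values in nm
fac = (wvl2 - wvl) / (wvl2 - wvl1)
print(fac)  # 0.5 -> the two bracketing LUT slices are averaged equally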
Example #48
    def define_array_type_specific_functions(self):
        self._load = generic_netcdf_loader_for_grids(
            array_type=self._array_type, chunks=self.chunks)
        self._zeros = lambda n: da.zeros(n, chunks=self.chunks)
Example #49
    def decomposition(self,
                      output_dimension,
                      normalize_poissonian_noise=False,
                      algorithm='PCA',
                      signal_mask=None,
                      navigation_mask=None,
                      get=threaded.get,
                      num_chunks=None,
                      reproject=True,
                      bounds=True,
                      **kwargs):
        """Perform Incremental (Batch) decomposition on the data, keeping n
        significant components.

        Parameters
        ----------
        output_dimension : int
            the number of significant components to keep
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : str
            One of ('PCA', 'ORPCA', 'ONMF'). By default ('PCA') IncrementalPCA
            from scikit-learn is run.
        get : dask scheduler
            the dask scheduler to use for computations;
            default `dask.threaded.get`
        num_chunks : int
            the number of dask chunks to pass to the decomposition model.
            More chunks require more memory, but should run faster. Will be
            increased to contain at least output_dimension signals.
        navigation_mask : {BaseSignal, numpy array, dask array}
            The navigation locations marked as True are not used in the
            decomposition.
        signal_mask : {BaseSignal, numpy array, dask array}
            The signal locations marked as True are not used in the
            decomposition.
        reproject : bool
            Reproject data on the learnt components (factors) after learning.
        bounds : {tuple, bool}
            The (min, max) values of the data to normalize before learning.
            If tuple (min, max), those values will be used for normalization.
            If True, the extremes will be looked up (expensive); this is the default.
            If False, no normalization is done (learning may be very slow).
            If normalize_poissonian_noise is True, this cannot be True.
        **kwargs
            passed to the partial_fit/fit functions.

        Notes
        -----
        Various algorithm parameters and their default values:
            ONMF:
                lambda1=1,
                kappa=1,
                robust=False,
                store_r=False
                batch_size=None
            ORPCA:
                fast=True,
                lambda1=None,
                lambda2=None,
                method=None,
                learning_rate=None,
                init=None,
                training_samples=None,
                momentum=None
            PCA:
                batch_size=None,
                copy=True,
                white=False


        """
        explained_variance = None
        explained_variance_ratio = None
        _al_data = self._data_aligned_with_axes
        nav_chunks = _al_data.chunks[:self.axes_manager.navigation_dimension]
        sig_chunks = _al_data.chunks[self.axes_manager.navigation_dimension:]

        num_chunks = 1 if num_chunks is None else num_chunks
        blocksize = np.min([multiply(ar) for ar in product(*nav_chunks)])
        nblocks = multiply([len(c) for c in nav_chunks])
        if blocksize / output_dimension < num_chunks:
            num_chunks = np.ceil(blocksize / output_dimension)
        blocksize *= num_chunks

        ## LEARN
        if algorithm == 'PCA':
            from sklearn.decomposition import IncrementalPCA
            obj = IncrementalPCA(n_components=output_dimension)
            method = partial(obj.partial_fit, **kwargs)
            reproject = True

        elif algorithm == 'ORPCA':
            from hyperspy.learn.rpca import ORPCA
            kwg = {'fast': True}
            kwg.update(kwargs)
            obj = ORPCA(output_dimension, **kwg)
            method = partial(obj.fit, iterating=True)

        elif algorithm == 'ONMF':
            from hyperspy.learn.onmf import ONMF
            batch_size = kwargs.pop('batch_size', None)
            obj = ONMF(output_dimension, **kwargs)
            method = partial(obj.fit, batch_size=batch_size)

        else:
            raise ValueError('algorithm not known')

        original_data = self.data
        try:
            if normalize_poissonian_noise:
                if bounds is True:
                    bounds = False
                    # warnings.warn?
                data = self._data_aligned_with_axes
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                nm = da.logical_not(
                    da.zeros(
                        self.axes_manager.navigation_shape[::-1],
                        chunks=nav_chunks)
                    if navigation_mask is None else to_array(
                        navigation_mask, chunks=nav_chunks))
                sm = da.logical_not(
                    da.zeros(
                        self.axes_manager.signal_shape[::-1],
                        chunks=sig_chunks)
                    if signal_mask is None else to_array(
                        signal_mask, chunks=sig_chunks))
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
                bH, aG = da.compute(
                    data.sum(axis=range(ndim)),
                    data.sum(axis=range(ndim, ndim + sdim)))
                bH = da.where(sm, bH, 1)
                aG = da.where(nm, aG, 1)

                raG = da.sqrt(aG)
                rbH = da.sqrt(bH)

                coeff = raG[(..., ) + (None, )*rbH.ndim] *\
                        rbH[(None, )*raG.ndim + (...,)]
                coeff = coeff.map_blocks(np.nan_to_num)  # assign back; map_blocks is not in-place
                coeff = da.where(coeff == 0, 1, coeff)
                data = data / coeff
                self.data = data

            # normalize the data for learning algs:
            if bounds:
                if bounds is True:
                    _min, _max = da.compute(self.data.min(), self.data.max())
                else:
                    _min, _max = bounds
                self.data = (self.data - _min) / (_max - _min)

            # LEARN
            this_data = []
            try:
                for chunk in progressbar(
                        self._block_iterator(
                            flat_signal=True,
                            get=get,
                            signal_mask=signal_mask,
                            navigation_mask=navigation_mask),
                        total=nblocks,
                        leave=True,
                        desc='Learn'):
                    this_data.append(chunk)
                    if len(this_data) == num_chunks:
                        thedata = np.concatenate(this_data, axis=0)
                        method(thedata)
                        this_data = []
                if len(this_data):
                    thedata = np.concatenate(this_data, axis=0)
                    method(thedata)
            except KeyboardInterrupt:
                pass

            # GET ALREADY CALCULATED RESULTS
            if algorithm == 'PCA':
                explained_variance = obj.explained_variance_
                explained_variance_ratio = obj.explained_variance_ratio_
                factors = obj.components_.T

            elif algorithm == 'ORPCA':
                _, _, U, S, V = obj.finish()
                factors = U * S
                loadings = V
                explained_variance = S**2 / len(factors)

            elif algorithm == 'ONMF':
                factors, loadings = obj.finish()
                loadings = loadings.T

            # REPROJECT
            if reproject:
                if algorithm == 'PCA':
                    method = obj.transform
                    post = lambda a: np.concatenate(a, axis=0)
                elif algorithm == 'ORPCA':
                    method = obj.project
                    obj.R = []
                    post = lambda a: obj.finish()[4]
                elif algorithm == 'ONMF':
                    method = obj.project
                    post = lambda a: np.concatenate(a, axis=1).T

                _map = map(lambda thing: method(thing),
                           self._block_iterator(
                               flat_signal=True,
                               get=get,
                               signal_mask=signal_mask,
                               navigation_mask=navigation_mask))
                H = []
                try:
                    for thing in progressbar(
                            _map, total=nblocks, desc='Project'):
                        H.append(thing)
                except KeyboardInterrupt:
                    pass
                loadings = post(H)

            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # RESHUFFLE "blocked" LOADINGS
            ndim = self.axes_manager.navigation_dimension
            try:
                loadings = _reshuffle_mixed_blocks(
                    loadings,
                    ndim,
                    (output_dimension,),
                    nav_chunks).reshape((-1, output_dimension))
            except ValueError:
                # In case the projection step was not finished, it's left
                # as scrambled
                pass
        finally:
            self.data = original_data

        target = self.learning_results
        target.decomposition_algorithm = algorithm
        target.output_dimension = output_dimension
        target._object = obj
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio
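Hypothetical usage of the method sketched above, assuming a lazy HyperSpy signal (the file name and dimensions are illustrative):

import hyperspy.api as hs

s = hs.load("stack.hspy", lazy=True)  # hypothetical file
s.decomposition(output_dimension=16, algorithm='PCA')
factors = s.learning_results.factors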
Example #50
def test_integer_input():
    assert da.zeros((4, 6), chunks=2).rechunk(3).chunks == ((3, 1), (3, 3))
Example #51
    def _block_iterator(self,
                        flat_signal=True,
                        get=threaded.get,
                        navigation_mask=None,
                        signal_mask=None):
        """A function that allows iterating lazy signal data by blocks,
        defining the dask.Array.

        Parameters
        ----------
        flat_signal: bool
            returns each block flattened, such that the shape (for the
            particular block) is (navigation_size, signal_size), with
            optionally masked elements missing. If false, returns
            the equivalent of s.inav[{blocks}].data, where masked elements are
            set to np.nan or 0.
        get : dask scheduler
            the dask scheduler to use for computations;
            default `dask.threaded.get`
        navigation_mask : {BaseSignal, numpy array, dask array}
            The navigation locations marked as True are not returned (flat) or
            set to NaN or 0.
        signal_mask : {BaseSignal, numpy array, dask array} 
            The signal locations marked as True are not returned (flat) or set
            to NaN or 0.

        """
        self._make_lazy()
        data = self._data_aligned_with_axes
        nav_chunks = data.chunks[:self.axes_manager.navigation_dimension]
        indices = product(*[range(len(c)) for c in nav_chunks])
        signalsize = self.axes_manager.signal_size
        sig_reshape = (signalsize,) if signalsize else ()
        data = data.reshape((self.axes_manager.navigation_shape[::-1] +
                             sig_reshape))

        if signal_mask is None:
            signal_mask = slice(None) if flat_signal else \
                    np.zeros(self.axes_manager.signal_size, dtype='bool')
        else:
            try:
                signal_mask = to_array(signal_mask).ravel()
            except ValueError:
                # re-raise with a message
                raise ValueError("signal_mask has to be a signal, numpy or"
                                 " dask array, but "
                                 "{} was given".format(type(signal_mask)))
            if flat_signal:
                signal_mask = ~signal_mask

        if navigation_mask is None:
            nav_mask = da.zeros(
                self.axes_manager.navigation_shape[::-1],
                chunks=nav_chunks,
                dtype='bool')
        else:
            try:
                nav_mask = to_array(navigation_mask, chunks=nav_chunks)
            except ValueError:
                # re-raise with a message
                raise ValueError("navigation_mask has to be a signal, numpy or"
                                 " dask array, but "
                                 "{} was given".format(type(navigation_mask)))
        if flat_signal:
            nav_mask = ~nav_mask
        for ind in indices:
            chunk = get(data.dask,
                        (data.name, ) + ind + (0,)*bool(signalsize))
            n_mask = get(nav_mask.dask, (nav_mask.name, ) + ind)
            if flat_signal:
                yield chunk[n_mask, ...][..., signal_mask]
            else:
                chunk = chunk.copy()
                value = np.nan if np.can_cast('float', chunk.dtype) else 0
                chunk[n_mask, ...] = value
                chunk[..., signal_mask] = value
                yield chunk.reshape(chunk.shape[:-1] +
                                    self.axes_manager.signal_shape[::-1])
Example #52
def tocsr(self):
    nzs = self.data
    nzi = da.zeros(len(self.data), chunks=int(1e4))
Example #53
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      signal_mask=None,
                      navigation_mask=None,
                      get=threaded.get,
                      num_chunks=None,
                      reproject=True,
                      bounds=False,
                      **kwargs):
        """Perform Incremental (Batch) decomposition on the data, keeping n
        significant components.

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : str
            One of ('svd', 'PCA', 'ORPCA', 'ONMF'). By default 'svd',
            lazy SVD decomposition from dask.
        output_dimension : int
            the number of significant components to keep. If None, keep all
            (only valid for SVD)
        get : dask scheduler
            the dask scheduler to use for computations;
            default `dask.threaded.get`
        num_chunks : int
            the number of dask chunks to pass to the decomposition model.
            More chunks require more memory, but should run faster. Will be
            increased to contain at least output_dimension signals.
        navigation_mask : {BaseSignal, numpy array, dask array}
            The navigation locations marked as True are not used in the
            decomposition.
        signal_mask : {BaseSignal, numpy array, dask array}
            The signal locations marked as True are not used in the
            decomposition.
        reproject : bool
            Reproject data on the learnt components (factors) after learning.
        bounds : bool
            Deprecated and has no effect since version 1.3; will be removed
            in v2.0.
        **kwargs
            passed to the partial_fit/fit functions.

        Notes
        -----
        Various algorithm parameters and their default values:
            ONMF:
                lambda1=1,
                kappa=1,
                robust=False,
                store_r=False,
                batch_size=None
            ORPCA:
                fast=True,
                lambda1=None,
                lambda2=None,
                method=None,
                learning_rate=None,
                init=None,
                training_samples=None,
                momentum=None
            PCA:
                batch_size=None,
                copy=True,
                white=False

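        Examples
        --------
        A minimal usage sketch, assuming ``s`` is an existing lazy signal
        (the variable name is illustrative):

        >>> s.decomposition(algorithm='PCA', output_dimension=3)
        >>> factors = s.learning_results.factors
        >>> loadings = s.learning_results.loadings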

        """
        if bounds:
            msg = (
                "The `bounds` keyword is deprecated and will be removed "
                "in v2.0. Since version > 1.3 this has no effect.")
            warnings.warn(msg, VisibleDeprecationWarning)
        explained_variance = None
        explained_variance_ratio = None
        _al_data = self._data_aligned_with_axes
        nav_chunks = _al_data.chunks[:self.axes_manager.navigation_dimension]
        sig_chunks = _al_data.chunks[self.axes_manager.navigation_dimension:]

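        # blocksize is the number of navigation pixels in the smallest chunk;
        # a learning batch consists of num_chunks such chunks.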
        num_chunks = 1 if num_chunks is None else num_chunks
        blocksize = np.min([multiply(ar) for ar in product(*nav_chunks)])
        nblocks = multiply([len(c) for c in nav_chunks])
        if algorithm != "svd" and output_dimension is None:
            raise ValueError("With the %s the output_dimension "
                             "must be specified" % algorithm)
        if output_dimension and blocksize / output_dimension < num_chunks:
            num_chunks = int(np.ceil(blocksize / output_dimension))
        blocksize *= num_chunks
        # LEARN
        if algorithm == 'PCA':
            from sklearn.decomposition import IncrementalPCA
            obj = IncrementalPCA(n_components=output_dimension)
            method = partial(obj.partial_fit, **kwargs)
            reproject = True

        elif algorithm == 'ORPCA':
            from hyperspy.learn.rpca import ORPCA
            kwg = {'fast': True}
            kwg.update(kwargs)
            obj = ORPCA(output_dimension, **kwg)
            method = partial(obj.fit, iterating=True)

        elif algorithm == 'ONMF':
            from hyperspy.learn.onmf import ONMF
            batch_size = kwargs.pop('batch_size', None)
            obj = ONMF(output_dimension, **kwargs)
            method = partial(obj.fit, batch_size=batch_size)
        elif algorithm != "svd":
            raise ValueError("algorithm %r not known" % algorithm)

        original_data = self.data
        try:
            if normalize_poissonian_noise:
                data = self._data_aligned_with_axes
                ndim = self.axes_manager.navigation_dimension
                sdim = self.axes_manager.signal_dimension
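                # Boolean "keep" masks: user masks flag locations to exclude,
                # so invert them; with no mask given, keep everything.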
                nm = da.logical_not(
                    da.zeros(
                        self.axes_manager.navigation_shape[::-1],
                        chunks=nav_chunks)
                    if navigation_mask is None else to_array(
                        navigation_mask, chunks=nav_chunks))
                sm = da.logical_not(
                    da.zeros(
                        self.axes_manager.signal_shape[::-1],
                        chunks=sig_chunks)
                    if signal_mask is None else to_array(
                        signal_mask, chunks=sig_chunks))
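                # Marginal sums: bH holds one value per signal element
                # (summed over navigation), aG one per navigation pixel
                # (summed over signal).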
                bH, aG = da.compute(
                    data.sum(axis=tuple(range(ndim))),
                    data.sum(axis=tuple(range(ndim, ndim + sdim))))
                bH = da.where(sm, bH, 1)
                aG = da.where(nm, aG, 1)

                raG = da.sqrt(aG)
                rbH = da.sqrt(bH)

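                # Scale each element by the outer product of sqrt(aG) and
                # sqrt(bH), which approximately normalizes Poissonian noise.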
                coeff = raG[(..., ) + (None, ) * rbH.ndim] *\
                    rbH[(None, ) * raG.ndim + (...,)]
                # map_blocks is lazy and returns a new array; keep the result.
                coeff = coeff.map_blocks(np.nan_to_num)
                coeff = da.where(coeff == 0, 1, coeff)
                data = data / coeff
                self.data = data

            # LEARN
            if algorithm == "svd":
                reproject = False
                from dask.array.linalg import svd
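                # Unfold to a 2D (navigation x signal) matrix so dask's lazy
                # SVD can be applied directly; fold back in the finally block.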
                try:
                    self._unfolded4decomposition = self.unfold()
                    # TODO: implement masking
                    if navigation_mask is not None or signal_mask is not None:
                        raise NotImplementedError(
                            "Masking is not yet implemented for lazy SVD."
                        )
                    U, S, V = svd(self.data)
                    factors = V.T
                    explained_variance = S ** 2 / self.data.shape[0]
                    loadings = U * S
                finally:
                    if self._unfolded4decomposition is True:
                        self.fold()
                        self._unfolded4decomposition = False
            else:
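                # Online algorithms: accumulate num_chunks flattened blocks
                # and feed them to the estimator in a single batch.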
                this_data = []
                try:
                    for chunk in progressbar(
                            self._block_iterator(
                                flat_signal=True,
                                get=get,
                                signal_mask=signal_mask,
                                navigation_mask=navigation_mask),
                            total=nblocks,
                            leave=True,
                            desc='Learn'):
                        this_data.append(chunk)
                        if len(this_data) == num_chunks:
                            thedata = np.concatenate(this_data, axis=0)
                            method(thedata)
                            this_data = []
                    if len(this_data):
                        thedata = np.concatenate(this_data, axis=0)
                        method(thedata)
                except KeyboardInterrupt:
                    pass

            # GET ALREADY CALCULATED RESULTS
            if algorithm == 'PCA':
                explained_variance = obj.explained_variance_
                explained_variance_ratio = obj.explained_variance_ratio_
                factors = obj.components_.T

            elif algorithm == 'ORPCA':
                _, _, U, S, V = obj.finish()
                factors = U * S
                loadings = V
                explained_variance = S**2 / len(factors)

            elif algorithm == 'ONMF':
                factors, loadings = obj.finish()
                loadings = loadings.T

            # REPROJECT
            if reproject:
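                # Each algorithm pairs a projection method with a `post` step
                # that assembles the per-block results into the loadings.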
                if algorithm == 'PCA':
                    method = obj.transform

                    def post(a): return np.concatenate(a, axis=0)
                elif algorithm == 'ORPCA':
                    method = obj.project
                    obj.R = []

                    def post(a): return obj.finish()[4]
                elif algorithm == 'ONMF':
                    method = obj.project

                    def post(a): return np.concatenate(a, axis=1).T

                _map = map(method,
                           self._block_iterator(
                               flat_signal=True,
                               get=get,
                               signal_mask=signal_mask,
                               navigation_mask=navigation_mask))
                H = []
                try:
                    for thing in progressbar(
                            _map, total=nblocks, desc='Project'):
                        H.append(thing)
                except KeyboardInterrupt:
                    pass
                loadings = post(H)

            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # RESHUFFLE "blocked" LOADINGS
            ndim = self.axes_manager.navigation_dimension
            if algorithm != "svd":  # Only needed for online algorithms
                try:
                    loadings = _reshuffle_mixed_blocks(
                        loadings,
                        ndim,
                        (output_dimension,),
                        nav_chunks).reshape((-1, output_dimension))
                except ValueError:
                    # In case the projection step was not finished, it's left
                    # as scrambled
                    pass
        finally:
            self.data = original_data

        target = self.learning_results
        target.decomposition_algorithm = algorithm
        target.output_dimension = output_dimension
        if algorithm != "svd":
            target._object = obj
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors = target.factors * rbH.ravel()[:, np.newaxis]
            target.loadings = target.loadings * raG.ravel()[:, np.newaxis]