Example 1
    def test_returnXArray(self):
        cdo = Cdo()
        cdo.debug = DEBUG

        if not cdo.hasXarray:
          print("nothing testes for test_returnXArray because of missing xarray")
          return

        topo = cdo.topo(options='-f nc',returnXArray='topo')
        self.assertEqual(-1889,int(topo.mean()))
        self.assertEqual(259200,topo.count())

        bathy = cdo.setrtomiss(0,10000, input = " -topo" ,returnXArray='topo')
        self.assertEqual(-3385,int(bathy.mean()))
        self.assertEqual(173565,bathy.count())

        oro = cdo.setrtomiss(-10000,0,
            input = cdo.topo(options='-f nc'),returnXArray='topo')
        self.assertEqual(1142,int(oro.mean()))
        self.assertEqual(85567,oro.count())

        bathy = cdo.remapnn('r2x2',input = cdo.topo(options = '-f nc'), returnXArray = 'topo')
        self.assertEqual(-4298.0,bathy[0,0])
        self.assertEqual(-2669.0,bathy[0,1])

        ta = cdo.remapnn('r2x2',input = cdo.topo(options = '-f nc'))
        tb = cdo.subc(-2669.0,input = ta)
        withMask = cdo.div(input=ta+" "+tb, returnXArray='topo')
        from xarray import DataArray
        mask = DataArray.to_masked_array(withMask).mask
        self.assertEqual(False, mask[0,0])
        self.assertEqual(False, mask[1,0])
        self.assertEqual(False, mask[1,1])
        self.assertEqual(True, mask[0,1])
Example 2
    def __init__(self, data,
                 coords=None,
                 dims=None,
                 name=None,
                 attrs=None,
                 encoding=None,
                 fastpath=False
                 ):

        # Older xarray (< 0.7) does not accept the 'fastpath' keyword, so it is
        # omitted from the DataArray.__init__ call in that case.
        if major_x_ver == 0 and minor_x_ver < 7:

            DataArray.__init__(self, data=data,
                               coords=coords,
                               dims=dims,
                               name=name,
                               attrs=attrs,
                               encoding=encoding,
                               # fastpath=fastpath
                               )
        else:
            DataArray.__init__(self, data=data,
                               coords=coords,
                               dims=dims,
                               name=name,
                               attrs=attrs,
                               encoding=encoding,
                               fastpath=fastpath
                               )
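The version gate above needs major_x_ver and minor_x_ver from outside the snippet; the following is a minimal sketch (an assumption, not part of the original module) of how they could be derived from the installed xarray version.

import xarray

_ver_parts = xarray.__version__.split('.')
# e.g. '0.10.9' -> major 0, minor 10
major_x_ver, minor_x_ver = int(_ver_parts[0]), int(_ver_parts[1])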
Example 3
    def setUp(self):
        a = easy_array((10, 15, 3, 2))
        darray = DataArray(a, dims=["y", "x", "col", "row"])
        darray.coords["col"] = np.array(["col" + str(x) for x in darray.coords["col"].values])
        darray.coords["row"] = np.array(["row" + str(x) for x in darray.coords["row"].values])

        self.darray = darray
Example 4
 def test_default_title(self):
     a = DataArray(easy_array((4, 3, 2)), dims=['a', 'b', 'c'])
     a.coords['c'] = [0, 1]
     a.coords['d'] = u'foo'
     self.plotfunc(a.isel(c=1))
     title = plt.gca().get_title()
     self.assertTrue('c = 1, d = foo' == title or 'd = foo, c = 1' == title)
Example 5
 def test_subplot_kws(self):
     a = easy_array((10, 15, 4))
     d = DataArray(a, dims=["y", "x", "z"])
     d.coords["z"] = list("abcd")
     g = d.plot(x="x", y="y", col="z", col_wrap=2, cmap="cool", subplot_kws=dict(axisbg="r"))
     for ax in g.axes.flat:
         self.assertEqual(ax.get_axis_bgcolor(), "r")
Example 6
    def get_dataset(self, key, info):
        """Load a dataset."""
        if self._channel != key.name:
            return

        logger.debug('Reading %s.', key.name)
        # FIXME: get this from MTD_MSIL1C.xml
        quantification_value = 10000.
        jp2 = glymur.Jp2k(self.filename)
        bitdepth = 0
        for seg in jp2.codestream.segment:
            try:
                bitdepth = max(bitdepth, seg.bitdepth[0])
            except AttributeError:
                pass

        jp2.dtype = (np.uint8 if bitdepth <= 8 else np.uint16)

        # Initialize the jp2 reader / doesn't work in a multi-threaded context.
        # jp2[0, 0]
        # data = da.from_array(jp2, chunks=CHUNK_SIZE) / quantification_value * 100

        data = da.from_delayed(delayed(jp2.read)(), jp2.shape, jp2.dtype)
        data = data.rechunk(CHUNK_SIZE) / quantification_value * 100

        proj = DataArray(data, dims=['y', 'x'])
        proj.attrs = info.copy()
        proj.attrs['units'] = '%'
        proj.attrs['platform_name'] = self.platform_name
        return proj
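A self-contained sketch of the dask.delayed -> DataArray pattern used above, with a placeholder reader standing in for glymur's jp2.read (names and shapes here are illustrative only).

import numpy as np
import dask.array as da
from dask import delayed
from xarray import DataArray

def _read_band():
    # stand-in for the delayed jp2.read() call
    return np.ones((4, 4), dtype=np.uint16)

lazy = da.from_delayed(delayed(_read_band)(), shape=(4, 4), dtype=np.uint16)
proj = DataArray(lazy / 10000. * 100, dims=['y', 'x'])
proj.attrs['units'] = '%'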
Example 7
def construct_dataarray(dim_num, dtype, contains_nan, dask):
    # dim_num <= 3
    rng = np.random.RandomState(0)
    shapes = [16, 8, 4][:dim_num]
    dims = ('x', 'y', 'z')[:dim_num]

    if np.issubdtype(dtype, np.floating):
        array = rng.randn(*shapes).astype(dtype)
    elif np.issubdtype(dtype, np.integer):
        array = rng.randint(0, 10, size=shapes).astype(dtype)
    elif np.issubdtype(dtype, np.bool_):
        array = rng.randint(0, 1, size=shapes).astype(dtype)
    elif dtype == str:
        array = rng.choice(['a', 'b', 'c', 'd'], size=shapes)
    else:
        raise ValueError
    da = DataArray(array, dims=dims, coords={'x': np.arange(16)}, name='da')

    if contains_nan:
        da = da.reindex(x=np.arange(20))
    if dask and has_dask:
        chunks = {d: 4 for d in dims}
        da = da.chunk(chunks)

    return da
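Illustrative calls to the helper above (numpy imported as in the test module; dask=False avoids the has_dask dependency).

import numpy as np

da_float = construct_dataarray(2, np.float64, contains_nan=True, dask=False)
da_bool = construct_dataarray(1, np.bool_, contains_nan=False, dask=False)
assert bool(da_float.isnull().any())   # reindexing to x=0..19 introduced NaNs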
Example 8
    def read(self):
        """
        :return: a DataArray populated with data read from the EEG files, plus a mask of
            which reads succeeded. The output size is number of channels x number of
            start offsets x number of time series points; the corresponding DataArray
            axes are 'channels', 'start_offsets', 'offsets'.
        """

        eventdata, read_ok_mask = self.read_file(self.dataroot,self.channels,self.start_offsets,self.read_size)
        # multiply by the gain
        eventdata *= self.params_dict['gain']

        eventdata = DataArray(eventdata,
                              dims=[self.channel_name, 'start_offsets', 'offsets'],
                              coords={
                                  self.channel_name: self.channels,
                                  'start_offsets': self.start_offsets.copy(),
                                  'offsets': np.arange(self.read_size),
                                  'samplerate': self.params_dict['samplerate']

                              }
                              )

        from copy import deepcopy
        eventdata.attrs = deepcopy(self.params_dict)

        return eventdata, read_ok_mask
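A small standalone sketch of the construction pattern in read(): a 3-D DataArray whose coords include a scalar, non-dimension coordinate ('samplerate') alongside the dimension coordinates. The names and values below are illustrative only.

import numpy as np
from xarray import DataArray

eeg = DataArray(np.zeros((2, 3, 4)),
                dims=['channels', 'start_offsets', 'offsets'],
                coords={'channels': ['ch1', 'ch2'],
                        'start_offsets': [0, 100, 200],
                        'offsets': np.arange(4),
                        'samplerate': 500.0})
assert eeg.coords['samplerate'].ndim == 0   # scalar coordinate, not a dimension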
Example 9
 def test_expand_without_dims(self):
     from satpy.resample import NativeResampler
     import numpy as np
     import dask.array as da
     from xarray import DataArray
     from pyresample.geometry import AreaDefinition
     from pyresample.utils import proj4_str_to_dict
     ds1 = DataArray(da.zeros((100, 50), chunks=85))
     proj_dict = proj4_str_to_dict('+proj=lcc +datum=WGS84 +ellps=WGS84 '
                                   '+lon_0=-95. +lat_0=25 +lat_1=25 '
                                   '+units=m +no_defs')
     target = AreaDefinition(
         'test',
         'test',
         'test',
         proj_dict,
         x_size=100,
         y_size=200,
         area_extent=(-1000., -1500., 1000., 1500.),
     )
     # source geo def doesn't actually matter
     resampler = NativeResampler(None, target)
     new_arr = resampler.resample(ds1)
     self.assertEqual(new_arr.shape, (200, 100))
     new_arr2 = resampler.resample(ds1.compute())
     self.assertTrue(np.all(new_arr == new_arr2))
Example 10
 def setUp(self):
     self.values = np.random.randn(4, 6)
     self.data = da.from_array(self.values, chunks=(2, 2))
     self.eager_array = DataArray(self.values, coords={'x': range(4)},
                                  dims=('x', 'y'), name='foo')
     self.lazy_array = DataArray(self.data, coords={'x': range(4)},
                                 dims=('x', 'y'), name='foo')
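A minimal sketch of what this eager/lazy pairing exercises: wrapping a dask array in a DataArray keeps the data lazy until it is computed (illustrative values).

import numpy as np
import dask.array as da
from xarray import DataArray

values = np.random.randn(4, 6)
lazy = DataArray(da.from_array(values, chunks=(2, 2)), dims=('x', 'y'))
assert isinstance(lazy.data, da.Array)                # still a dask array
assert np.allclose(lazy.compute().values, values)     # identical once computed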
Example 11
def test_decode_cf_time_bounds():

    da = DataArray(np.arange(6, dtype='int64').reshape((3, 2)),
                   coords={'time': [1, 2, 3]},
                   dims=('time', 'nbnd'), name='time_bnds')

    attrs = {'units': 'days since 2001-01',
             'calendar': 'standard',
             'bounds': 'time_bnds'}

    ds = da.to_dataset()
    ds['time'].attrs.update(attrs)
    _update_bounds_attributes(ds.variables)
    assert ds.variables['time_bnds'].attrs == {'units': 'days since 2001-01',
                                               'calendar': 'standard'}
    dsc = decode_cf(ds)
    assert dsc.time_bnds.dtype == np.dtype('M8[ns]')
    dsc = decode_cf(ds, decode_times=False)
    assert dsc.time_bnds.dtype == np.dtype('int64')

    # Do not overwrite existing attrs
    ds = da.to_dataset()
    ds['time'].attrs.update(attrs)
    bnd_attr = {'units': 'hours since 2001-01', 'calendar': 'noleap'}
    ds['time_bnds'].attrs.update(bnd_attr)
    _update_bounds_attributes(ds.variables)
    assert ds.variables['time_bnds'].attrs == bnd_attr

    # If bounds variable not available do not complain
    ds = da.to_dataset()
    ds['time'].attrs.update(attrs)
    ds['time'].attrs['bounds'] = 'fake_var'
    _update_bounds_attributes(ds.variables)
Example 12
def loopread(tcoutput, size_record, ncol, n_alt, size_head, size_data_record, tReq):
    tcoutput = Path(tcoutput).expanduser()
    n_t = tcoutput.stat().st_size // size_record // d_bytes

    chi = empty(n_t, float)
    t = empty(n_t, datetime)

    plasmaparam = DataArray(data=empty((n_t, n_alt, 4)), dims=["time", "alt_km", "isrparam"])
    iono = DataArray(data=empty((n_t, n_alt, 22)), dims=["time", "alt_km", "param"])

    with tcoutput.open("rb") as f:  # reset to beginning
        for i in range(n_t):
            iono[i, ...], chi[i], t[i], alt, plasmaparam[i, ...] = data_tra(
                f, size_record, ncol, n_alt, size_head, size_data_record
            )
        # FIXME isn't there a way to inherit coordinates like Pandas?
        iono = iono.assign_coords(time=t, param=PARAM, alt_km=alt)
        plasmaparam = plasmaparam.assign_coords(time=t, isrparam=ISRPARAM, alt_km=alt)
    # %% handle time request -- return a single time slice if tReq is given, else keep all times
    if tReq is not None:  # have to qualify this since picktime default gives last time as fallback
        tUsedInd = picktime(iono.time, tReq, None)[0]
        if tUsedInd is not None:  # in case ind is 0
            iono = iono[tUsedInd, ...]
            plasmaparam = plasmaparam[tUsedInd, ...]

    return iono, chi, plasmaparam
Example 13
def test_mask_valid_data():
    from xarray import DataArray, Dataset
    import numpy as np
    test_attrs = {
        'one': 1,
        'nodata': -999,
    }

    expected_data_array = DataArray(np.array([[1., np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, np.nan]],
                                             dtype='float'),
                                    attrs=test_attrs, name='var_one')

    data_array = DataArray([[1, -999, -999], [2, 3, -999], [-999, -999, -999]], attrs=test_attrs)
    dataset = Dataset(data_vars={'var_one': data_array}, attrs={'ds_attr': 'still here'})

    # Make sure test is actually changing something
    assert not data_array.equals(expected_data_array)

    output_ds = mask_valid_data(dataset, keep_attrs=True)
    assert output_ds.attrs['ds_attr'] == 'still here'
    assert output_ds.data_vars['var_one'].equals(expected_data_array)
    assert output_ds.data_vars['var_one'].attrs['one'] == 1

    output_da = mask_valid_data(data_array, keep_attrs=True)
    assert output_da.equals(expected_data_array)
    assert output_da.attrs['one'] == 1
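mask_valid_data is library code outside this snippet; below is a hedged sketch of the nodata-to-NaN idea it is tested for, using only plain xarray (the real implementation may differ).

import numpy as np
from xarray import DataArray

arr = DataArray([[1, -999], [2, 3]], attrs={'nodata': -999})
masked = arr.where(arr != arr.attrs['nodata'])   # nodata values become NaN
assert np.isnan(masked.values[0, 1])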
Example 14
def cyclic_dataarray(da, coord='lon'):
    """ Add a cyclic coordinate point to a DataArray along a specified
    named coordinate dimension.

    >>> from xarray import DataArray
    >>> data = DataArray([[1, 2, 3], [4, 5, 6]],
    ...                      coords={'x': [1, 2], 'y': range(3)},
    ...                      dims=['x', 'y'])
    >>> cd = cyclic_dataarray(data, 'y')
    >>> print(cd.data)
    array([[1, 2, 3, 1],
           [4, 5, 6, 4]])
    """
    assert isinstance(da, DataArray)

    lon_idx = da.dims.index(coord)
    cyclic_data, cyclic_coord = add_cyclic_point(da.values,
                                                 coord=da.coords[coord],
                                                 axis=lon_idx)

    # Copy and add the cyclic coordinate and data
    new_coords = dict(da.coords)
    new_coords[coord] = cyclic_coord
    new_values = cyclic_data

    new_da = DataArray(new_values, dims=da.dims, coords=new_coords)

    # Copy the attributes for the re-constructed data and coords
    for att, val in da.attrs.items():
        new_da.attrs[att] = val
    for c in da.coords:
        for att in da.coords[c].attrs:
            new_da.coords[c].attrs[att] = da.coords[c].attrs[att]

    return new_da
Example 15
 def test_subplot_kws(self):
     a = easy_array((10, 15, 4))
     d = DataArray(a, dims=['y', 'x', 'z'])
     d.coords['z'] = list('abcd')
     g = d.plot(x='x', y='y', col='z', col_wrap=2, cmap='cool',
                subplot_kws=dict(axisbg='r'))
     for ax in g.axes.flat:
         self.assertEqual(ax.get_axis_bgcolor(), 'r')
Example 16
 def test_datetime_dimension(self):
     nrow = 3
     ncol = 4
     time = pd.date_range("2000-01-01", periods=nrow)
     a = DataArray(easy_array((nrow, ncol)), coords=[("time", time), ("y", range(ncol))])
     a.plot()
     ax = plt.gca()
     self.assertTrue(ax.has_data())
Example 17
 def test_stack(self):
     data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4))
     arr = DataArray(data, dims=('w', 'x', 'y'))
     stacked = arr.stack(z=('x', 'y'))
     z = pd.MultiIndex.from_product([np.arange(3), np.arange(4)],
                                    names=['x', 'y'])
     expected = DataArray(data.reshape(2, -1), {'z': z}, dims=['w', 'z'])
     assert stacked.data.chunks == expected.data.chunks
     self.assertLazyAndEqual(expected, stacked)
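The same stack() pattern on a small in-memory array, for reference (a sketch, not part of the test suite).

import numpy as np
import pandas as pd
from xarray import DataArray

arr = DataArray(np.arange(24).reshape(2, 3, 4), dims=('w', 'x', 'y'))
stacked = arr.stack(z=('x', 'y'))
assert stacked.dims == ('w', 'z')
assert isinstance(stacked.indexes['z'], pd.MultiIndex)   # z holds (x, y) pairs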
Example 18
    def setUp(self):
        a = easy_array((10, 15, 3, 2))
        darray = DataArray(a, dims=['y', 'x', 'col', 'row'])
        darray.coords['col'] = np.array(['col' + str(x) for x in
                                         darray.coords['col'].values])
        darray.coords['row'] = np.array(['row' + str(x) for x in
                                         darray.coords['row'].values])

        self.darray = darray
Example 19
    def test_convenient_facetgrid_4d(self):
        a = easy_array((10, 15, 2, 3))
        d = DataArray(a, dims=['y', 'x', 'columns', 'rows'])
        g = d.plot(x='x', y='y', col='columns', row='rows')

        self.assertArrayEqual(g.axes.shape, [3, 2])
        for ax in g.axes.flat:
            self.assertTrue(ax.has_data())

        with self.assertRaisesRegexp(ValueError, '[Ff]acet'):
            d.plot(x='x', y='y', col='columns', ax=plt.gca())
Example 20
    def setUp(self):
        '''
        Create a DataArray with a time-axis that contains datetime objects.
        '''
        month = np.arange(1, 13, 1)
        data = np.sin(2 * np.pi * month / 12.0)

        darray = DataArray(data, dims=['time'])
        darray.coords['time'] = np.array([datetime(2017, m, 1) for m in month])

        self.darray = darray
Example 21
    def test_convenient_facetgrid_4d(self):
        a = easy_array((10, 15, 2, 3))
        d = DataArray(a, dims=["y", "x", "columns", "rows"])
        g = d.plot(x="x", y="y", col="columns", row="rows")

        self.assertArrayEqual(g.axes.shape, [3, 2])
        for ax in g.axes.flat:
            self.assertTrue(ax.has_data())

        with self.assertRaisesRegexp(ValueError, "[Ff]acet"):
            d.plot(x="x", y="y", col="columns", ax=plt.gca())
Example 22
 def setUp(self):
     da = DataArray(easy_array((10, 15), start=-1), dims=["y", "x"])
     # add 2d coords
     ds = da.to_dataset(name="testvar")
     x, y = np.meshgrid(da.x.values, da.y.values)
     ds["x2d"] = DataArray(x, dims=["y", "x"])
     ds["y2d"] = DataArray(y, dims=["y", "x"])
     ds.set_coords(["x2d", "y2d"], inplace=True)
     # set darray and plot method
     self.darray = ds.testvar
     self.plotmethod = getattr(self.darray.plot, self.plotfunc.__name__)
Example 23
 def test_dataarray_pickle(self):
     # Test that pickling/unpickling does not convert the dask
     # backend to numpy
     a1 = DataArray(build_dask_array())
     a1.compute()
     self.assertFalse(a1._in_memory)
     self.assertEquals(kernel_call_count, 1)
     a2 = pickle.loads(pickle.dumps(a1))
     self.assertEquals(kernel_call_count, 1)
     self.assertDataArrayIdentical(a1, a2)
     self.assertFalse(a1._in_memory)
     self.assertFalse(a2._in_memory)
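A self-contained sketch of the behaviour this test relies on: pickling a dask-backed DataArray round-trips without turning the data into numpy (build_dask_array and the kernel-call counter are test fixtures not shown here).

import pickle
import dask.array as da
from xarray import DataArray

a1 = DataArray(da.zeros(4, chunks=2))
a2 = pickle.loads(pickle.dumps(a1))
assert isinstance(a2.data, da.Array)   # still lazy after the round trip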
Example 24
 def test_subplot_kws(self):
     a = easy_array((10, 15, 4))
     d = DataArray(a, dims=['y', 'x', 'z'])
     d.coords['z'] = list('abcd')
     g = d.plot(x='x', y='y', col='z', col_wrap=2, cmap='cool',
                subplot_kws=dict(axisbg='r'))
     for ax in g.axes.flat:
         try:
             # mpl V2
             self.assertEqual(ax.get_facecolor()[0:3],
                              mpl.colors.to_rgb('r'))
         except AttributeError:
             self.assertEqual(ax.get_axis_bgcolor(), 'r')
Example 25
def sumplasmaline(fn,P):
    spec,freq = readplasmaline(fn,P)
    assert isinstance(spec,DataArray) and spec.ndim==4
    assert isinstance(P['flim'][0],float)

    z = spec.srng
    specsum = DataArray(index=spec.items,columns=spec.labels)

    zind = (P['zlim'][0] <= z) & (z <= P['zlim'][1])

    for s in spec:
        find = (P['flim'][0] <= absolute(freq[s]/1.e6)) & (absolute(freq[s]/1.e6) < P['flim'][1])
        specsum.loc[:,s] = spec.loc[:,:,zind,find].sum(axis=3).sum(axis=2) #FIXME .sum(dim=)

    return specsum
Example 26
    def test_convenient_facetgrid(self):
        a = easy_array((10, 15, 4))
        d = DataArray(a, dims=['y', 'x', 'z'])
        d.coords['z'] = list('abcd')
        g = d.plot(x='x', y='y', col='z', col_wrap=2, cmap='cool')

        self.assertArrayEqual(g.axes.shape, [2, 2])
        for ax in g.axes.flat:
            self.assertTrue(ax.has_data())

        with self.assertRaisesRegexp(ValueError, '[Ff]acet'):
            d.plot(x='x', y='y', col='z', ax=plt.gca())

        with self.assertRaisesRegexp(ValueError, '[Ff]acet'):
            d[0].plot(x='x', y='y', col='z', ax=plt.gca())
Example 27
    def test_convenient_facetgrid(self):
        a = easy_array((10, 15, 4))
        d = DataArray(a, dims=["y", "x", "z"])
        d.coords["z"] = list("abcd")
        g = d.plot(x="x", y="y", col="z", col_wrap=2, cmap="cool")

        self.assertArrayEqual(g.axes.shape, [2, 2])
        for ax in g.axes.flat:
            self.assertTrue(ax.has_data())

        with self.assertRaisesRegexp(ValueError, "[Ff]acet"):
            d.plot(x="x", y="y", col="z", ax=plt.gca())

        with self.assertRaisesRegexp(ValueError, "[Ff]acet"):
            d[0].plot(x="x", y="y", col="z", ax=plt.gca())
Example 28
def unstack_cat(da: xr.DataArray, dim, level=0):
    """Unstack DataArray expanding to dataset along a given level

    Parameters
    ----------
    da
    dim
    level

    Returns
    -------
    xr.Dataset

    """
    if not isinstance(da, xr.DataArray):
        raise ValueError("da must be a DataArray object")

    idx = da.indexes[dim]
    if not isinstance(idx, pd.MultiIndex):
        raise ValueError(f"{dim} is not a stacked coordinate")
    variables = idx.levels[level]

    # pull variables out of datarray
    data_dict = {}
    for k in variables:
        data_dict[k] = da.sel(variable=k).squeeze(drop=True)

    # unstacked dataset
    return xr.Dataset(data_dict)
Example 29
class DoNotCooccur(HACConstraint):
    """Do NOT merge co-occurring face tracks"""

    def initialize(self, parent=None):

        current_state = parent.current_state
        clusters = [cluster for cluster in current_state.labels()]
        n_clusters = len(clusters)

        self._cooccur = DataArray(
            np.zeros((n_clusters, n_clusters)),
            [('i', clusters), ('j', clusters)])

        for (segment1, track1), (segment2, track2) in current_state.co_iter(current_state):
            i = current_state[segment1, track1]
            j = current_state[segment2, track2]
            if i == j:
                continue
            self._cooccur.loc[i, j] = 1
            self._cooccur.loc[j, i] = 1

    def mergeable(self, clusters, parent=None):
        return self._cooccur.loc[clusters, clusters].sum().item() == 0.

    def update(self, merged_clusters, new_cluster, parent=None):

        # clusters that will be removed
        _clusters = list(set(merged_clusters) - set([new_cluster]))

        # update the co-occurrence matrix
        self._cooccur.loc[new_cluster, :] += self._cooccur.loc[_clusters, :].sum(dim='i')
        self._cooccur.loc[:, new_cluster] += self._cooccur.loc[:, _clusters].sum(dim='j')

        # remove clusters
        self._cooccur = self._cooccur.drop(_clusters, dim='i').drop(_clusters, dim='j')
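A minimal illustration of the label-based .loc indexing the constraint uses for its co-occurrence matrix (cluster names here are placeholders).

import numpy as np
from xarray import DataArray

clusters = ['A', 'B', 'C']
cooccur = DataArray(np.zeros((3, 3)), [('i', clusters), ('j', clusters)])
cooccur.loc['A', 'B'] = 1
cooccur.loc['B', 'A'] = 1
assert cooccur.loc[['A', 'B'], ['A', 'B']].sum().item() == 2.0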
Example 30
def centered_to_right(f: xr.DataArray, block_size, dim, boundary='wrap'):
    """Move centered variable to the right interface

    Parameters
    ----------
    f : xr.DataArray
    block_size : size of the coarse graining block
    dim : str
    boundary : str, optional
        A boundary condition which is passed to `isel_bc`

    Returns
    -------
    interface : xr.DataArray
        The value of f along the right interfaces of the coarse-grain blocks
    """
    new_coord = get_center_coords(f[dim].values, block_size)
    n = f.shape[f.get_axis_num(dim)]

    left_idx = slice(block_size, n+1, block_size)
    right_idx = slice(block_size-1, n, block_size)

    left = isel_bc(f, left_idx, dim, boundary=boundary)
    left = left.assign_coords(**{dim: new_coord})
    right = isel_bc(f, right_idx, dim, boundary=boundary)
    right = right.assign_coords(**{dim: new_coord})

    return (left+right)/2
Example 31
class TestDataArrayAndDataset(DaskTestCase):
    def assertLazyAndIdentical(self, expected, actual):
        self.assertLazyAnd(expected, actual, self.assertDataArrayIdentical)

    def assertLazyAndAllClose(self, expected, actual):
        self.assertLazyAnd(expected, actual, self.assertDataArrayAllClose)

    def assertLazyAndEqual(self, expected, actual):
        self.assertLazyAnd(expected, actual, self.assertDataArrayEqual)

    def setUp(self):
        self.values = np.random.randn(4, 6)
        self.data = da.from_array(self.values, chunks=(2, 2))
        self.eager_array = DataArray(self.values,
                                     coords={'x': range(4)},
                                     dims=('x', 'y'),
                                     name='foo')
        self.lazy_array = DataArray(self.data,
                                    coords={'x': range(4)},
                                    dims=('x', 'y'),
                                    name='foo')

    def test_rechunk(self):
        chunked = self.eager_array.chunk({'x': 2}).chunk({'y': 2})
        self.assertEqual(chunked.chunks, ((2, ) * 2, (2, ) * 3))
        self.assertLazyAndIdentical(self.lazy_array, chunked)

    def test_new_chunk(self):
        chunked = self.eager_array.chunk()
        self.assertTrue(chunked.data.name.startswith('xarray-<this-array>'))

    def test_lazy_dataset(self):
        lazy_ds = Dataset({'foo': (('x', 'y'), self.data)})
        self.assertIsInstance(lazy_ds.foo.variable.data, da.Array)

    def test_lazy_array(self):
        u = self.eager_array
        v = self.lazy_array

        self.assertLazyAndAllClose(u, v)
        self.assertLazyAndAllClose(-u, -v)
        self.assertLazyAndAllClose(u.T, v.T)
        self.assertLazyAndAllClose(u.mean(), v.mean())
        self.assertLazyAndAllClose(1 + u, 1 + v)

        actual = xr.concat([v[:2], v[2:]], 'x')
        self.assertLazyAndAllClose(u, actual)

    @pytest.mark.skipif(LooseVersion(dask.__version__) <= '0.15.4',
                        reason='Need dask 0.16 for new interface')
    def test_compute(self):
        u = self.eager_array
        v = self.lazy_array

        assert dask.is_dask_collection(v)
        (v2, ) = dask.compute(v + 1)
        assert not dask.is_dask_collection(v2)

        assert ((u + 1).data == v2.data).all()

    @pytest.mark.skipif(LooseVersion(dask.__version__) <= '0.15.4',
                        reason='Need dask 0.16 for new interface')
    def test_persist(self):
        u = self.eager_array
        v = self.lazy_array + 1

        (v2, ) = dask.persist(v)
        assert v is not v2
        assert len(v2.__dask_graph__()) < len(v.__dask_graph__())
        assert v2.__dask_keys__() == v.__dask_keys__()
        assert dask.is_dask_collection(v)
        assert dask.is_dask_collection(v2)

        self.assertLazyAndAllClose(u + 1, v)
        self.assertLazyAndAllClose(u + 1, v2)

    def test_concat_loads_variables(self):
        # Test that concat() computes not-in-memory variables at most once
        # and loads them in the output, while leaving the input unaltered.
        d1 = build_dask_array('d1')
        c1 = build_dask_array('c1')
        d2 = build_dask_array('d2')
        c2 = build_dask_array('c2')
        d3 = build_dask_array('d3')
        c3 = build_dask_array('c3')
        # Note: c is a non-index coord.
        # Index coords are loaded by IndexVariable.__init__.
        ds1 = Dataset(data_vars={'d': ('x', d1)}, coords={'c': ('x', c1)})
        ds2 = Dataset(data_vars={'d': ('x', d2)}, coords={'c': ('x', c2)})
        ds3 = Dataset(data_vars={'d': ('x', d3)}, coords={'c': ('x', c3)})

        assert kernel_call_count == 0
        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars='different',
                        coords='different')
        # each kernel is computed exactly once
        assert kernel_call_count == 6
        # variables are loaded in the output
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars='all',
                        coords='all')
        # no extra kernel calls
        assert kernel_call_count == 6
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)

        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars=['d'],
                        coords=['c'])
        # no extra kernel calls
        assert kernel_call_count == 6
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)

        out = xr.concat([ds1, ds2, ds3], dim='n', data_vars=[], coords=[])
        # variables are loaded once as we are validating that they're identical
        assert kernel_call_count == 12
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars='different',
                        coords='different',
                        compat='identical')
        # compat=identical doesn't do any more kernel calls than compat=equals
        assert kernel_call_count == 18
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        # When the test for different turns true halfway through,
        # stop computing variables as it would not have any benefit
        ds4 = Dataset(data_vars={'d': ('x', [2.0])},
                      coords={'c': ('x', [2.0])})
        out = xr.concat([ds1, ds2, ds4, ds3],
                        dim='n',
                        data_vars='different',
                        coords='different')
        # the variables of ds1 and ds2 were computed, but those of ds3 were not
        assert kernel_call_count == 22
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)
        # the data of ds1 and ds2 was loaded into numpy and then
        # concatenated to the data of ds3. Thus, only ds3 is computed now.
        out.compute()
        assert kernel_call_count == 24

        # Finally, test that the originals are unaltered
        assert ds1['d'].data is d1
        assert ds1['c'].data is c1
        assert ds2['d'].data is d2
        assert ds2['c'].data is c2
        assert ds3['d'].data is d3
        assert ds3['c'].data is c3

    def test_groupby(self):
        if LooseVersion(dask.__version__) == LooseVersion('0.15.3'):
            pytest.xfail('upstream bug in dask: '
                         'https://github.com/dask/dask/issues/2718')

        u = self.eager_array
        v = self.lazy_array

        expected = u.groupby('x').mean()
        actual = v.groupby('x').mean()
        self.assertLazyAndAllClose(expected, actual)

    def test_groupby_first(self):
        u = self.eager_array
        v = self.lazy_array

        for coords in [u.coords, v.coords]:
            coords['ab'] = ('x', ['a', 'a', 'b', 'b'])
        with raises_regex(NotImplementedError, 'dask'):
            v.groupby('ab').first()
        expected = u.groupby('ab').first()
        actual = v.groupby('ab').first(skipna=False)
        self.assertLazyAndAllClose(expected, actual)

    def test_reindex(self):
        u = self.eager_array.assign_coords(y=range(6))
        v = self.lazy_array.assign_coords(y=range(6))

        for kwargs in [{
                'x': [2, 3, 4]
        }, {
                'x': [1, 100, 2, 101, 3]
        }, {
                'x': [2.5, 3, 3.5],
                'y': [2, 2.5, 3]
        }]:
            expected = u.reindex(**kwargs)
            actual = v.reindex(**kwargs)
            self.assertLazyAndAllClose(expected, actual)

    def test_to_dataset_roundtrip(self):
        u = self.eager_array
        v = self.lazy_array

        expected = u.assign_coords(x=u['x'])
        self.assertLazyAndEqual(expected, v.to_dataset('x').to_array('x'))

    def test_merge(self):
        def duplicate_and_merge(array):
            return xr.merge([array, array.rename('bar')]).to_array()

        expected = duplicate_and_merge(self.eager_array)
        actual = duplicate_and_merge(self.lazy_array)
        self.assertLazyAndEqual(expected, actual)

    def test_ufuncs(self):
        u = self.eager_array
        v = self.lazy_array
        self.assertLazyAndAllClose(np.sin(u), xu.sin(v))

    def test_where_dispatching(self):
        a = np.arange(10)
        b = a > 3
        x = da.from_array(a, 5)
        y = da.from_array(b, 5)
        expected = DataArray(a).where(b)
        self.assertLazyAndEqual(expected, DataArray(a).where(y))
        self.assertLazyAndEqual(expected, DataArray(x).where(b))
        self.assertLazyAndEqual(expected, DataArray(x).where(y))

    def test_simultaneous_compute(self):
        ds = Dataset({'foo': ('x', range(5)), 'bar': ('x', range(5))}).chunk()

        count = [0]

        def counting_get(*args, **kwargs):
            count[0] += 1
            return dask.get(*args, **kwargs)

        with dask.set_options(get=counting_get):
            ds.load()
        self.assertEqual(count[0], 1)

    def test_stack(self):
        data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4))
        arr = DataArray(data, dims=('w', 'x', 'y'))
        stacked = arr.stack(z=('x', 'y'))
        z = pd.MultiIndex.from_product(
            [np.arange(3), np.arange(4)], names=['x', 'y'])
        expected = DataArray(data.reshape(2, -1), {'z': z}, dims=['w', 'z'])
        assert stacked.data.chunks == expected.data.chunks
        self.assertLazyAndEqual(expected, stacked)

    def test_dot(self):
        eager = self.eager_array.dot(self.eager_array[0])
        lazy = self.lazy_array.dot(self.lazy_array[0])
        self.assertLazyAndAllClose(eager, lazy)

    def test_dataarray_repr(self):
        # Test that __repr__ does not convert the dask backend to numpy
        # for either the data variable or the non-index coords
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        a = DataArray(data, dims=['x'], coords={'y': ('x', nonindex_coord)})
        expected = dedent("""\
        <xarray.DataArray 'data' (x: 1)>
        dask.array<shape=(1,), dtype=int64, chunksize=(1,)>
        Coordinates:
            y        (x) int64 dask.array<shape=(1,), chunksize=(1,)>
        Dimensions without coordinates: x""")
        self.assertEqual(expected, repr(a))
        assert kernel_call_count == 0

    def test_dataset_repr(self):
        # Test that __repr__ does not convert the dask backend to numpy
        # for either the data variables or the non-index coords
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        ds = Dataset(data_vars={'a': ('x', data)},
                     coords={'y': ('x', nonindex_coord)})
        expected = dedent("""\
        <xarray.Dataset>
        Dimensions:  (x: 1)
        Coordinates:
            y        (x) int64 dask.array<shape=(1,), chunksize=(1,)>
        Dimensions without coordinates: x
        Data variables:
            a        (x) int64 dask.array<shape=(1,), chunksize=(1,)>""")
        self.assertEqual(expected, repr(ds))
        assert kernel_call_count == 0

    def test_dataarray_pickle(self):
        # Test that pickling/unpickling does not convert the dask backend
        # to numpy for either the data variable or the non-index coords
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        a1 = DataArray(data, dims=['x'], coords={'y': ('x', nonindex_coord)})
        a1.compute()
        self.assertFalse(a1._in_memory)
        self.assertFalse(a1.coords['y']._in_memory)
        assert kernel_call_count == 2
        a2 = pickle.loads(pickle.dumps(a1))
        assert kernel_call_count == 2
        self.assertDataArrayIdentical(a1, a2)
        self.assertFalse(a1._in_memory)
        self.assertFalse(a2._in_memory)
        self.assertFalse(a1.coords['y']._in_memory)
        self.assertFalse(a2.coords['y']._in_memory)

    def test_dataset_pickle(self):
        # Test that pickling/unpickling does not convert the dask backend
        # to numpy for either the data variables or the non-index coords
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        ds1 = Dataset(data_vars={'a': ('x', data)},
                      coords={'y': ('x', nonindex_coord)})
        ds1.compute()
        self.assertFalse(ds1['a']._in_memory)
        self.assertFalse(ds1['y']._in_memory)
        assert kernel_call_count == 2
        ds2 = pickle.loads(pickle.dumps(ds1))
        assert kernel_call_count == 2
        self.assertDatasetIdentical(ds1, ds2)
        self.assertFalse(ds1['a']._in_memory)
        self.assertFalse(ds2['a']._in_memory)
        self.assertFalse(ds1['y']._in_memory)
        self.assertFalse(ds2['y']._in_memory)

    def test_dataarray_getattr(self):
        # ipython/jupyter does a long list of getattr() calls when trying to
        # represent an object.
        # Make sure we're not accidentally computing dask variables.
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        a = DataArray(data, dims=['x'], coords={'y': ('x', nonindex_coord)})
        with suppress(AttributeError):
            getattr(a, 'NOTEXIST')
        assert kernel_call_count == 0

    def test_dataset_getattr(self):
        # As above: make sure attribute access does not accidentally compute
        # the dask-backed data variables or the non-index coords
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        ds = Dataset(data_vars={'a': ('x', data)},
                     coords={'y': ('x', nonindex_coord)})
        with suppress(AttributeError):
            getattr(ds, 'NOTEXIST')
        assert kernel_call_count == 0

    def test_values(self):
        # Test that invoking the values property does not convert the dask
        # backend to numpy
        a = DataArray([1, 2]).chunk()
        self.assertFalse(a._in_memory)
        assert a.values.tolist() == [1, 2]
        self.assertFalse(a._in_memory)

    def test_from_dask_variable(self):
        # Test array creation from Variable with dask backend.
        # This is used e.g. in broadcast()
        a = DataArray(self.lazy_array.variable,
                      coords={'x': range(4)},
                      name='foo')
        self.assertLazyAndIdentical(self.lazy_array, a)
Example 32
def ingest_NXarpes(paths):
    assert len(paths) == 1
    path = paths[0]

    f = fits.open(path)

    frame_count = f[1].data.shape[0]
    data = np.stack([f[1].data[i][-1] for i in range(frame_count)])
    data = data.reshape((f[0].header["N_0_0"], f[0].header["N_0_1"],
                         data.shape[1], data.shape[2]))
    energy = np.arange(f[0].header["SFSE_0"], f[0].header["SFEE_0"],
                       1. / f[0].header["SFPEV_0"] * f[0].header["SFBE0"])
    sample_x = np.linspace(f[0].header["ST_0_0"], f[0].header["EN_0_0"],
                           f[0].header["N_0_0"])
    sample_y = np.linspace(f[0].header["ST_0_1"], f[0].header["EN_0_1"],
                           f[0].header["N_0_1"])
    unknown_axis_coords = np.arange(data.shape[2])

    dim0 = f"{f[0].header['NM_0_0']} ({f[0].header['UN_0_0']})"
    dim1 = f"{f[0].header['NM_0_1']} ({f[0].header['UN_0_1']})"

    # TODO: length mismatch between energy shape and data shape[-1] (energy).
    # is this occurring because of arange and floats (see arange's documentation on its return value)
    if len(energy) != data.shape[-1]:
        energy = energy[:-1]

    xarray = DataArray(
        data,
        dims=[dim1, dim0, ANGLE_FIELD, ENERGY_FIELD],
        coords=[sample_y, sample_x, unknown_axis_coords, energy])
    # dask_data = da.from_array(xarray)

    # Compose run start
    run_bundle = event_model.compose_run(
    )  # type: event_model.ComposeRunBundle
    start_doc = run_bundle.start_doc
    start_doc["sample_name"] = Path(paths[0]).resolve().stem
    start_doc["projections"] = projections
    yield 'start', start_doc

    # Compose descriptor
    source = 'nxSTXM'
    frame_data_keys = {
        'raw': {
            'source': source,
            'dtype': 'number',
            'dims': xarray.dims,
            # 'coords': [energy, sample_y, sample_x],
            'shape': data.shape
        },
        ENERGY_FIELD: {
            'source': source,
            'dtype': 'number',
            'shape': energy.shape
        },
        dim0: {
            'source': source,
            'dtype': 'number',
            'shape': sample_x.shape
        },
        dim1: {
            'source': source,
            'dtype': 'number',
            'shape': sample_y.shape
        },
        ANGLE_FIELD: {
            'source': source,
            'dtype': 'number',
            'shape': unknown_axis_coords.shape
        }
    }
    frame_stream_name = 'primary'
    frame_stream_bundle = run_bundle.compose_descriptor(
        data_keys=frame_data_keys,
        name=frame_stream_name,
        # configuration=_metadata(path)
    )
    yield 'descriptor', frame_stream_bundle.descriptor_doc

    # NOTE: Resource document may be meaningful in the future. For transient access it is not useful
    # # Compose resource
    # resource = run_bundle.compose_resource(root=Path(path).root, resource_path=path, spec='NCEM_DM', resource_kwargs={})
    # yield 'resource', resource.resource_doc

    # Compose datum_page
    # z_indices, t_indices = zip(*itertools.product(z_indices, t_indices))
    # datum_page_doc = resource.compose_datum_page(datum_kwargs={'index_z': list(z_indices), 'index_t': list(t_indices)})
    # datum_ids = datum_page_doc['datum_id']
    # yield 'datum_page', datum_page_doc

    yield 'event', frame_stream_bundle.compose_event(data={
        'raw':
        xarray,
        ENERGY_FIELD:
        energy,
        dim0:
        sample_x,
        dim1:
        sample_y,
        ANGLE_FIELD:
        unknown_axis_coords
    },
                                                     timestamps={
                                                         'raw': time.time(),
                                                         ENERGY_FIELD:
                                                         time.time(),
                                                         dim0: time.time(),
                                                         dim1: time.time(),
                                                         ANGLE_FIELD:
                                                         time.time()
                                                     })

    yield 'stop', run_bundle.compose_stop()
Example 33
 def test_nonnumeric_index_raises_typeerror(self):
     a = DataArray(easy_array((3, 2)), coords=[['a', 'b', 'c'], ['d', 'e']])
     with self.assertRaisesRegexp(TypeError, r'[Pp]lot'):
         self.plotfunc(a)
Example 34
 def test_2d_before_squeeze(self):
     a = DataArray(easy_array((1, 5)))
     a.plot()
Example 35
 def setUp(self):
     self.darray = DataArray(easy_array((2, 3, 4)))
Example 36
def ensembles2dataset_dask(ensdict, ncfpath, dsattrs={}, chunks=10,
                           verbose=True, print_every=1000):
    """
    Convert a dictionary of ensembles into an xarray Dataset object
    using dask.delayed to keep memory usage feasible.
    """
    mms2ms = 1e-3
    n=0
    # fbadens = np.array(ensdict_aux)==None
    # nt = len(ensdict) - np.sum(fbadens)
    # embed()

    ensdict0 = None
    while ensdict0 is None:
        ensdict0 = ensdict[n].compute()
        n+=1
    nz = ensdict0['fixed_leader_janus']['number_of_cells']

    fixj = ensdict0['fixed_leader_janus'].compute()
    fix5 = ensdict0['fixed_leader_beam5'].compute()

    # Add ping offset to get beam 5's timestamps.
    dt5 = fix5['ping_offset_time'] # In milliseconds.
    dt5 = np.array(Timedelta(dt5, unit='ms'))

    th = fixj['beam_angle']
    assert th==25 # Always 25 degrees.
    th = th*np.pi/180.
    Cth = np.cos(th)

    # Construct along-beam/vertical axes.
    cm2m = 1e-2
    r1janus = fixj['bin_1_distance']*cm2m
    r1b5 = fix5['bin_1_distance']*cm2m
    ncj = fixj['number_of_cells']
    nc5 = fix5['number_of_cells']
    lcj = fixj['depth_cell_length']*cm2m
    lc5 = fix5['depth_cell_length']*cm2m
    Lj = ncj*lcj # Distance from center of bin 1 to the center of last bin (Janus).
    L5 = nc5*lc5 # Distance from center of bin 1 to the center of last bin (beam 5).

    rb = r1janus + np.arange(0, Lj, lcj) # Distance from xducer head
                                         # (Janus).
    zab = Cth*rb                         # Vertical distance from xducer head
                                         # (Janus).
    zab5 = r1b5 + np.arange(0, L5, lc5)  # Distance from xducer head, also
                                         # depth for the vertical beam.

    rb = IndexVariable('z', rb, attrs={'units':'meters', 'long_name':"along-beam distance from the xducer's face to the center of the bins, for beams 1-4 (Janus)"})
    zab = IndexVariable('z', zab, attrs={'units':'meters', 'long_name':"vertical distance from the instrument's head to the center of the bins, for beams 1-4 (Janus)"})
    zab5 = IndexVariable('z5', zab5, attrs={'units':'meters', 'long_name':"vertical distance from xducer face to the center of the bins, for beam 5 (vertical)"})

    ensdict = from_sequence(ensdict)
    tjanus = ensdict.map_partitions(_alloc_timestamp_parts)
    t5 = _addtarr(tjanus, dt5)

    if verbose: print("Unpacking timestamps.")
    time = IndexVariable('time', tjanus.compute(), attrs={'long_name':'timestamps for beams 1-4 (Janus)'})
    time5 = IndexVariable('time5', t5.compute(), attrs={'long_name':'timestamps for beam 5 (vertical)'})
    if verbose: print("Done unpacking timestamps.")

    coords0 = dict(time=time)
    coords = dict(z=zab, time=time, rb=rb)
    coords5 = dict(z5=zab5, time5=time5)
    dims = ['z', 'time']
    dims5 = ['z5', 'time5']
    dims0 = ['time']

    coordsdict = coords0
    if verbose: print("Allocating heading, pitch, roll.")
    svars = ['heading', 'pitch', 'roll']
    long_names = svars
    units = ['degrees']*3
    grp = 'variable_leader_janus'
    vars1d = dict()
    for vname, lname, unit in zip(svars, long_names, units):
        if verbose: print(vname)
        # Build the DataArray kwargs here so 'unit' and 'lname' are bound before use.
        kwda = dict(coords=coordsdict, dims=dims0, attrs=dict(units=unit, long_name=lname))
        wrk = ensdict.map_partitions(_alloc_hpr, grp, vname)
        # wrk = darr.from_array(np.array(wrk.compute()), chunks=chunks)
        wrk2 = delayed(_bag2DataArray)(wrk, chunks)(**kwda)
        vars1d.update({vname: wrk2})
    del(wrk, wrk2)

    ds2hpr = Dataset(data_vars=vars1d, coords=coordsdict)
    ds2hpr = ds2hpr.to_netcdf(ncfpath, compute=False, mode='w')
    if verbose: print("Saving heading, pitch, roll.")
    ds2hpr.compute()
    if verbose: print("Done saving heading, pitch, roll.")
    del(ds2hpr)

    coordsdict = coords5
    # Load beam 5 variables into memory to
    # be able to put them in a chunked DataArray.
    if verbose: print("Allocating beam 5 variables.")
    grps = ['velocity_beam5', 'correlation_beam5', 'echo_intensity_beam5']
    long_names = ['Beam 5 velocity', 'Beam 5 correlation', 'Beam 5 echo amplitude']
    units = ['mm/s, positive toward xducer face', 'unitless', 'dB']
    vars5 = dict()
    for grp,lname,unit in zip(grps,long_names,units):
        if verbose: print(grp)
        wrk = ensdict.map_partitions(_alloc_beam5, grp)
        wrk = darr.from_array(np.array(wrk.compute()).T, chunks=(1, chunks))
        wrk = DataArray(wrk, coords=coordsdict, dims=dims5, attrs=dict(units=unit, long_name=lname))
        vars5.update({grp:wrk})
    del(wrk)

    ds5 = Dataset(data_vars=vars5, coords=coordsdict)
    ds5 = ds5.to_netcdf(ncfpath, compute=False, mode='a')
    if verbose: print("Saving beam 5 variables.")
    ds5.compute()
    if verbose: print("Done saving beam 5 variables.")
    del(ds5)
    # embed()  # debugging leftover

    coordsdict = coords
    # Load beams 1-4 variables into memory to
    # be able to put them in a chunked DataArray.
    if verbose: print("Allocating Janus variables.")
    grps = ['velocity_janus', 'correlation_janus', 'echo_intensity_janus']
    long_names = ['Janus velocity', 'Janus correlation', 'Janus echo amplitude']
    units = ['mm/s, positive toward xducer face', 'unitless', 'dB']
    varsj = dict()
    for grp, lname, unit in zip(grps, long_names, units):
        if verbose: print(grp)
        wrk = ensdict.map_partitions(_alloc_janus, grp)
        wrk = darr.from_array(np.array(wrk.compute()).T, chunks=(1, chunks))
        # Janus beams use the (z, time) coordinates and dims, not the beam-5 ones.
        wrk = DataArray(wrk, coords=coordsdict, dims=dims, attrs=dict(units=unit, long_name=lname))
        varsj.update({grp: wrk})
    del(wrk)

    dsj = Dataset(data_vars=varsj, coords=coordsdict)
    dsj = dsj.to_netcdf(ncfpath, compute=False, mode='a')
    if verbose: print("Saving Janus variables.")
    dsj.compute()
    if verbose: print("Done saving Janus variables.")
    del(dsj)

    long_names = ('Beam 1 velocity', 'Beam 2 velocity',
             'Beam 3 velocity', 'Beam 4 velocity',
             'Beam 5 velocity',
             'Beam 1 correlation', 'Beam 2 correlation',
             'Beam 3 correlation', 'Beam 4 correlation',
             'Beam 5 correlation',
             'Beam 1 echo amplitude', 'Beam 2 echo amplitude',
             'Beam 3 echo amplitude', 'Beam 4 echo amplitude',
             'Beam 5 echo amplitude',
             'heading', 'pitch', 'roll')
    units = ('m/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'no units', 'no units', 'no units', 'no units',
             'no units',
             'dB', 'dB', 'dB', 'dB',
             'dB',
             'degrees', 'degrees', 'degrees')
    names = ('b1', 'b2', 'b3', 'b4', 'b5',
             'cor1', 'cor2', 'cor3', 'cor4', 'cor5',
             'int1', 'int2', 'int3', 'int4', 'int5',
             'phi1', 'phi2', 'phi3')
    data_vars = {}


    #
    # sk = darr.zeros((nz, nt), chunks=chunks)*np.nan # Beam vels stored in mm/s
    #                                   # as int64 to save memory.
    # b1, b2, b3, b4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    # # embed()
    # sk0 = darr.zeros(nt, chunks=chunks)*np.nan
    # cor1, cor2, cor3, cor4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    # int1, int2, int3, int4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    # b5, cor5, int5 = sk.copy(), sk.copy(), sk.copy()
    # heading, pitch, roll = sk0.copy(), sk0.copy(), sk0.copy()
    # tjanus = []

    # ensdict = np.array(ensdict)[~fbadens]
    # ensdict = ensdict.tolist()
    arrs = (b1, b2, b3, b4, b5,
            cor1, cor2, cor3, cor4, cor5,
            int1, int2, int3, int4, int5,
            heading, pitch, roll)
            # pressure, temperature, salinity, soundspeed)

    for arr,name,long_name,unit in zip(arrs,names,long_names,units):

        if 'Beam5' in long_name:
            coordsn = coords5
            dimsn = dims
        elif 'phi' in name:
            coordsn = coords0
            dimsn = dims0
        else:
            coordsn = coords
            dimsn = dims

        da = DataArray(arr, coords=coordsn, dims=dimsn, attrs=dict(units=unit, long_name=long_name))
        data_vars.update({name:da})

    allcoords = dict()
    allcoords.update(coords)
    allcoords.update(coords5)
    ds = Dataset(data_vars=data_vars, coords=allcoords, attrs=dsattrs)

    return ds
Example 37
def _swath_def_of_data_arrays(rows, cols):
    return SwathDefinition(
        DataArray(da.zeros((rows, cols)), dims=('y', 'x')),
        DataArray(da.zeros((rows, cols)), dims=('y', 'x')),
    )
Example 38
def main(argv=sys.argv[1:]):
    from polar2grid.core.script_utils import setup_logging, create_basic_parser, create_exc_handler, rename_log_file, ExtendAction
    from polar2grid.compositors import CompositorManager
    frontends = available_frontends()
    backends = available_backends()
    parser = create_basic_parser(description="Extract swath data, remap it, and write it to a new file format")
    parser.add_argument("frontend", choices=sorted(frontends.keys()),
                        help="Specify the swath extractor to use to read data (additional arguments are determined after this is specified)")
    parser.add_argument("backend", choices=sorted(backends.keys()),
                        help="Specify the backend to use to write data output (additional arguments are determined after this is specified)")
    parser.add_argument("--compositor-configs", nargs="*", default=None,
                        help="Specify alternative configuration file(s) for compositors")
    # don't include the help flag
    argv_without_help = [x for x in argv if x not in ["-h", "--help"]]
    args, remaining_args = parser.parse_known_args(argv_without_help)
    glue_name = args.frontend + "2" + args.backend
    LOG = logging.getLogger(glue_name)

    # Load compositor information (we can't know the compositor choices until we've loaded the configuration)
    compositor_manager = CompositorManager(config_files=args.compositor_configs)
    # Hack: argparse doesn't let you use choices and nargs=* on a positional argument
    parser.add_argument("compositors", choices=list(compositor_manager.keys()) + [[]], nargs="*",
                        help="Specify the compositors to apply to the provided scene (additional arguments are determined after this is specified)")

    # load the actual components we need
    farg_func = get_frontend_argument_func(frontends, args.frontend)
    fcls = get_frontend_class(frontends, args.frontend)
    barg_func = get_backend_argument_func(backends, args.backend)
    bcls = get_backend_class(backends, args.backend)

    # add_frontend_arguments(parser)
    subgroup_titles = []
    subgroup_titles += farg_func(parser)
    subgroup_titles += add_remap_argument_groups(parser)
    subgroup_titles += barg_func(parser)

    parser.add_argument('-f', dest='data_files', nargs="+", default=[], action=ExtendAction,
                        help="List of files or directories to extract data from")
    parser.add_argument('-d', dest='data_files', nargs="+", default=[], action=ExtendAction,
                        help="Data directories to look for input data files (equivalent to -f)")
    global_keywords = ("keep_intermediate", "overwrite_existing", "exit_on_error")
    args = parser.parse_args(argv, global_keywords=global_keywords, subgroup_titles=subgroup_titles)

    if not args.data_files:
        # FUTURE: When the -d flag is removed this won't be needed because -f will be required
        parser.print_usage()
        parser.exit(1, "ERROR: No data files provided (-f flag)\n")

    # Logs are renamed once the start date of the provided data is known
    rename_log = False
    if args.log_fn is None:
        rename_log = True
        args.log_fn = glue_name + "_fail.log"
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    setup_logging(console_level=levels[min(3, args.verbosity)], log_filename=args.log_fn)
    sys.excepthook = create_exc_handler(LOG.name)
    LOG.debug("Starting script with arguments: %s", " ".join(sys.argv))

    # Keep track of things going wrong to tell the user what went wrong (we want to create as much as possible)
    status_to_return = STATUS_SUCCESS

    # Compositor validation
    # XXX: Hack to make `polar2grid.sh crefl gtiff` work like legacy crefl2gtiff.sh script
    if args.subgroup_args['Frontend Swath Extraction'].get('no_compositors'):
        LOG.debug("Removing all compositors")
        args.compositors = []
    elif args.frontend == 'crefl':
        if args.backend in ['awips', 'scmi']:
            LOG.debug("Adding 'crefl_sharpen' compositor")
            args.compositors.append('crefl_sharpen' if args.backend == 'scmi' else 'crefl_sharpen_awips')
        else:
            LOG.debug("Adding 'true_color' compositor")
            args.compositors.append('true_color')
            if '--true-color' in sys.argv and 'true_color' not in args.compositors:
                LOG.debug("Adding 'true_color' compositor")
                args.compositors.append('true_color')
            if '--false-color' in sys.argv and 'false_color' not in args.compositors:
                LOG.debug("Adding 'false_color' compositor")
                args.compositors.append('false_color')

    # if "--true-color" in
    for c in args.compositors:
        if c not in compositor_manager:
            LOG.error("Compositor '%s' is unknown" % (c,))
            raise RuntimeError("Compositor '%s' is unknown" % (c,))

    # Frontend
    try:
        LOG.info("Initializing reader...")
        list_products = args.subgroup_args["Frontend Initialization"].pop("list_products")
        f = fcls(search_paths=args.data_files, **args.subgroup_args["Frontend Initialization"])
    except (ValueError, KeyError):
        LOG.debug("Frontend exception: ", exc_info=True)
        LOG.error("%s frontend failed to load and sort data files (see log for details)", args.frontend)
        return STATUS_FRONTEND_FAIL

    # Rename the log file
    if rename_log:
        rename_log_file(glue_name + f.begin_time.strftime("_%Y%m%d_%H%M%S.log"))

    if list_products:
        print("\n".join(sorted(f.available_product_names)))
        return STATUS_SUCCESS

    try:
        LOG.info("Initializing remapping...")
        remapper = Remapper(**args.subgroup_args["Remapping Initialization"])
        remap_kwargs = args.subgroup_args["Remapping"]
    except (ValueError, KeyError):
        LOG.debug("Remapping initialization exception: ", exc_info=True)
        LOG.error("Remapping initialization failed (see log for details)")
        return STATUS_REMAP_FAIL

    try:
        LOG.info("Initializing backend...")
        backend = bcls(**args.subgroup_args["Backend Initialization"])
    except (ValueError, KeyError):
        LOG.debug("Writer initialization exception: ", exc_info=True)
        LOG.error("Writer initialization failed (see log for details)")
        return STATUS_BACKEND_FAIL

    try:
        LOG.info("Initializing compositor objects...")
        compositor_objects = {}
        for c in args.compositors:
            compositor_objects[c] = compositor_manager.get_compositor(c, **args.global_kwargs)
    except (ValueError, KeyError):
        LOG.debug("Compositor initialization exception: ", exc_info=True)
        LOG.error("Compositor initialization failed (see log for details)")
        return STATUS_COMP_FAIL

    try:
        LOG.info("Extracting swaths from data files available...")
        scene = f.create_scene(**args.subgroup_args["Frontend Swath Extraction"])

        # Determine whether we have a satpy Scene and whether we should convert it to
        # a P2G Scene to continue processing
        resample_method = args.subgroup_args["Remapping"].get("remap_method")
        is_satpy_resample_method = resample_method in SATPY_RESAMPLERS
        if is_satpy_resample_method and not isinstance(scene, Scene):
            raise RuntimeError("Resampling method '{}' only supports 'satpy' readers".format(resample_method))
        elif not is_satpy_resample_method and isinstance(scene, Scene):
            # convert satpy scene to P2G Scene to be compatible with old P2G resamplers
            scene = convert_satpy_to_p2g_swath(f, scene)

        if isinstance(scene, Scene):
            if not scene.datasets:
                LOG.error("No products were returned by the frontend")
                raise RuntimeError("No products were returned by the frontend")
            if args.keep_intermediate:
                raise RuntimeError("satpy readers do not currently support saving intermediate files")
        else:
            if (isinstance(scene, Scene) and not scene.datasets) or not scene:
                LOG.error("No products were returned by the frontend")
                raise RuntimeError("No products were returned by the frontend")
            if args.keep_intermediate:
                filename = glue_name + "_swath_scene.json"
                LOG.info("Saving intermediate swath scene as '%s'", filename)
                scene.save(filename)
    except (ValueError, KeyError):
        LOG.debug("Frontend data extraction exception: ", exc_info=True)
        LOG.error("Frontend data extraction failed (see log for details)")
        return STATUS_FRONTEND_FAIL

    # What grids should we remap to (the user should tell us or the backend should have a good set of defaults)
    known_grids = backend.known_grids
    LOG.debug("Writer known grids: %r", known_grids)
    grids = remap_kwargs.pop("forced_grids", None)
    LOG.debug("Forced Grids: %r", grids)
    if resample_method == "sensor" and grids != ["sensor"]:
        LOG.error("'sensor' resampling method only supports the 'sensor' grid")
        return STATUS_GDETER_FAIL
    if not grids and not known_grids:
        # the user didn't ask for any grids and the backend doesn't have specific defaults
        LOG.error("No grids specified and no known defaults")
        return STATUS_GDETER_FAIL
    elif not grids:
        # the user didn't tell us what to do, so let's try everything the backend knows how to do
        grids = known_grids
    elif known_grids is not None:
        # the user told us what to do, let's make sure the backend can do it
        grids = list(set(grids) & set(known_grids))
        if not grids:
            LOG.error("%s backend doesn't know how to handle any of the grids specified", args.backend)
            return STATUS_GDETER_FAIL
    LOG.debug("Grids that will be mapped to: %r", grids)

    # Remap
    for grid_name in grids:
        LOG.info("Remapping to grid %s", grid_name)
        try:
            gridded_scene = remapper.remap_scene(scene, grid_name, **remap_kwargs)
            if args.keep_intermediate:
                filename = glue_name + "_gridded_scene_" + grid_name + ".json"
                LOG.debug("saving intermediate gridded scene as '%s'", filename)
                gridded_scene.save(filename)
        except (ValueError, KeyError):
            LOG.debug("Remapping data exception: ", exc_info=True)
            LOG.error("Remapping data failed")
            status_to_return |= STATUS_REMAP_FAIL
            if args.exit_on_error:
                return status_to_return
            continue

        if not isinstance(scene, Scene):
            # Composition
            for c, comp in compositor_objects.items():
                try:
                    LOG.info("Running gridded scene through '%s' compositor", c)
                    gridded_scene = comp.modify_scene(gridded_scene, **args.subgroup_args[c + " Modification"])
                    if args.keep_intermediate:
                        filename = glue_name + "_gridded_scene_" + grid_name + ".json"
                        LOG.debug("Updating saved intermediate gridded scene (%s) after compositor", filename)
                        gridded_scene.save(filename)
                except (KeyError, ValueError):
                    LOG.debug("Compositor Error: ", exc_info=True)
                    LOG.error("Could not properly modify scene using compositor '%s'" % (c,))
                    if args.exit_on_error:
                        raise RuntimeError("Could not properly modify scene using compositor '%s'" % (c,))

        if isinstance(f, ReaderWrapper) and not isinstance(gridded_scene, Scene):
            this_grid_definition = None
            # HACK: Create SatPy composites that were either separated before
            # resampling or needed resampling to be created
            rgbs = {}
            for product_name in gridded_scene.keys():
                rgb_name = product_name[:-6]
                # Keep track of one of the grid definitions
                if this_grid_definition is None:
                    this_grid_definition = gridded_scene[product_name]["grid_definition"]

                if product_name.endswith("rgb_0") or product_name.endswith("rgb_1") or product_name.endswith("rgb_2"):
                    if rgb_name not in rgbs:
                        rgbs[rgb_name] = [None, None, None]
                    chn_idx = int(product_name[-1])
                    rgbs[rgb_name][chn_idx] = product_name
            LOG.debug("Putting RGBs back together again")
            for rgb_name, v in rgbs.items():
                r = gridded_scene.pop(v[0])
                g = gridded_scene.pop(v[1])
                b = gridded_scene.pop(v[2])
                new_info = r.copy()
                new_info["grid_data"] = new_info["grid_data"].replace(v[0], rgb_name)
                new_info["product_name"] = rgb_name
                data = np.memmap(new_info["grid_data"], dtype=new_info["data_type"],
                                 mode="w+", shape=(3, new_info["grid_definition"]["height"], new_info["grid_definition"]["width"]))
                data[0] = r.get_data_array()[:]
                data[1] = g.get_data_array()[:]
                data[2] = b.get_data_array()[:]
                gridded_scene[rgb_name] = new_info
                del data, new_info

            # Create composites that satpy couldn't complete until after remapping
            composite_names = [x for x in f.wishlist if not isinstance(x, DatasetID)]
            if composite_names:
                tmp_scene = Scene()
                for k, v in gridded_scene.items():
                    if not isinstance(v["sensor"], set):
                        v["sensor"] = set([v["sensor"]])  # turn sensor back in to a set to match satpy usage
                    tmp_scene[v["id"]] = DataArray(v.get_data_array(), attrs=v)
                    tmp_scene[v["id"]].attrs["area"] = this_grid_definition.to_satpy_area()
                    # tmp_scene[v["id"]].info = {}
                    if v["sensor"] not in tmp_scene.attrs["sensor"]:
                        tmp_scene.attrs["sensor"].extend(v["sensor"])
                # Overwrite the wishlist that will include the above assigned datasets
                tmp_scene.wishlist = f.wishlist
                for cname in composite_names:
                    tmp_scene.compositors[cname] = tmp_scene.cpl.load_compositor(cname, tmp_scene.attrs["sensor"])
                tmp_scene.compute()
                tmp_scene.unload()
                # Add any new Datasets to our P2G Scene if SatPy created them
                for ds in tmp_scene:
                    ds_id = DatasetID.from_dict(ds.attrs)
                    if ds_id.name not in gridded_scene:
                        LOG.debug("Adding Dataset from SatPy Commpositing: %s", ds_id)
                        gridded_scene[ds_id.name] = dataarray_to_gridded_product(ds)
                        gridded_scene[ds_id.name]["grid_definition"] = this_grid_definition
                # Remove any Products from P2G Scene that SatPy decided it didn't need anymore
                for k, v in list(gridded_scene.items()):
                    if v["id"].name not in tmp_scene:
                        LOG.debug("Removing Dataset that is no longer used: %s", k)
                        del gridded_scene[k]
                del tmp_scene, v

        if isinstance(gridded_scene, Scene):
            LOG.debug("Converting satpy Scene to P2G Gridded Scene")
            # Convert it to P2G Gridded Scene
            gridded_scene = convert_satpy_to_p2g_gridded(f, gridded_scene)

        # Writer
        try:
            LOG.info("Creating output from data mapped to grid %s", grid_name)
            backend.create_output_from_scene(gridded_scene, **args.subgroup_args["Backend Output Creation"])
        except (ValueError, KeyError):
            LOG.debug("Writer output creation exception: ", exc_info=True)
            LOG.error("Writer output creation failed (see log for details)")
            status_to_return |= STATUS_BACKEND_FAIL
            if args.exit_on_error:
                return status_to_return
            continue

        LOG.info("Processing data for grid %s complete", grid_name)
        # Force deletion and eventual garbage collection of the scene objects
        del gridded_scene
    del scene
    return status_to_return
Esempio n. 39
0
 def __getitem__(self, item):
     return DataArray(self.data[item], attrs=self.attrs)
Esempio n. 40
0
            # Tterm = np.nansum(Term*Tzimskdzt, axis=0)/TH # [1/s2].
            Tterm = np.nansum(Term * Tzimskdzt, axis=0)  # [m/s2].
            Tterm[fland] = np.nan

        Iterm = stripmsk(Iterm)
        if TOPOG_TERMS:
            Tterm = stripmsk(Tterm)

        iterm = 'I' + term
        if TOPOG_TERMS:
            tterm = 'T' + term
        # Add timestamp.

        coords = dict(lon=(dimsxy, lont), lat=(dimsxy, latt))

        Iterm = DataArray(Iterm, coords=coords, dims=dimsxy)
        if TOPOG_TERMS:
            Tterm = DataArray(Tterm, coords=coords, dims=dimsxy)

        t = np.array(Timestamp(date))
        Iterm.coords.update(dict(time=t))

        Terms.update({iterm: Iterm})
        if TOPOG_TERMS:
            Terms.update({tterm: Tterm})

    if SAVE_TERMS_netCDF:
        fout = headout + 'vortbdgt_' + date + '.nc'
        Datasetx(data_vars=Terms,
                 coords=coords).to_netcdf(fout, unlimited_dims='time')
Esempio n. 41
0
def _impute_genotype_call_with_variant_mean(
        call_g: xr.DataArray, call_g_mask: xr.DataArray) -> xr.DataArray:
    call_g_present = ~call_g_mask  # type: ignore[operator]
    variant_mean = call_g.where(call_g_present).mean(dim="samples")
    imputed_call_g: xr.DataArray = call_g.where(call_g_present, variant_mean)
    return imputed_call_g
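
A minimal usage sketch for the helper above. The array values and the "variants"/"samples" dimension names are illustrative assumptions; only the "samples" dimension is actually required by the function.

import numpy as np
import xarray as xr

# Hypothetical toy input: per-variant, per-sample genotype dosages with one
# missing call (np.nan) that should be imputed with the variant mean.
call_g = xr.DataArray(
    [[0.0, 1.0, 2.0],
     [2.0, np.nan, 0.0]],
    dims=("variants", "samples"),
)
call_g_mask = call_g.isnull()

imputed = _impute_genotype_call_with_variant_mean(call_g, call_g_mask)
# The missing call in the second variant becomes mean([2.0, 0.0]) == 1.0
print(imputed.values)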
Esempio n. 42
0
    def test_multivar_numbered_tiles_glm(self, sector):
        """Test creating a tiles with multiple variables."""
        import xarray as xr
        from satpy.writers.awips_tiled import AWIPSTiledWriter
        from xarray import DataArray
        from pyresample.geometry import AreaDefinition
        from pyresample.utils import proj4_str_to_dict
        w = AWIPSTiledWriter(base_dir=self.base_dir, compress=True)
        area_def = AreaDefinition(
            'test',
            'test',
            'test',
            proj4_str_to_dict(
                '+proj=lcc +datum=WGS84 +ellps=WGS84 +lon_0=-95. '
                '+lat_0=25 +lat_1=25 +units=m +no_defs'),
            100,
            200,
            (-1000., -1500., 1000., 1500.),
        )
        now = datetime(2018, 1, 1, 12, 0, 0)
        end_time = now + timedelta(minutes=20)
        ds1 = DataArray(da.from_array(np.linspace(0.,
                                                  1.,
                                                  20000,
                                                  dtype=np.float32).reshape(
                                                      (200, 100)),
                                      chunks=50),
                        attrs=dict(name='total_energy',
                                   platform_name='GOES-17',
                                   sensor='SENSOR',
                                   units='1',
                                   area=area_def,
                                   start_time=now,
                                   end_time=end_time,
                                   scan_mode='M3',
                                   scene_abbr=sector,
                                   platform_shortname="G17"))
        ds2 = ds1.copy()
        ds2.attrs.update({
            'name': 'flash_extent_density',
        })
        ds3 = ds1.copy()
        ds3.attrs.update({
            'name': 'average_flash_area',
        })
        dqf = ds1.copy()
        dqf = (dqf * 255).astype(np.uint8)
        dqf.attrs = ds1.attrs.copy()
        dqf.attrs.update({
            'name': 'DQF',
            '_FillValue': 1,
        })

        w.save_datasets([ds1, ds2, ds3, dqf],
                        sector_id='TEST',
                        source_name="TESTS",
                        tile_count=(3, 3),
                        template='glm_l2_rad{}'.format(sector.lower()))
        all_files = glob(os.path.join(self.base_dir, '*_GLM*.nc'))
        assert len(all_files) == 9
        for fn in all_files:
            ds = xr.open_dataset(fn, mask_and_scale=False)
            check_required_common_attributes(ds)
            if sector == 'C':
                assert ds.attrs['time_coverage_end'] == end_time.strftime(
                    '%Y-%m-%dT%H:%M:%S.%fZ')
            else:  # 'F'
                assert ds.attrs['time_coverage_end'] == end_time.strftime(
                    '%Y-%m-%dT%H:%M:%SZ')
Esempio n. 43
0
def flux_woolf2016_rapid(
    temp_bulk_C,
    salt_bulk,
    pCO2_bulk_uatm,
    pCO2_air_uatm,
    press_hPa,
    wind_ms,
    kw_func=gas_transfer_velocity.k_Ni00,
    cool_skin_bias=-0.14,
    salty_skin_bias=0.1,
):
    """
    Calculates air-sea CO2 fluxes using the RAPID model as defined by Woolf et
    al. (2016), where the concentration of CO2 in the skin and foundation
    layers is used to calculate the fluxes rather than delta pCO2 (the latter
    is called the bulk flux).

    We calculate the skin temperature and salinity using a cool and salty skin
    bias as defined in Woolf et al. (2016). The defaults are 0.14 degC and
    0.1 PSU as taken from FluxEngine.

    **Assumptions:** This function is set up to use AVHRR-only OISST, which
    reports temperatures at 1 m depth based on a buoy correction (Banzon et al.
    2016). We make the assumption that this bulk temperature is equivalent to
    the foundation temperature (where nighttime and daytime temperatures are the
    same). We also assume that EN4 salinity is foundation salinity (this is
    probably more accurate than the first assumption). Lastly, we assume that the
    ML-estimated fCO2 is bulk fCO2, since we use bulk variable inputs (SSS and SST).

    Parameters
    ----------
    temp_bulk_C : np.array
        temperature from OISST in deg Celsius with an allowable range of
        [-2 : 45]
    salt_bulk : np.array
        salinity from EN4 in PSU. Allowable range [5 : 50]
    pCO2_bulk_uatm : np.array
        partial pressure of CO2 in the sea in micro-atmospheres, assuming that
        it was measured/predicted at the same level as the temperature and
        salinity (See our assumptions above). Allowable range is [50 : 1000]
    pCO2_air_uatm : np.array
        partial pressure of CO2 in the air in micro-atmospheres. Allowable
        range is [50:1000].
    press_hPa : np.array
        atmospheric pressure in hecto-Pascals with an allowable range of
        [500 : 1500] hPa
    wind_ms : np.array
        wind speed in metres per second with an allowable range of [0 : 40]
    kw_func : callable
        a function that returns the gas transfer velocity in cm/hr. The default
        is ``gas_transfer_velocity.k_Ni00`` (Nightingale et al. 2000); the
        parameterisation of Ho et al. (2006) is the preferred method of
        Goddijn-Murphy et al. (2016). Other functions are available in the
        `gas_transfer` class. If you'd like to use your own function, its inputs
        must be wind speed (m/s) and temperature (degC) and its output must be
        in cm/hr
    cool_skin_bias : float
        The temperature difference between the foundation/bulk temperature and
        the skin temperature as suggested by Woolf et al. (2016). The default is
        0.14 degC where this will be subtracted from the bulk temperature, i.e.
        the surface is cooler due to the cooling effect of winds.
    salty_skin_bias : float
        The salinity difference between the foundation and skin layers. This is
        driven by evaporation and defaults to 0.1 (will be added to salinity).

    Returns
    -------
    FCO2 : np.array
        Sea-air CO2 flux where positive is out of the ocean and negative is
        into the ocean. Units are gC.m-2.day-1 (grams Carbon per metre squared
        per day)
    """
    from numpy import array
    from xarray import DataArray

    warnings.warn("This function has not been tested yet")

    if isinstance(pCO2_bulk_uatm, DataArray):
        var = pCO2_bulk_uatm.copy()  # attribute preservation
    else:
        var = None

    press_atm = array(press_hPa) / 1013.25

    SSTfnd_C = array(temp_bulk_C)
    SSTskn_C = SSTfnd_C - cool_skin_bias  # from default FluxEngine config
    SSTfnd_K = SSTfnd_C + 273.15
    SSTskn_K = SSTskn_C + 273.15
    SSTdelta = SSTfnd_C - SSTskn_C

    SSSfnd = array(salt_bulk)
    SSSskn = SSSfnd + salty_skin_bias  # from default FluxEngine config

    pCO2sea = array(pCO2_bulk_uatm) * 1e-6  # to atm
    pCO2air = array(pCO2_air_uatm) * 1e-6

    # checking units
    press_atm = check.pres_atm(press_atm)
    SSTfnd_K = check.temp_K(SSTfnd_K)
    SSSfnd = check.salt(SSSfnd)
    pCO2sea = check.CO2_mol(pCO2sea)
    pCO2air = check.CO2_mol(pCO2air)
    wind_ms = check.wind_ms(wind_ms)

    fCO2sea = pCO2sea * f2p.virial_coeff(SSTfnd_K, press_atm)
    fCO2air = pCO2air * f2p.virial_coeff(SSTskn_K, press_atm)

    # units in mol . L-1 . atm-1
    K0fnd = sol.solubility_woolf2016(SSSfnd, SSTfnd_K, SSTdelta, press_atm)
    K0skn = sol.solubility_woolf2016(SSSskn, SSTskn_K, SSTdelta, press_atm)

    # molar mass of carbon combined with the L -> m3 conversion
    # (12.0108 gC . mol-1  *  1000 L . m-3)
    mC = 12.0108 * 1000  # gC . mol-1 . L . m-3

    # CONC : UNIT ANALYSIS
    #         solubility         *  pCO2 *  (molar mass * volume conversion)
    # conc = (mol . L-1 . atm-1) * (atm) * (gC . mol-1 . L . m-3)
    # conc = gC . m-3
    # Bulk uses skin; equilibrium and rapid use foundation for concSEA
    concSEA = K0fnd * fCO2sea * mC
    concAIR = K0skn * fCO2air * mC

    # KW : UNIT ANALYSIS
    # kw = (cm / 100) / (hr / 24)
    # kw = m . day-1
    kw = kw_func(wind_ms, SSTskn_C) * (24 / 100)

    # FLUX : UNIT ANALYSIS
    # flux = (m . day-1) * (g . m-3)
    # flux = gC . m . m-3 . day-1
    # flux = gC . m-2 . day-1
    CO2flux_woolfe = kw * (concSEA - concAIR)

    if isinstance(var, DataArray):
        kw_name = kw_func.__name__[2:]
        attributes = dict(
            units="gC / m2 / day",
            description=f"sea-air CO2 fluxes calculated with {kw_name}",
            long_name="sea-air CO2 fluxes",
        )

        CO2flux_woolfe = DataArray(data=CO2flux_woolfe,
                                   coords=var.coords,
                                   attrs=attributes)

    return CO2flux_woolfe
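
A hedged usage sketch for the function above; the input values are made up (single-point arrays within the documented ranges) and it assumes the function's own module, with its `check`, `sol`, `f2p` and `gas_transfer_velocity` helpers, is importable.

import numpy as np

# Illustrative inputs; a real call would pass gridded arrays or DataArrays.
fco2_flux = flux_woolf2016_rapid(
    temp_bulk_C=np.array([18.5]),      # OISST bulk temperature [degC]
    salt_bulk=np.array([35.1]),        # EN4 salinity [PSU]
    pCO2_bulk_uatm=np.array([380.0]),  # sea pCO2 [uatm]
    pCO2_air_uatm=np.array([410.0]),   # air pCO2 [uatm]
    press_hPa=np.array([1013.25]),     # sea-level pressure [hPa]
    wind_ms=np.array([7.0]),           # wind speed [m/s]
)
# Positive values are outgassing, negative values are ocean uptake [gC m-2 day-1].
print(fco2_flux)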
Esempio n. 44
0
def _bag2DataArray(bg, chunks, **kwargs):
    # compute the bag, wrap the result as a chunked dask array and forward any
    # remaining keyword arguments (dims, coords, name, ...) to DataArray
    return DataArray(darr.from_array(np.array(bg.compute()), chunks=chunks),
                     **kwargs)
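
A hypothetical call of the helper above, assuming `darr` is `dask.array` and `np` is numpy as the snippet implies; the keyword arguments are forwarded to DataArray.

import dask.bag as db

# Turn a small dask bag of numbers into a chunked 1-D DataArray.
bag = db.from_sequence([1.0, 2.0, 3.0, 4.0], npartitions=2)
arr = _bag2DataArray(bag, chunks=2, dims=("x",), name="from_bag")
print(arr)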
Esempio n. 45
0
def make_fake_scene(content_dict, daskify=False, area=True, common_attrs=None):
    """Create a fake Scene.

    Create a fake Scene object from fake data.  Data are provided in
    the ``content_dict`` argument.  In ``content_dict``, keys should be
    strings or DataID, and values may be either numpy.ndarray
    or xarray.DataArray, in either case with exactly two dimensions.
    The function will convert each of the numpy.ndarray objects into
    an xarray.DataArray and assign those as datasets to a Scene object.
    A fake AreaDefinition will be assigned for each array, unless disabled
    by passing ``area=False``.  When areas are automatically generated,
    arrays with the same shape will get the same area.

    This function is exclusively intended for testing purposes.

    If regular ndarrays are passed and the keyword argument daskify is
    True, DataArrays will be created as dask arrays.  If False (default),
    regular DataArrays will be created.  When the user passes xarray.DataArray
    objects then this flag has no effect.

    Args:
        content_dict (Mapping): Mapping where keys correspond to objects
            accepted by ``Scene.__setitem__``, i.e. strings or DataID,
            and values may be either ``numpy.ndarray`` or
            ``xarray.DataArray``.
        daskify (bool): optional, to use dask when converting
            ``numpy.ndarray`` to ``xarray.DataArray``.  No effect when the
            values in ``content_dict`` are already ``xarray.DataArray``.
        area (bool or BaseDefinition): Can be ``True``, ``False``, or an
            instance of ``pyresample.geometry.BaseDefinition`` such as
            ``AreaDefinition`` or ``SwathDefinition``.  If ``True``, which is
            the default, automatically generate areas.  If ``False``, values
            will not have assigned areas.  If an instance of
            ``pyresample.geometry.BaseDefinition``, those instances will be
            used for all generated fake datasets.  Warning: Passing an area as
            a string (``area="germ"``) is not supported.
        common_attrs (Mapping): optional, additional attributes that will
            be added to every dataset in the scene.

    Returns:
        Scene object with datasets corresponding to content_dict.
    """
    if common_attrs is None:
        common_attrs = {}
    sc = Scene()
    for (did, arr) in content_dict.items():
        extra_attrs = common_attrs.copy()
        if isinstance(area, BaseDefinition):
            extra_attrs["area"] = area
        elif area:
            extra_attrs["area"] = create_area_def("test-area", {
                "proj": "eqc",
                "lat_ts": 0,
                "lat_0": 0,
                "lon_0": 0,
                "x_0": 0,
                "y_0": 0,
                "ellps": "sphere",
                "units": "m",
                "no_defs": None,
                "type": "crs"
            },
                                                  units="m",
                                                  shape=arr.shape,
                                                  resolution=1000,
                                                  center=(0, 0))
        if isinstance(arr, DataArray):
            sc[did] = arr.copy()  # don't change attributes of input
            sc[did].attrs.update(extra_attrs)
        else:
            if daskify:
                arr = da.from_array(arr)
            sc[did] = DataArray(arr, dims=("y", "x"), attrs=extra_attrs)
    return sc
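
A hedged usage sketch for make_fake_scene, assuming satpy and pyresample are installed; the dataset names and values are invented for illustration.

import numpy as np

# Two 2-D arrays become datasets in a fake Scene, each with an automatically
# generated AreaDefinition matching its shape.
fake_scene = make_fake_scene(
    {
        "brightness_temperature": np.full((5, 10), 230.0, dtype=np.float32),
        "reflectance": np.linspace(0.0, 1.0, 50, dtype=np.float32).reshape((5, 10)),
    },
    daskify=True,
    common_attrs={"sensor": "test_sensor"},
)
print(fake_scene["reflectance"].attrs["area"])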
Esempio n. 46
0
def ingest_nxTOMO(paths):

    assert len(paths) == 1
    path = paths[0]

    h5 = h5py.File(path, 'r')

    data = h5['irmap']['DATA']['data']
    energy = h5['irmap']['DATA']['energy'][()]
    sample_x = h5['irmap']['DATA']['sample_x'][()]
    sample_y = h5['irmap']['DATA']['sample_y'][()]

    xarray = DataArray(data,
                       dims=['E (eV)', 'y (μm)', 'x (μm)'],
                       coords=[energy, sample_y, sample_x])
    dask_data = da.from_array(xarray)

    projections = [('nxSTXM', {
        'irmap/DATA/data': ('primary', 'raw'),
        'irmap/DATA/energy': energy,
        'irmap/DATA/sample_x': sample_x,
        'irmap/DATA/sample_y': sample_y
    })]

    # Compose run start
    run_bundle = event_model.compose_run(
    )  # type: event_model.ComposeRunBundle
    start_doc = run_bundle.start_doc
    start_doc["sample_name"] = Path(paths[0]).resolve().stem
    start_doc["projections"] = projections
    yield 'start', start_doc

    # Compose descriptor
    source = 'nxSTXM'
    frame_data_keys = {
        'raw': {
            'source': source,
            'dtype': 'number',
            'dims': xarray.dims,
            # 'coords': [energy, sample_y, sample_x],
            'shape': data.shape
        }
    }
    frame_stream_name = 'primary'
    frame_stream_bundle = run_bundle.compose_descriptor(
        data_keys=frame_data_keys,
        name=frame_stream_name,
        # configuration=_metadata(path)
    )
    yield 'descriptor', frame_stream_bundle.descriptor_doc

    # NOTE: Resource document may be meaningful in the future. For transient access it is not useful
    # # Compose resource
    # resource = run_bundle.compose_resource(root=Path(path).root, resource_path=path, spec='NCEM_DM', resource_kwargs={})
    # yield 'resource', resource.resource_doc

    # Compose datum_page
    # z_indices, t_indices = zip(*itertools.product(z_indices, t_indices))
    # datum_page_doc = resource.compose_datum_page(datum_kwargs={'index_z': list(z_indices), 'index_t': list(t_indices)})
    # datum_ids = datum_page_doc['datum_id']
    # yield 'datum_page', datum_page_doc

    yield 'event', frame_stream_bundle.compose_event(
        data={'raw': dask_data}, timestamps={'raw': time.time()})

    yield 'stop', run_bundle.compose_stop()
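
A hypothetical way to consume the document stream produced by the ingestor above; 'irmap_scan.h5' stands in for a real file with the expected irmap/DATA layout.

# Iterate over the (name, document) pairs: start, descriptor, event, stop.
for name, doc in ingest_nxTOMO(["irmap_scan.h5"]):
    print(name, sorted(doc)[:5])  # document type and a few of its keys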
Esempio n. 47
0
class TestFacetGrid(PlotTestCase):
    def setUp(self):
        d = easy_array((10, 15, 3))
        self.darray = DataArray(d,
                                dims=['y', 'x', 'z'],
                                coords={'z': ['a', 'b', 'c']})
        self.g = xplt.FacetGrid(self.darray, col='z')

    def test_no_args(self):
        self.g.map_dataarray(xplt.contourf, 'x', 'y')

        # Don't want colorbar labeled with 'None'
        alltxt = text_in_fig()
        self.assertNotIn('None', alltxt)

        for ax in self.g.axes.flat:
            self.assertTrue(ax.has_data())

            # default font size should be small
            fontsize = ax.title.get_size()
            self.assertLessEqual(fontsize, 12)

    def test_names_appear_somewhere(self):
        self.darray.name = 'testvar'
        self.g.map_dataarray(xplt.contourf, 'x', 'y')
        for k, ax in zip('abc', self.g.axes.flat):
            self.assertEqual('z = {0}'.format(k), ax.get_title())

        alltxt = text_in_fig()
        self.assertIn(self.darray.name, alltxt)
        for label in ['x', 'y']:
            self.assertIn(label, alltxt)

    def test_text_not_super_long(self):
        self.darray.coords['z'] = [100 * letter for letter in 'abc']
        g = xplt.FacetGrid(self.darray, col='z')
        g.map_dataarray(xplt.contour, 'x', 'y')
        alltxt = text_in_fig()
        maxlen = max(len(txt) for txt in alltxt)
        self.assertLess(maxlen, 50)

        t0 = g.axes[0, 0].get_title()
        self.assertTrue(t0.endswith('...'))

    def test_colorbar(self):
        vmin = self.darray.values.min()
        vmax = self.darray.values.max()
        expected = np.array((vmin, vmax))

        self.g.map_dataarray(xplt.imshow, 'x', 'y')

        for image in plt.gcf().findobj(mpl.image.AxesImage):
            clim = np.array(image.get_clim())
            self.assertTrue(np.allclose(expected, clim))

        self.assertEqual(1, len(find_possible_colorbars()))

    def test_empty_cell(self):
        g = xplt.FacetGrid(self.darray, col='z', col_wrap=2)
        g.map_dataarray(xplt.imshow, 'x', 'y')

        bottomright = g.axes[-1, -1]
        self.assertFalse(bottomright.has_data())
        self.assertFalse(bottomright.get_visible())

    def test_norow_nocol_error(self):
        with self.assertRaisesRegexp(ValueError, r'[Rr]ow'):
            xplt.FacetGrid(self.darray)

    def test_groups(self):
        self.g.map_dataarray(xplt.imshow, 'x', 'y')
        upperleft_dict = self.g.name_dicts[0, 0]
        upperleft_array = self.darray.loc[upperleft_dict]
        z0 = self.darray.isel(z=0)

        self.assertDataArrayEqual(upperleft_array, z0)

    def test_float_index(self):
        self.darray.coords['z'] = [0.1, 0.2, 0.4]
        g = xplt.FacetGrid(self.darray, col='z')
        g.map_dataarray(xplt.imshow, 'x', 'y')

    def test_nonunique_index_error(self):
        self.darray.coords['z'] = [0.1, 0.2, 0.2]
        with self.assertRaisesRegexp(ValueError, r'[Uu]nique'):
            xplt.FacetGrid(self.darray, col='z')

    def test_robust(self):
        z = np.zeros((20, 20, 2))
        darray = DataArray(z, dims=['y', 'x', 'z'])
        darray[:, :, 1] = 1
        darray[2, 0, 0] = -1000
        darray[3, 0, 0] = 1000
        g = xplt.FacetGrid(darray, col='z')
        g.map_dataarray(xplt.imshow, 'x', 'y', robust=True)

        # Color limits should be 0, 1
        # The largest number displayed in the figure should be less than 21
        numbers = set()
        alltxt = text_in_fig()
        for txt in alltxt:
            try:
                numbers.add(float(txt))
            except ValueError:
                pass
        largest = max(abs(x) for x in numbers)
        self.assertLess(largest, 21)

    def test_can_set_vmin_vmax(self):
        vmin, vmax = 50.0, 1000.0
        expected = np.array((vmin, vmax))
        self.g.map_dataarray(xplt.imshow, 'x', 'y', vmin=vmin, vmax=vmax)

        for image in plt.gcf().findobj(mpl.image.AxesImage):
            clim = np.array(image.get_clim())
            self.assertTrue(np.allclose(expected, clim))

    def test_can_set_norm(self):
        norm = mpl.colors.SymLogNorm(0.1)
        self.g.map_dataarray(xplt.imshow, 'x', 'y', norm=norm)
        for image in plt.gcf().findobj(mpl.image.AxesImage):
            self.assertIs(image.norm, norm)

    def test_figure_size(self):

        self.assertArrayEqual(self.g.fig.get_size_inches(), (10, 3))

        g = xplt.FacetGrid(self.darray, col='z', size=6)
        self.assertArrayEqual(g.fig.get_size_inches(), (19, 6))

        g = self.darray.plot.imshow(col='z', size=6)
        self.assertArrayEqual(g.fig.get_size_inches(), (19, 6))

        g = xplt.FacetGrid(self.darray, col='z', size=4, aspect=0.5)
        self.assertArrayEqual(g.fig.get_size_inches(), (7, 4))

        g = xplt.FacetGrid(self.darray, col='z', figsize=(9, 4))
        self.assertArrayEqual(g.fig.get_size_inches(), (9, 4))

        with self.assertRaisesRegexp(ValueError, "cannot provide both"):
            g = xplt.plot(self.darray, row=2, col='z', figsize=(6, 4), size=6)

        with self.assertRaisesRegexp(ValueError, "Can't use"):
            g = xplt.plot(self.darray, row=2, col='z', ax=plt.gca(), size=6)

    def test_num_ticks(self):
        nticks = 99
        maxticks = nticks + 1
        self.g.map_dataarray(xplt.imshow, 'x', 'y')
        self.g.set_ticks(max_xticks=nticks, max_yticks=nticks)

        for ax in self.g.axes.flat:
            xticks = len(ax.get_xticks())
            yticks = len(ax.get_yticks())
            self.assertLessEqual(xticks, maxticks)
            self.assertLessEqual(yticks, maxticks)
            self.assertGreaterEqual(xticks, nticks / 2.0)
            self.assertGreaterEqual(yticks, nticks / 2.0)

    def test_map(self):
        self.g.map(plt.contourf, 'x', 'y', Ellipsis)
        self.g.map(lambda: None)

    def test_map_dataset(self):
        g = xplt.FacetGrid(self.darray.to_dataset(name='foo'), col='z')
        g.map(plt.contourf, 'x', 'y', 'foo')

        alltxt = text_in_fig()
        for label in ['x', 'y']:
            self.assertIn(label, alltxt)
        # everything has a label
        self.assertNotIn('None', alltxt)

        # colorbar can't be inferred automatically
        self.assertNotIn('foo', alltxt)
        self.assertEqual(0, len(find_possible_colorbars()))

        g.add_colorbar(label='colors!')
        self.assertIn('colors!', text_in_fig())
        self.assertEqual(1, len(find_possible_colorbars()))

    def test_set_axis_labels(self):
        g = self.g.map_dataarray(xplt.contourf, 'x', 'y')
        g.set_axis_labels('longitude', 'latitude')
        alltxt = text_in_fig()
        for label in ['longitude', 'latitude']:
            self.assertIn(label, alltxt)

    def test_facetgrid_colorbar(self):
        a = easy_array((10, 15, 4))
        d = DataArray(a, dims=['y', 'x', 'z'], name='foo')

        d.plot.imshow(x='x', y='y', col='z')
        self.assertEqual(1, len(find_possible_colorbars()))

        d.plot.imshow(x='x', y='y', col='z', add_colorbar=True)
        self.assertEqual(1, len(find_possible_colorbars()))

        d.plot.imshow(x='x', y='y', col='z', add_colorbar=False)
        self.assertEqual(0, len(find_possible_colorbars()))

    def test_facetgrid_polar(self):
        # test if polar projection in FacetGrid does not raise an exception
        self.darray.plot.pcolormesh(col='z',
                                    subplot_kws=dict(projection='polar'),
                                    sharex=False,
                                    sharey=False)
Esempio n. 48
0
def escore(
    tgt: xr.DataArray,
    sim: xr.DataArray,
    dims: Sequence[str] = ("variables", "time"),
    N: int = 0,  # noqa
    scale: bool = False,
) -> xr.DataArray:
    r"""Energy score, or energy dissimilarity metric, based on [SzekelyRizzo]_ and [Cannon18]_.

    Parameters
    ----------
    tgt: xr.DataArray
      Target observations.
    sim: xr.DataArray
      Candidate observations. Must have the same dimensions as `tgt`.
    dims: sequence of 2 strings
      The name of the dimensions along which the variables and observation points are listed.
      `tgt` and `sim` can have different length along the second one, but must be equal along the first one.
      The result will keep all other dimensions.
    N : int
      If larger than 0, the number of observations to use in the score computation. The points are taken
      evenly distributed along `obs_dim`.
    scale: bool
      Whether to scale the data before computing the score. If True, both arrays are scaled according
      to the mean and standard deviation of `tgt` along `obs_dim`. (std computed with `ddof=1` and both
      statistics excluding NaN values).

    Returns
    -------
    xr.DataArray
        e-score with dimensions not in `dims`.

    Notes
    -----
    Explanation adapted from the "energy" R package documentation.
    The e-distance between two clusters :math:`C_i`, :math:`C_j` (tgt and sim) of size :math:`n_i,n_j`
    proposed by Székely and Rizzo (2004) is defined by:

    .. math::

        e(C_i,C_j) = \frac{1}{2}\frac{n_i n_j}{n_i + n_j} \left[2 M_{ij} - M_{ii} - M_{jj}\right]

    where

    .. math::

        M_{ij} = \frac{1}{n_i n_j} \sum_{p = 1}^{n_i} \sum_{q = 1}^{n_j} \left\Vert X_{ip} - X_{jq} \right\Vert.

    :math:`\Vert\cdot\Vert` denotes Euclidean norm, :math:`X_{ip}` denotes the p-th observation in the i-th cluster.

    The input scaling and the factor :math:`\frac{1}{2}` in the first equation are additions of [Cannon18]_ to
    the metric. With that factor, the test becomes identical to the one defined by [BaringhausFranz]_.
    This version is tested against values taken from Alex Cannon's MBC R package.

    References
    ----------
    .. [BaringhausFranz] Baringhaus, L. and Franz, C. (2004) On a new multivariate two-sample test, Journal of Multivariate Analysis, 88(1), 190–206. https://doi.org/10.1016/s0047-259x(03)00079-4
    .. [Cannon18] Cannon, A. J. (2018). Multivariate quantile mapping bias correction: An N-dimensional probability density function transform for climate model simulations of multiple variables. Climate Dynamics, 50(1), 31–49. https://doi.org/10.1007/s00382-017-3580-6
    .. [SzekelyRizzo] Székely, G. J. and Rizzo, M. L. (2004) Testing for Equal Distributions in High Dimension, InterStat, November (5)
    """

    pts_dim, obs_dim = dims

    if N > 0:
        # If N non-zero we only take around N points, evenly distributed
        sim_step = int(np.ceil(sim[obs_dim].size / N))
        sim = sim.isel({obs_dim: slice(None, None, sim_step)})
        tgt_step = int(np.ceil(tgt[obs_dim].size / N))
        tgt = tgt.isel({obs_dim: slice(None, None, tgt_step)})

    if scale:
        tgt, avg, std = standardize(tgt)
        sim, _, _ = standardize(sim, avg, std)

    # The dimension renaming is to allow different coordinates.
    # Otherwise, apply_ufunc tries to align both obs_dim together.
    new_dim = get_temp_dimname(tgt.dims, obs_dim)
    sim = sim.rename({obs_dim: new_dim})
    out = xr.apply_ufunc(
        _escore,
        tgt,
        sim,
        input_core_dims=[[pts_dim, obs_dim], [pts_dim, new_dim]],
        output_dtypes=[sim.dtype],
        dask="parallelized",
    )

    out.name = "escores"
    out.attrs.update(
        long_name="Energy dissimilarity metric",
        description=f"Escores computed from {N or 'all'} points.",
        references="Székely, G. J. and Rizzo, M. L. (2004) Testing for Equal Distributions in High Dimension, InterStat, November (5)",
    )
    return out
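
A minimal sketch of calling escore on toy data, assuming the helpers it relies on (standardize, get_temp_dimname, _escore) are importable from the same module; the array sizes are arbitrary.

import numpy as np
import xarray as xr

# Two variables observed over different numbers of time steps, as allowed
# along the second dimension.
rng = np.random.default_rng(0)
tgt = xr.DataArray(rng.normal(size=(2, 50)), dims=("variables", "time"))
sim = xr.DataArray(rng.normal(size=(2, 40)), dims=("variables", "time"))

score = escore(tgt, sim, dims=("variables", "time"), scale=True)
print(float(score))  # values near 0 indicate similar multivariate distributions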
Esempio n. 49
0
def test_contains_cftime_datetimes_1d(data):
    assert contains_cftime_datetimes(data.time)


@pytest.mark.skipif(not has_dask, reason='dask not installed')
@pytest.mark.skipif(not has_cftime, reason='cftime not installed')
def test_contains_cftime_datetimes_dask_1d(data):
    assert contains_cftime_datetimes(data.time.chunk())


@pytest.mark.skipif(not has_cftime, reason='cftime not installed')
def test_contains_cftime_datetimes_3d(times_3d):
    assert contains_cftime_datetimes(times_3d)


@pytest.mark.skipif(not has_dask, reason='dask not installed')
@pytest.mark.skipif(not has_cftime, reason='cftime not installed')
def test_contains_cftime_datetimes_dask_3d(times_3d):
    assert contains_cftime_datetimes(times_3d.chunk())


@pytest.mark.parametrize('non_cftime_data', [DataArray([]), DataArray([1, 2])])
def test_contains_cftime_datetimes_non_cftimes(non_cftime_data):
    assert not contains_cftime_datetimes(non_cftime_data)


@pytest.mark.skipif(not has_dask, reason='dask not installed')
@pytest.mark.parametrize('non_cftime_data', [DataArray([]), DataArray([1, 2])])
def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data):
    assert not contains_cftime_datetimes(non_cftime_data.chunk())
Esempio n. 50
0
def jitter(
    x: xr.DataArray,
    lower: str = None,
    upper: str = None,
    minimum: str = None,
    maximum: str = None,
) -> xr.DataArray:
    """Replaces values under a threshold and values above another by a uniform random noise.

    Do not confuse with R's jitter, which adds uniform noise instead of replacing values.

    Parameters
    ----------
    x : xr.DataArray
      Values.
    lower : str
      Threshold under which values are replaced by uniform random noise, a quantity with units.
      If None, no jittering is performed on the lower end.
    upper : str
      Threshold over which values are replaced by uniform random noise, a quantity with units.
      If None, no jittering is performed on the upper end.
    minimum : str
      Lower limit (excluded) for the lower end random noise, a quantity with units.
      If None but `lower` is not None, 0 is used.
    maximum : str
      Upper limit (excluded) for the upper end random noise, a quantity with units.
      If `upper` is not None, it must be given.

    Returns
    -------
    xr.DataArray
      Same as  `x` but values < lower are replaced by a uniform noise in range (minimum, lower)
      and values >= upper are replaced by a uniform noise in range [upper, maximum).
      The two noise distributions are independent.
    """
    out = x
    notnull = x.notnull()
    if lower is not None:
        lower = convert_units_to(lower, x)
        minimum = convert_units_to(minimum, x) if minimum is not None else 0
        minimum = minimum + np.finfo(x.dtype).eps
        if uses_dask(x):
            jitter = dsk.random.uniform(
                low=minimum, high=lower, size=x.shape, chunks=x.chunks
            )
        else:
            jitter = np.random.uniform(low=minimum, high=lower, size=x.shape)
        out = out.where(~((x < lower) & notnull), jitter.astype(x.dtype))
    if upper is not None:
        if maximum is None:
            raise ValueError("If 'upper' is given, so must 'maximum'.")
        upper = convert_units_to(upper, x)
        maximum = convert_units_to(maximum, x)
        if uses_dask(x):
            jitter = dsk.random.uniform(
                low=upper, high=maximum, size=x.shape, chunks=x.chunks
            )
        else:
            jitter = np.random.uniform(low=upper, high=maximum, size=x.shape)
        out = out.where(~((x >= upper) & notnull), jitter.astype(x.dtype))

    copy_all_attrs(out, x)  # copy attrs and same units
    return out
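
A minimal sketch of the lower-end jittering, assuming the module's unit helpers accept quantity strings such as "0.1 mm/d" and that the input carries a units attribute; the precipitation values are invented.

import xarray as xr

# Trace precipitation below 0.1 mm/d is replaced by uniform noise in (0, 0.1),
# so tied zero values become distinct while larger values are untouched.
pr = xr.DataArray(
    [0.0, 0.05, 2.3, 0.0, 7.1],
    dims=("time",),
    attrs={"units": "mm/d"},
)
pr_jittered = jitter(pr, lower="0.1 mm/d", minimum="0 mm/d")
print(pr_jittered.values)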
Esempio n. 51
0
    def get_test_content(self, filename, filename_info, filetype_info):
        """Mimic reader input file content"""
        file_content = {
            '/attr/Platform_Name':
            filename_info['platform_shortname'],
            '/attr/Element_Resolution':
            2.,
            '/attr/Line_Resolution':
            2.,
            '/attr/Subsatellite_Longitude':
            -70.2 if 'GOES' in filename_info['platform_shortname'] else 140.65,
            'pixel_longitude':
            DEFAULT_LON_DATA,
            'pixel_longitude/attr/scale_factor':
            1.,
            'pixel_longitude/attr/add_offset':
            0.,
            'pixel_longitude/shape':
            DEFAULT_FILE_SHAPE,
            'pixel_longitude/attr/_FillValue':
            np.nan,
            'pixel_latitude':
            DEFAULT_LAT_DATA,
            'pixel_latitude/attr/scale_factor':
            1.,
            'pixel_latitude/attr/add_offset':
            0.,
            'pixel_latitude/shape':
            DEFAULT_FILE_SHAPE,
            'pixel_latitude/attr/_FillValue':
            np.nan,
        }
        sensor = {
            'HIMAWARI-8': 'himawari8',
            'GOES-16': 'goes16',
            'GOES-13': 'goes',
            'GOES-14': 'goes',
            'GOES-15': 'goes',
        }[filename_info['platform_shortname']]
        file_content['/attr/Sensor_Name'] = sensor

        if filename_info['platform_shortname'] == 'HIMAWARI-8':
            file_content['pixel_longitude'] = DEFAULT_LON_DATA + 130.

        file_content['variable1'] = DEFAULT_FILE_DATA.astype(np.float32)
        file_content['variable1/attr/_FillValue'] = -1
        file_content['variable1/attr/scale_factor'] = 1.
        file_content['variable1/attr/add_offset'] = 0.
        file_content['variable1/attr/units'] = '1'
        file_content['variable1/shape'] = DEFAULT_FILE_SHAPE

        # data with fill values
        file_content['variable2'] = np.ma.masked_array(
            DEFAULT_FILE_DATA.astype(np.float32),
            mask=np.zeros_like(DEFAULT_FILE_DATA))
        file_content['variable2'].mask[::5, ::5] = True
        file_content['variable2/attr/_FillValue'] = -1
        file_content['variable2/attr/scale_factor'] = 1.
        file_content['variable2/attr/add_offset'] = 0.
        file_content['variable2/attr/units'] = '1'
        file_content['variable2/shape'] = DEFAULT_FILE_SHAPE

        # category
        file_content['variable3'] = DEFAULT_FILE_DATA.astype(np.byte)
        file_content['variable3/attr/_FillValue'] = -128
        file_content[
            'variable3/attr/flag_meanings'] = "clear water supercooled mixed ice unknown"
        file_content['variable3/attr/flag_values'] = [0, 1, 2, 3, 4, 5]
        file_content['variable3/attr/units'] = '1'
        file_content['variable3/shape'] = DEFAULT_FILE_SHAPE

        # convert to xarrays
        from xarray import DataArray
        for key, val in file_content.items():
            if isinstance(val, np.ndarray):
                attrs = {}
                for a in [
                        '_FillValue', 'flag_meanings', 'flag_values', 'units'
                ]:
                    if key + '/attr/' + a in file_content:
                        attrs[a] = file_content[key + '/attr/' + a]
                if val.ndim > 1:
                    file_content[key] = DataArray(val,
                                                  dims=('lines', 'elements'),
                                                  attrs=attrs)
                else:
                    file_content[key] = DataArray(val, attrs=attrs)

        return file_content
Esempio n. 52
0
 def setUp(self):
     d = easy_array((10, 15, 3))
     self.darray = DataArray(d,
                             dims=['y', 'x', 'z'],
                             coords={'z': ['a', 'b', 'c']})
     self.g = xplt.FacetGrid(self.darray, col='z')
Esempio n. 53
0
class TestPlot(PlotTestCase):
    def setUp(self):
        self.darray = DataArray(easy_array((2, 3, 4)))

    def test1d(self):
        self.darray[:, 0, 0].plot()

    def test_2d_before_squeeze(self):
        a = DataArray(easy_array((1, 5)))
        a.plot()

    def test2d_uniform_calls_imshow(self):
        self.assertTrue(self.imshow_called(self.darray[:, :, 0].plot.imshow))

    def test2d_nonuniform_calls_contourf(self):
        a = self.darray[:, :, 0]
        a.coords['dim_1'] = [2, 1, 89]
        self.assertTrue(self.contourf_called(a.plot.contourf))

    def test3d(self):
        self.darray.plot()

    def test_can_pass_in_axis(self):
        self.pass_in_axis(self.darray.plot)

    def test__infer_interval_breaks(self):
        self.assertArrayEqual([-0.5, 0.5, 1.5], _infer_interval_breaks([0, 1]))
        self.assertArrayEqual([-0.5, 0.5, 5.0, 9.5, 10.5],
                              _infer_interval_breaks([0, 1, 9, 10]))
        self.assertArrayEqual(
            pd.date_range('20000101', periods=4) - np.timedelta64(12, 'h'),
            _infer_interval_breaks(pd.date_range('20000101', periods=3)))

        # make a bounded 2D array that we will center and re-infer
        xref, yref = np.meshgrid(np.arange(6), np.arange(5))
        cx = (xref[1:, 1:] + xref[:-1, :-1]) / 2
        cy = (yref[1:, 1:] + yref[:-1, :-1]) / 2
        x = _infer_interval_breaks(cx, axis=1)
        x = _infer_interval_breaks(x, axis=0)
        y = _infer_interval_breaks(cy, axis=1)
        y = _infer_interval_breaks(y, axis=0)
        np.testing.assert_allclose(xref, x)
        np.testing.assert_allclose(yref, y)

    def test_datetime_dimension(self):
        nrow = 3
        ncol = 4
        time = pd.date_range('2000-01-01', periods=nrow)
        a = DataArray(easy_array((nrow, ncol)),
                      coords=[('time', time), ('y', range(ncol))])
        a.plot()
        ax = plt.gca()
        self.assertTrue(ax.has_data())

    def test_convenient_facetgrid(self):
        a = easy_array((10, 15, 4))
        d = DataArray(a, dims=['y', 'x', 'z'])
        d.coords['z'] = list('abcd')
        g = d.plot(x='x', y='y', col='z', col_wrap=2, cmap='cool')

        self.assertArrayEqual(g.axes.shape, [2, 2])
        for ax in g.axes.flat:
            self.assertTrue(ax.has_data())

        with self.assertRaisesRegexp(ValueError, '[Ff]acet'):
            d.plot(x='x', y='y', col='z', ax=plt.gca())

        with self.assertRaisesRegexp(ValueError, '[Ff]acet'):
            d[0].plot(x='x', y='y', col='z', ax=plt.gca())

    def test_subplot_kws(self):
        a = easy_array((10, 15, 4))
        d = DataArray(a, dims=['y', 'x', 'z'])
        d.coords['z'] = list('abcd')
        g = d.plot(x='x',
                   y='y',
                   col='z',
                   col_wrap=2,
                   cmap='cool',
                   subplot_kws=dict(axisbg='r'))
        for ax in g.axes.flat:
            try:
                # mpl V2
                self.assertEqual(ax.get_facecolor()[0:3],
                                 mpl.colors.to_rgb('r'))
            except AttributeError:
                self.assertEqual(ax.get_axis_bgcolor(), 'r')

    def test_plot_size(self):
        self.darray[:, 0, 0].plot(figsize=(13, 5))
        assert tuple(plt.gcf().get_size_inches()) == (13, 5)

        self.darray.plot(figsize=(13, 5))
        assert tuple(plt.gcf().get_size_inches()) == (13, 5)

        self.darray.plot(size=5)
        assert plt.gcf().get_size_inches()[1] == 5

        self.darray.plot(size=5, aspect=2)
        assert tuple(plt.gcf().get_size_inches()) == (10, 5)

        with self.assertRaisesRegexp(ValueError, 'cannot provide both'):
            self.darray.plot(ax=plt.gca(), figsize=(3, 4))

        with self.assertRaisesRegexp(ValueError, 'cannot provide both'):
            self.darray.plot(size=5, figsize=(3, 4))

        with self.assertRaisesRegexp(ValueError, 'cannot provide both'):
            self.darray.plot(size=5, ax=plt.gca())

        with self.assertRaisesRegexp(ValueError, 'cannot provide `aspect`'):
            self.darray.plot(aspect=1)

    def test_convenient_facetgrid_4d(self):
        a = easy_array((10, 15, 2, 3))
        d = DataArray(a, dims=['y', 'x', 'columns', 'rows'])
        g = d.plot(x='x', y='y', col='columns', row='rows')

        self.assertArrayEqual(g.axes.shape, [3, 2])
        for ax in g.axes.flat:
            self.assertTrue(ax.has_data())

        with self.assertRaisesRegexp(ValueError, '[Ff]acet'):
            d.plot(x='x', y='y', col='columns', ax=plt.gca())
Esempio n. 54
0
def from_series_or_scalar(se):
    try:
        return DataArray.from_series(se)
    except AttributeError:  # scalar case
        return DataArray(se)
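
A short usage sketch for the helper above; the Series values and name are arbitrary.

import pandas as pd

# A pandas Series becomes a 1-D DataArray (its index becomes a coordinate),
# while a plain scalar falls back to a 0-d DataArray.
series_da = from_series_or_scalar(pd.Series([1.0, 2.0, 3.0], name="speed"))
scalar_da = from_series_or_scalar(42.0)
print(series_da.dims, scalar_da.shape)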
Esempio n. 55
0
class TestDataArrayAndDataset(DaskTestCase):
    def assertLazyAndIdentical(self, expected, actual):
        self.assertLazyAnd(expected, actual, assert_identical)

    def assertLazyAndAllClose(self, expected, actual):
        self.assertLazyAnd(expected, actual, assert_allclose)

    def assertLazyAndEqual(self, expected, actual):
        self.assertLazyAnd(expected, actual, assert_equal)

    @pytest.fixture(autouse=True)
    def setUp(self):
        self.values = np.random.randn(4, 6)
        self.data = da.from_array(self.values, chunks=(2, 2))
        self.eager_array = DataArray(self.values,
                                     coords={"x": range(4)},
                                     dims=("x", "y"),
                                     name="foo")
        self.lazy_array = DataArray(self.data,
                                    coords={"x": range(4)},
                                    dims=("x", "y"),
                                    name="foo")

    def test_rechunk(self):
        chunked = self.eager_array.chunk({"x": 2}).chunk({"y": 2})
        assert chunked.chunks == ((2, ) * 2, (2, ) * 3)
        self.assertLazyAndIdentical(self.lazy_array, chunked)

    def test_new_chunk(self):
        chunked = self.eager_array.chunk()
        assert chunked.data.name.startswith("xarray-<this-array>")

    def test_lazy_dataset(self):
        lazy_ds = Dataset({"foo": (("x", "y"), self.data)})
        assert isinstance(lazy_ds.foo.variable.data, da.Array)

    def test_lazy_array(self):
        u = self.eager_array
        v = self.lazy_array

        self.assertLazyAndAllClose(u, v)
        self.assertLazyAndAllClose(-u, -v)
        self.assertLazyAndAllClose(u.T, v.T)
        self.assertLazyAndAllClose(u.mean(), v.mean())
        self.assertLazyAndAllClose(1 + u, 1 + v)

        actual = xr.concat([v[:2], v[2:]], "x")
        self.assertLazyAndAllClose(u, actual)

    def test_compute(self):
        u = self.eager_array
        v = self.lazy_array

        assert dask.is_dask_collection(v)
        (v2, ) = dask.compute(v + 1)
        assert not dask.is_dask_collection(v2)

        assert ((u + 1).data == v2.data).all()

    def test_persist(self):
        u = self.eager_array
        v = self.lazy_array + 1

        (v2, ) = dask.persist(v)
        assert v is not v2
        assert len(v2.__dask_graph__()) < len(v.__dask_graph__())
        assert v2.__dask_keys__() == v.__dask_keys__()
        assert dask.is_dask_collection(v)
        assert dask.is_dask_collection(v2)

        self.assertLazyAndAllClose(u + 1, v)
        self.assertLazyAndAllClose(u + 1, v2)

    def test_concat_loads_variables(self):
        # Test that concat() computes not-in-memory variables at most once
        # and loads them in the output, while leaving the input unaltered.
        d1 = build_dask_array("d1")
        c1 = build_dask_array("c1")
        d2 = build_dask_array("d2")
        c2 = build_dask_array("c2")
        d3 = build_dask_array("d3")
        c3 = build_dask_array("c3")
        # Note: c is a non-index coord.
        # Index coords are loaded by IndexVariable.__init__.
        ds1 = Dataset(data_vars={"d": ("x", d1)}, coords={"c": ("x", c1)})
        ds2 = Dataset(data_vars={"d": ("x", d2)}, coords={"c": ("x", c2)})
        ds3 = Dataset(data_vars={"d": ("x", d3)}, coords={"c": ("x", c3)})

        assert kernel_call_count == 0
        out = xr.concat([ds1, ds2, ds3],
                        dim="n",
                        data_vars="different",
                        coords="different")
        # each kernel is computed exactly once
        assert kernel_call_count == 6
        # variables are loaded in the output
        assert isinstance(out["d"].data, np.ndarray)
        assert isinstance(out["c"].data, np.ndarray)

        out = xr.concat([ds1, ds2, ds3],
                        dim="n",
                        data_vars="all",
                        coords="all")
        # no extra kernel calls
        assert kernel_call_count == 6
        assert isinstance(out["d"].data, dask.array.Array)
        assert isinstance(out["c"].data, dask.array.Array)

        out = xr.concat([ds1, ds2, ds3],
                        dim="n",
                        data_vars=["d"],
                        coords=["c"])
        # no extra kernel calls
        assert kernel_call_count == 6
        assert isinstance(out["d"].data, dask.array.Array)
        assert isinstance(out["c"].data, dask.array.Array)

        out = xr.concat([ds1, ds2, ds3], dim="n", data_vars=[], coords=[])
        # variables are loaded once as we are validating that they're identical
        assert kernel_call_count == 12
        assert isinstance(out["d"].data, np.ndarray)
        assert isinstance(out["c"].data, np.ndarray)

        out = xr.concat(
            [ds1, ds2, ds3],
            dim="n",
            data_vars="different",
            coords="different",
            compat="identical",
        )
        # compat=identical doesn't do any more kernel calls than compat=equals
        assert kernel_call_count == 18
        assert isinstance(out["d"].data, np.ndarray)
        assert isinstance(out["c"].data, np.ndarray)

        # When the test for different turns true halfway through,
        # stop computing variables as it would not have any benefit
        ds4 = Dataset(data_vars={"d": ("x", [2.0])},
                      coords={"c": ("x", [2.0])})
        out = xr.concat([ds1, ds2, ds4, ds3],
                        dim="n",
                        data_vars="different",
                        coords="different")
        # the variables of ds1 and ds2 were computed, but those of ds3 didn't
        assert kernel_call_count == 22
        assert isinstance(out["d"].data, dask.array.Array)
        assert isinstance(out["c"].data, dask.array.Array)
        # the data of ds1 and ds2 was loaded into numpy and then
        # concatenated to the data of ds3. Thus, only ds3 is computed now.
        out.compute()
        assert kernel_call_count == 24

        # Finally, test that originals are unaltered
        assert ds1["d"].data is d1
        assert ds1["c"].data is c1
        assert ds2["d"].data is d2
        assert ds2["c"].data is c2
        assert ds3["d"].data is d3
        assert ds3["c"].data is c3

        # now check that concat() is correctly using dask name equality to skip loads
        out = xr.concat([ds1, ds1, ds1],
                        dim="n",
                        data_vars="different",
                        coords="different")
        assert kernel_call_count == 24
        # variables are not loaded in the output
        assert isinstance(out["d"].data, dask.array.Array)
        assert isinstance(out["c"].data, dask.array.Array)

        out = xr.concat([ds1, ds1, ds1],
                        dim="n",
                        data_vars=[],
                        coords=[],
                        compat="identical")
        assert kernel_call_count == 24
        # variables are not loaded in the output
        assert isinstance(out["d"].data, dask.array.Array)
        assert isinstance(out["c"].data, dask.array.Array)

        out = xr.concat(
            [ds1, ds2.compute(), ds3],
            dim="n",
            data_vars="all",
            coords="different",
            compat="identical",
        )
        # c1,c3 must be computed for comparison since c2 is numpy;
        # d2 is computed too
        assert kernel_call_count == 28

        out = xr.concat(
            [ds1, ds2.compute(), ds3],
            dim="n",
            data_vars="all",
            coords="all",
            compat="identical",
        )
        # no extra computes
        assert kernel_call_count == 30

        # Finally, test that originals are unaltered
        assert ds1["d"].data is d1
        assert ds1["c"].data is c1
        assert ds2["d"].data is d2
        assert ds2["c"].data is c2
        assert ds3["d"].data is d3
        assert ds3["c"].data is c3

    def test_groupby(self):
        u = self.eager_array
        v = self.lazy_array

        expected = u.groupby("x").mean(...)
        actual = v.groupby("x").mean(...)
        self.assertLazyAndAllClose(expected, actual)

    def test_groupby_first(self):
        u = self.eager_array
        v = self.lazy_array

        for coords in [u.coords, v.coords]:
            coords["ab"] = ("x", ["a", "a", "b", "b"])
        with raises_regex(NotImplementedError, "dask"):
            v.groupby("ab").first()
        expected = u.groupby("ab").first()
        actual = v.groupby("ab").first(skipna=False)
        self.assertLazyAndAllClose(expected, actual)

    def test_reindex(self):
        u = self.eager_array.assign_coords(y=range(6))
        v = self.lazy_array.assign_coords(y=range(6))

        for kwargs in [
            {
                "x": [2, 3, 4]
            },
            {
                "x": [1, 100, 2, 101, 3]
            },
            {
                "x": [2.5, 3, 3.5],
                "y": [2, 2.5, 3]
            },
        ]:
            expected = u.reindex(**kwargs)
            actual = v.reindex(**kwargs)
            self.assertLazyAndAllClose(expected, actual)

    def test_to_dataset_roundtrip(self):
        u = self.eager_array
        v = self.lazy_array

        expected = u.assign_coords(x=u["x"])
        self.assertLazyAndEqual(expected, v.to_dataset("x").to_array("x"))

    def test_merge(self):
        def duplicate_and_merge(array):
            return xr.merge([array, array.rename("bar")]).to_array()

        expected = duplicate_and_merge(self.eager_array)
        actual = duplicate_and_merge(self.lazy_array)
        self.assertLazyAndEqual(expected, actual)

    @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
    def test_ufuncs(self):
        u = self.eager_array
        v = self.lazy_array
        self.assertLazyAndAllClose(np.sin(u), xu.sin(v))

    def test_where_dispatching(self):
        a = np.arange(10)
        b = a > 3
        x = da.from_array(a, 5)
        y = da.from_array(b, 5)
        expected = DataArray(a).where(b)
        self.assertLazyAndEqual(expected, DataArray(a).where(y))
        self.assertLazyAndEqual(expected, DataArray(x).where(b))
        self.assertLazyAndEqual(expected, DataArray(x).where(y))

    def test_simultaneous_compute(self):
        ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk()

        count = [0]

        def counting_get(*args, **kwargs):
            count[0] += 1
            return dask.get(*args, **kwargs)

        ds.load(scheduler=counting_get)

        assert count[0] == 1

    def test_stack(self):
        data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4))
        arr = DataArray(data, dims=("w", "x", "y"))
        stacked = arr.stack(z=("x", "y"))
        z = pd.MultiIndex.from_product(
            [np.arange(3), np.arange(4)], names=["x", "y"])
        expected = DataArray(data.reshape(2, -1), {"z": z}, dims=["w", "z"])
        assert stacked.data.chunks == expected.data.chunks
        self.assertLazyAndEqual(expected, stacked)

    def test_dot(self):
        eager = self.eager_array.dot(self.eager_array[0])
        lazy = self.lazy_array.dot(self.lazy_array[0])
        self.assertLazyAndAllClose(eager, lazy)

    @pytest.mark.skipif(LooseVersion(dask.__version__) >= "2.0",
                        reason="no meta")
    def test_dataarray_repr_legacy(self):
        data = build_dask_array("data")
        nonindex_coord = build_dask_array("coord")
        a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
        expected = dedent("""\
            <xarray.DataArray 'data' (x: 1)>
            {!r}
            Coordinates:
                y        (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>
            Dimensions without coordinates: x""".format(data))
        assert expected == repr(a)
        assert kernel_call_count == 0  # should not evaluate dask array

    @pytest.mark.skipif(LooseVersion(dask.__version__) < "2.0",
                        reason="needs meta")
    def test_dataarray_repr(self):
        data = build_dask_array("data")
        nonindex_coord = build_dask_array("coord")
        a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
        expected = dedent("""\
            <xarray.DataArray 'data' (x: 1)>
            {!r}
            Coordinates:
                y        (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>
            Dimensions without coordinates: x""".format(data))
        assert expected == repr(a)
        assert kernel_call_count == 0  # should not evaluate dask array

    @pytest.mark.skipif(LooseVersion(dask.__version__) < "2.0",
                        reason="needs meta")
    def test_dataset_repr(self):
        data = build_dask_array("data")
        nonindex_coord = build_dask_array("coord")
        ds = Dataset(data_vars={"a": ("x", data)},
                     coords={"y": ("x", nonindex_coord)})
        expected = dedent("""\
            <xarray.Dataset>
            Dimensions:  (x: 1)
            Coordinates:
                y        (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>
            Dimensions without coordinates: x
            Data variables:
                a        (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>"""
                          )
        assert expected == repr(ds)
        assert kernel_call_count == 0  # should not evaluate dask array

    def test_dataarray_pickle(self):
        # Test that pickling/unpickling does not convert the dask backend
        # to numpy in either the data variable or the non-index coords
        data = build_dask_array("data")
        nonindex_coord = build_dask_array("coord")
        a1 = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
        a1.compute()
        assert not a1._in_memory
        assert not a1.coords["y"]._in_memory
        assert kernel_call_count == 2
        a2 = pickle.loads(pickle.dumps(a1))
        assert kernel_call_count == 2
        assert_identical(a1, a2)
        assert not a1._in_memory
        assert not a2._in_memory
        assert not a1.coords["y"]._in_memory
        assert not a2.coords["y"]._in_memory

    def test_dataset_pickle(self):
        # Test that pickling/unpickling does not convert the dask backend
        # to numpy in either the data variables or the non-index coords
        data = build_dask_array("data")
        nonindex_coord = build_dask_array("coord")
        ds1 = Dataset(data_vars={"a": ("x", data)},
                      coords={"y": ("x", nonindex_coord)})
        ds1.compute()
        assert not ds1["a"]._in_memory
        assert not ds1["y"]._in_memory
        assert kernel_call_count == 2
        ds2 = pickle.loads(pickle.dumps(ds1))
        assert kernel_call_count == 2
        assert_identical(ds1, ds2)
        assert not ds1["a"]._in_memory
        assert not ds2["a"]._in_memory
        assert not ds1["y"]._in_memory
        assert not ds2["y"]._in_memory

    def test_dataarray_getattr(self):
        # ipython/jupyter issues a long list of getattr() calls when trying to
        # represent an object.
        # Make sure we're not accidentally computing dask variables.
        data = build_dask_array("data")
        nonindex_coord = build_dask_array("coord")
        a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)})
        with suppress(AttributeError):
            getattr(a, "NOTEXIST")
        assert kernel_call_count == 0

    def test_dataset_getattr(self):
        # Same check as test_dataarray_getattr, but for Dataset: a failed
        # attribute lookup must not accidentally compute dask variables
        data = build_dask_array("data")
        nonindex_coord = build_dask_array("coord")
        ds = Dataset(data_vars={"a": ("x", data)},
                     coords={"y": ("x", nonindex_coord)})
        with suppress(AttributeError):
            getattr(ds, "NOTEXIST")
        assert kernel_call_count == 0

    def test_values(self):
        # Test that invoking the values property does not convert the dask
        # backend to numpy
        a = DataArray([1, 2]).chunk()
        assert not a._in_memory
        assert a.values.tolist() == [1, 2]
        assert not a._in_memory

    def test_from_dask_variable(self):
        # Test array creation from Variable with dask backend.
        # This is used e.g. in broadcast()
        a = DataArray(self.lazy_array.variable,
                      coords={"x": range(4)},
                      name="foo")
        self.assertLazyAndIdentical(self.lazy_array, a)
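
# Illustrative sketch (not part of the test class above): the assertions rely
# on a module-level counter, kernel_call_count, and a factory,
# build_dask_array, that creates a one-chunk dask array whose kernel bumps the
# counter every time dask materializes it.  A minimal version could look like
# this; the exact names and construction are assumptions and the real test
# suite may differ.
import dask.array
import numpy as np

kernel_call_count = 0


def kernel(name):
    """Dask kernel that counts how many times it has been computed."""
    global kernel_call_count
    kernel_call_count += 1
    return np.ones(1, dtype=np.int64)


def build_dask_array(name):
    """Return a single-chunk dask array backed by the counting kernel."""
    global kernel_call_count
    kernel_call_count = 0
    return dask.array.Array(
        dask={(name, 0): (kernel, name)}, name=name, chunks=((1,),), dtype=np.int64
    )
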
Esempio n. 56
0
def ensembles2dataset(ensdict, dsattrs={}, verbose=False, print_every=1000):
    """
    Convert a dictionary of ensembles into an xarray Dataset object.
    """
    mms2ms = 1e-3
    fbadens = np.array([not isinstance(ens, dict) for ens in ensdict])
    nt = len(ensdict) - np.sum(fbadens)
    n=0
    ensdict0 = np.nan
    while not isinstance(ensdict0, dict):
        ensdict0 = ensdict[n]
        n+=1
    nz = ensdict0['fixed_leader_janus']['number_of_cells']
    sk = np.ma.zeros((nz, nt))*np.nan # Template array; beam velocities are
                                      # stored in mm/s and converted to m/s below.
    b1, b2, b3, b4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    sk0 = np.ma.zeros(nt)*np.nan
    cor1, cor2, cor3, cor4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    int1, int2, int3, int4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    b5, cor5, int5 = sk.copy(), sk.copy(), sk.copy()
    heading, pitch, roll = sk0.copy(), sk0.copy(), sk0.copy()
    tjanus = []

    ensdict = np.array(ensdict)[~fbadens]
    ensdict = ensdict.tolist()
    n=0
    for ensarr in ensdict:
        tjanus.append(ensarr['timestamp'])
        heading[n] = ensarr['variable_leader_janus']['heading']
        pitch[n] = ensarr['variable_leader_janus']['pitch']
        roll[n] = ensarr['variable_leader_janus']['roll']
        vjanus = ensarr['velocity_janus']['data']
        b1[:, n] = vjanus[:, 0]
        b2[:, n] = vjanus[:, 1]
        b3[:, n] = vjanus[:, 2]
        b4[:, n] = vjanus[:, 3]
        b5[:, n] = ensarr['velocity_beam5']['data'].squeeze()
        corjanus = ensarr['correlation_janus']['data']
        cor1[:, n] = corjanus[:, 0]
        cor2[:, n] = corjanus[:, 1]
        cor3[:, n] = corjanus[:, 2]
        cor4[:, n] = corjanus[:, 3]
        cor5[:, n] = ensarr['correlation_beam5']['data'].squeeze()
        intjanus = ensarr['echo_intensity_janus']['data']
        int1[:, n] = intjanus[:, 0]
        int2[:, n] = intjanus[:, 1]
        int3[:, n] = intjanus[:, 2]
        int4[:, n] = intjanus[:, 3]
        int5[:, n] = ensarr['echo_intensity_beam5']['data'].squeeze()

        n+=1
        if verbose and not n%print_every: print(n)

    fixj = ensdict0['fixed_leader_janus']
    fix5 = ensdict0['fixed_leader_beam5']

    # Add ping offset to get beam 5's timestamps.
    dt5 = fix5['ping_offset_time'] # In milliseconds.
    dt5 = np.array(Timedelta(dt5, unit='ms'))
    t5 = tjanus + dt5

    th = fixj['beam_angle']
    assert th==25 # Always 25 degrees.
    th = th*np.pi/180.
    Cth = np.cos(th)

    # Construct along-beam/vertical axes.
    cm2m = 1e-2
    r1janus = fixj['bin_1_distance']*cm2m
    r1b5 = fix5['bin_1_distance']*cm2m
    ncj = fixj['number_of_cells']
    nc5 = fix5['number_of_cells']
    lcj = fixj['depth_cell_length']*cm2m
    lc5 = fix5['depth_cell_length']*cm2m
    Lj = ncj*lcj # Along-beam extent spanned by the Janus bins.
    L5 = nc5*lc5 # Along-beam extent spanned by the beam 5 bins.

    rb = r1janus + np.arange(0, Lj, lcj) # Distance from xducer head
                                         # (Janus).
    zab = Cth*rb                         # Vertical distance from xducer head
                                         # (Janus).
    zab5 = r1b5 + np.arange(0, L5, lc5)  # Distance from xducer head, also
                                         # depth for the vertical beam.

    rb = IndexVariable('z', rb, attrs={'units':'meters', 'long_name':"along-beam distance from the xducer's face to the center of the bins, for beams 1-4 (Janus)"})
    zab = IndexVariable('z', zab, attrs={'units':'meters', 'long_name':"vertical distance from the instrument's head to the center of the bins, for beams 1-4 (Janus)"})
    zab5 = IndexVariable('z5', zab5, attrs={'units':'meters', 'long_name':"vertical distance from xducer face to the center of the bins, for beam 5 (vertical)"})
    time = IndexVariable('time', tjanus, attrs={'long_name':'timestamp for beams 1-4 (Janus)'})
    time5 = IndexVariable('time5', t5, attrs={'long_name':'timestamp for beam 5 (vertical)'})

    coords0 = [('time', time)]
    coords = [('z', zab), ('time', time)]
    coords5 = [('z5', zab5), ('time5', time5)]
    dims = ['z', 'time']
    dims5 = ['z5', 'time5']
    dims0 = ['time']

    # Convert velocities to m/s.
    b1, b2, b3, b4, b5 = b1*mms2ms, b2*mms2ms, b3*mms2ms, b4*mms2ms, b5*mms2ms

    # Scale heading, pitch and roll. Sentinel V manual, p. 259.
    phisc = 0.01
    heading *= phisc
    pitch *= phisc
    roll *= phisc

    arrs = (b1, b2, b3, b4, b5,
            cor1, cor2, cor3, cor4, cor5,
            int1, int2, int3, int4, int5,
            heading, pitch, roll)
            # pressure, temperature, salinity, soundspeed)
    long_names = ('Beam 1 velocity', 'Beam 2 velocity',
             'Beam 3 velocity', 'Beam 4 velocity',
             'Beam 5 velocity',
             'Beam 1 correlation', 'Beam 2 correlation',
             'Beam 3 correlation', 'Beam 4 correlation',
             'Beam 5 correlation',
             'Beam 1 echo amplitude', 'Beam 2 echo amplitude',
             'Beam 3 echo amplitude', 'Beam 4 echo amplitude',
             'Beam 5 echo amplitude',
             'heading', 'pitch', 'roll')
    units = ('m/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'no units', 'no units', 'no units', 'no units',
             'no units',
             'dB', 'dB', 'dB', 'dB',
             'dB',
             'degrees', 'degrees', 'degrees')
    names = ('b1', 'b2', 'b3', 'b4', 'b5',
             'cor1', 'cor2', 'cor3', 'cor4', 'cor5',
             'int1', 'int2', 'int3', 'int4', 'int5',
             'phi1', 'phi2', 'phi3')
    data_vars = {}
    for arr,name,long_name,unit in zip(arrs,names,long_names,units):

        if 'Beam 5' in long_name:
            coordsn = coords5
            dimsn = dims5
        elif 'phi' in name:
            coordsn = coords0
            dimsn = dims0
        else:
            coordsn = coords
            dimsn = dims

        if 'int' in name:
            arr *= 0.45 # Scale factor for echo intensity,
                        # see Sentinel V manual p. 264.

        da = DataArray(arr, coords=coordsn, dims=dimsn, attrs=dict(units=unit, long_name=long_name))
        data_vars.update({name:da})

    allcoords = {'rb':rb} # Along-beam distance for slanted beams.
    allcoords.update(coords)
    allcoords.update(coords5)
    ds = Dataset(data_vars=data_vars, coords=allcoords, attrs=dsattrs)

    return ds
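
# Quick numeric sanity check of the conversions used above (standalone sketch
# with made-up sample values, not part of the function):
import numpy as np

th = 25 * np.pi / 180.0      # Janus beam angle in radians
print(np.cos(th) * 10.0)     # a bin 10 m along a slanted beam sits ~9.06 m above the head
print(1234 * 1e-3)           # raw beam velocity of 1234 mm/s -> 1.234 m/s
print(2215 * 0.01)           # raw heading count of 2215 -> 22.15 degrees (0.01 scale factor)
print(100 * 0.45)            # raw echo intensity of 100 counts -> 45 dB (0.45 scale factor)
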
Esempio n. 57
0
 def update_metrics(self, data_array: xr.DataArray, **kwargs):
     metrics = data_array.attrs.get('metrics', {})
     metrics.update(**kwargs)
     data_array.attrs['metrics'] = metrics
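
# Minimal usage sketch of the attrs-based metrics pattern above (the owning
# class is not shown in this example, so a standalone stand-in is used here;
# the sample metric names are made up):
import numpy as np
import xarray as xr


def _update_metrics(data_array: xr.DataArray, **kwargs):
    metrics = data_array.attrs.get('metrics', {})
    metrics.update(**kwargs)
    data_array.attrs['metrics'] = metrics


da_example = xr.DataArray(np.zeros(3), dims='x')
_update_metrics(da_example, rmse=0.12)
_update_metrics(da_example, bias=-0.03)
# da_example.attrs['metrics'] == {'rmse': 0.12, 'bias': -0.03}
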
Esempio n. 58
0
 def spatial_interpolate_slice(self, persistent_classes: xr.DataArray,
                               water_maps_slice: xr.DataArray, **kwargs) -> xr.DataArray:
     dynamics_class = kwargs.get("dynamics_class", 0)
     tval = water_maps_slice.coords[water_maps_slice.dims[0]].values[0]
     persistent_classes_slice = (
         persistent_classes if persistent_classes.ndim == 2
         else persistent_classes
         .sel(**{persistent_classes.dims[0]: tval}, method="nearest")
         .drop_vars(persistent_classes.dims[0])
     )
     dynamics_mask: xr.DataArray = persistent_classes_slice.isin([dynamics_class])
     return water_maps_slice.where(dynamics_mask, persistent_classes_slice)
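
# Tiny illustration of the .where(cond, other) pattern used above, with
# made-up data: pixels whose persistent class is in [dynamics_class] keep the
# water-map value, all other pixels fall back to the persistent class.
import xarray as xr

persistent = xr.DataArray([[0, 1], [2, 0]], dims=("y", "x"))
water = xr.DataArray([[9, 9], [9, 9]], dims=("y", "x"))
out = water.where(persistent.isin([0]), persistent)
# out == [[9, 1], [2, 9]]
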
Esempio n. 59
0
    def test_lettered_tiles_update_existing(self):
        """Test updating lettered tiles with additional data."""
        import shutil
        import xarray as xr
        from satpy.writers.awips_tiled import AWIPSTiledWriter
        from xarray import DataArray
        from pyresample.geometry import AreaDefinition
        from pyresample.utils import proj4_str_to_dict
        import dask
        first_base_dir = os.path.join(self.base_dir, 'first')
        w = AWIPSTiledWriter(base_dir=first_base_dir, compress=True)
        area_def = AreaDefinition(
            'test',
            'test',
            'test',
            proj4_str_to_dict(
                '+proj=lcc +datum=WGS84 +ellps=WGS84 +lon_0=-95. '
                '+lat_0=25 +lat_1=25 +units=m +no_defs'),
            1000,
            2000,
            (-1000000., -1500000., 1000000., 1500000.),
        )
        now = datetime(2018, 1, 1, 12, 0, 0)
        data = np.linspace(0., 1., 2000000, dtype=np.float32).reshape(
            (2000, 1000))
        # pixels to be filled in later
        data[:, -200:] = np.nan
        ds = DataArray(da.from_array(data, chunks=500),
                       attrs=dict(name='test_ds',
                                  platform_name='PLAT',
                                  sensor='SENSOR',
                                  units='1',
                                  area=area_def,
                                  start_time=now,
                                  end_time=now + timedelta(minutes=20)))
        # tile_count should be ignored since we specified lettered_grid
        w.save_datasets([ds],
                        sector_id='LCC',
                        source_name="TESTS",
                        tile_count=(3, 3),
                        lettered_grid=True)
        all_files = sorted(glob(os.path.join(first_base_dir, 'TESTS_AII*.nc')))
        assert len(all_files) == 16
        first_files = []
        second_base_dir = os.path.join(self.base_dir, 'second')
        os.makedirs(second_base_dir)
        for fn in all_files:
            new_fn = fn.replace(first_base_dir, second_base_dir)
            shutil.copy(fn, new_fn)
            first_files.append(new_fn)

        # Second writing/updating
        # Area is about 100 pixels to the right
        area_def2 = AreaDefinition(
            'test',
            'test',
            'test',
            proj4_str_to_dict(
                '+proj=lcc +datum=WGS84 +ellps=WGS84 +lon_0=-95. '
                '+lat_0=25 +lat_1=25 +units=m +no_defs'),
            1000,
            2000,
            (-800000., -1500000., 1200000., 1500000.),
        )
        data2 = np.linspace(0., 1., 2000000, dtype=np.float32).reshape(
            (2000, 1000))
        # a gap at the beginning where old values remain
        data2[:, :200] = np.nan
        # a gap at the end where old values remain
        data2[:, -400:-300] = np.nan
        ds2 = DataArray(da.from_array(data2, chunks=500),
                        attrs=dict(name='test_ds',
                                   platform_name='PLAT',
                                   sensor='SENSOR',
                                   units='1',
                                   area=area_def2,
                                   start_time=now,
                                   end_time=now + timedelta(minutes=20)))
        w = AWIPSTiledWriter(base_dir=second_base_dir, compress=True)
        # HACK: The _copy_to_existing function hangs when opening the output
        #   file multiple times...sometimes. If we limit dask to one worker
        #   it seems to work fine.
        with dask.config.set(num_workers=1):
            w.save_datasets([ds2],
                            sector_id='LCC',
                            source_name="TESTS",
                            tile_count=(3, 3),
                            lettered_grid=True)
        all_files = glob(os.path.join(second_base_dir, 'TESTS_AII*.nc'))
        # 16 original tiles + 4 new tiles
        assert len(all_files) == 20

        # these tiles should be the right-most edge of the first image
        first_right_edge_files = [
            x for x in first_files
            if 'P02' in x or 'P04' in x or 'V02' in x or 'V04' in x
        ]
        for new_file in first_right_edge_files:
            orig_file = new_file.replace(second_base_dir, first_base_dir)
            orig_nc = xr.open_dataset(orig_file)
            orig_data = orig_nc['data'].values
            if not np.isnan(orig_data).any():
                # we only care about the tiles that had NaNs originally
                continue

            new_nc = xr.open_dataset(new_file)
            new_data = new_nc['data'].values
            # there should be at least some areas of the file
            # where the old data is present and hasn't been replaced
            np.testing.assert_allclose(orig_data[:, :20], new_data[:, :20])
            # it isn't exactly 200 because the tiles aren't aligned with the
            # data (the left-most tile doesn't have data until some columns
            # in), but it should be at least that many columns
            assert np.isnan(orig_data[:, 200:]).all()
            assert not np.isnan(new_data[:, 200:]).all()
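
# The update behaviour the assertions above check can be pictured with plain
# numpy (this is only a mental model of the tile update, not satpy's actual
# implementation): new data overwrites a tile only where it is not NaN.
import numpy as np

old = np.array([1.0, 2.0, np.nan, np.nan])
new = np.array([np.nan, 5.0, 6.0, np.nan])
merged = np.where(np.isnan(new), old, new)
# merged == [1., 5., 6., nan] -> old values survive wherever the new pass has no data
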
Esempio n. 60
0
def estimate_motion(varr: xr.DataArray,
                    dim="frame",
                    npart=3,
                    chunk_nfm: Optional[int] = None,
                    **kwargs) -> xr.DataArray:
    """
    Estimate motion for each frame of the input movie data.

    This function estimates motion using a recursive approach. The movie is
    split into chunks of `npart` frames and motion estimation is carried out
    within each chunk relative to the middle frame, then a template is generated
    for each chunk by aggregating the motion-corrected frames within each chunk.
    Next, every `npart` chunks are grouped together, and motion estimation is
    carried out within each group relative to the middle chunk using the
    aggregated templates. The chunk-level motions are added on top of the
    previous within-chunk level motions. This step is then repeated recursively
    until we are left with a single chunk representing the full movie, at which
    point the motion estimation is finished.

    The motion estimation itself is carried out with fft-based phase correlation
    by default. Alternatively, non-rigid motion correction can be carried out by
    modelling the motion of each frame as translations of individual vertices of
    a smooth BSpline mesh. The translations can then be found by gradient
    descent, using the correlation between frames as the objective. This
    feature is currently experimental. Additionally, various correction
    procedures can be carried out to filter out frames not suited as templates
    for motion correction, or to correct for large false shifts when the
    quality of the templates is low.

    Parameters
    ----------
    varr : xr.DataArray
        Input movie data.
    dim : str, optional
        The dimension along which motion estimation should be carried out. By
        default `"frame"`.
    npart : int, optional
        Number of frames/chunks to combine for the recursive algorithm. By
        default `3`.
    chunk_nfm : int, optional
        Number of frames in each parallel task. Note that this only affects dask
        graph construction, but not the recursion of the algorithm. If `None`
        then the dask chunksize along `dim` will be used. By default `None`.

    Keyword Arguments
    -----------------
    alt_error : float, optional
        Error threshold between estimated shifts from two alternative methods,
        specified in pixels. If not `None`, then for each chunk during
        recursion, the first and last frame of that chunk will be returned in
        addition to the aggregated template. And when estimating motion between
        chunks, the estimation will be carried out twice: once using the
        aggregated templates, once using the consecutive first/last frames
        between chunks. The result of these two methods will then be compared.
        If their absolute difference is larger than `alt_error`, then the result
        with smaller absolute value (closer to zero shifts) will be used. This
        is useful to correct for cases where activities of cells are sparse and
        changing across chunks, leading to wrong features being matched in
        aggregated templates. If `None` then no additional checking will be
        performed. By default `5`.
    aggregation : str, optional
        How frames should be aggregated to generate the template for each chunk.
        Should be either "mean" or "max". By default `"mean"`.
    upsample : int, optional
        The upsample factor passed to
        :func:`skimage.registration.phase_cross_correlation` to achieve
        sub-pixel accuracy.
    circ_thres : float, optional
        The circularity threshold to check whether a frame can serve as a good
        template for estimating motion. If not `None`, then for each frame a
        comparison image is computed using :func:`cv2.matchTemplate` between the
        frame and zero-padded version (up to `max_sh`) using
        `cv2.TM_SQDIFF_NORMED`. The comparison image of a good template should
        only have `< 1` values around the center and the `< 1` region should be
        circular. Hence the circularity defined as `4 * np.pi * (area /
        (perimeter ** 2))` for the `< 1` region is computed, and any frame with
        circularity smaller than `circ_thres` is excluded from propagation of
        shifts and aggregation of templates. By default `None`.
    max_sh : int, optional
        Amount of zero padding when checking for the quality of frames,
        specified in pixels. Only used if `circ_thres is not None`. See
        `circ_thres` for more detail. By default `100`.
    mesh_size : Tuple[int, int], optional
        Number of control points for the BSpline mesh in each dimension,
        specified in the order ("height", "width"). If not `None` then the
        experimental non-rigid motion estimation is enabled. By default `None`.
    niter : int, optional
        Max number of iterations for the gradient descent process that estimates
        the BSpline parameters. Only used if `mesh_size is not None`. By default
        `100`.
    bin_thres : int, optional
        Intensity threshold for binarizing the frames. The binarized frame will
        be used as masks for non-rigid motion estimation, where only pixels in
        the mask will be used to evaluate the gradient during optimization.
        This significantly improves performance but sacrifices estimation
        accuracy in dim regions. Only used if `mesh_size is not None`. By default
        `None`.

    Returns
    -------
    motion : xr.DataArray
        Estimated motion for each frame. Has two dimensions `dim` and
        `"shift_dim"` representing rigid shifts in each direction if `mesh_size
        is None`, otherwise has four dimensions: `dim`, `"grid0"`, `"grid1"` and
        `"shift_dim"` representing shifts for each mesh grid control point.

    See Also
    --------
    :doc:`simpleitk:registrationOverview` :
        for overview of the non-rigid estimation method
    """
    varr = varr.transpose(..., dim, "height", "width")
    loop_dims = list(set(varr.dims) - set(["height", "width", dim]))
    if chunk_nfm is None:
        # fall back to the dask chunksize along `dim`, as documented above
        chunk_nfm = varr.data.chunksize[varr.get_axis_num(dim)]
    if npart is None:
        # by default use an npart that results in two layers of recursion
        npart = max(3, int(np.ceil((varr.sizes[dim] / chunk_nfm)**(1 / 2))))
    if loop_dims:
        loop_labs = [varr.coords[d].values for d in loop_dims]
        res_dict = dict()
        for lab in itt.product(*loop_labs):
            va = varr.sel(
                {loop_dims[i]: lab[i]
                 for i in range(len(loop_dims))})
            vmax, sh = est_motion_part(va.data, npart, chunk_nfm, **kwargs)
            if kwargs.get("mesh_size", None):
                sh = xr.DataArray(
                    sh,
                    dims=[dim, "shift_dim", "grid0", "grid1"],
                    coords={
                        dim: va.coords[dim].values,
                        "shift_dim": ["height", "width"],
                    },
                )
            else:
                sh = xr.DataArray(
                    sh,
                    dims=[dim, "shift_dim"],
                    coords={
                        dim: va.coords[dim].values,
                        "shift_dim": ["height", "width"],
                    },
                )
            res_dict[lab] = sh.assign_coords(
                **{k: v
                   for k, v in zip(loop_dims, lab)})
        sh = xrconcat_recursive(res_dict, loop_dims)
    else:
        vmax, sh = est_motion_part(varr.data, npart, chunk_nfm, **kwargs)
        if kwargs.get("mesh_size", None):
            sh = xr.DataArray(
                sh,
                dims=[dim, "shift_dim", "grid0", "grid1"],
                coords={
                    dim: varr.coords[dim].values,
                    "shift_dim": ["height", "width"],
                },
            )
        else:
            sh = xr.DataArray(
                sh,
                dims=[dim, "shift_dim"],
                coords={
                    dim: varr.coords[dim].values,
                    "shift_dim": ["height", "width"],
                },
            )
    return sh
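
# Small numeric illustration of the recursion described in the docstring,
# using hypothetical numbers; the depth formula below is only illustrative and
# is not part of the function. With 9000 frames in dask chunks of 100 frames,
# the default `npart` is chosen so that roughly two levels of chunk grouping
# cover the whole movie.
import numpy as np

nfm, chunk_nfm = 9000, 100
npart = max(3, int(np.ceil((nfm / chunk_nfm) ** 0.5)))           # -> 10
levels = int(np.ceil(np.log(nfm / chunk_nfm) / np.log(npart)))   # -> 2
print(npart, levels)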