Example 1
 def test_decode_cf_with_drop_variables(self):
     original = Dataset({
         't': ('t', [0, 1, 2], {
             'units': 'days since 2000-01-01'
         }),
         'x': ("x", [9, 8, 7], {
             'units': 'km'
         }),
         'foo': (('t', 'x'), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], {
             'units': 'bar'
         }),
         'y': ('t', [5, 10, -999], {
             '_FillValue': -999
         })
     })
     expected = Dataset({
         't': pd.date_range('2000-01-01', periods=3),
         'x': ("x", [0, 1, 2]),
         'foo': (('t', 'x'), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], {
             'units': 'bar'
         }),
         'y': ('t', [5, 10, np.nan])
     })
     actual = conventions.decode_cf(original, drop_variables=("x", ))
     actual2 = conventions.decode_cf(original, drop_variables="x")
     self.assertDatasetIdentical(expected, actual)
     self.assertDatasetIdentical(expected, actual2)
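
The test above drives conventions.decode_cf directly; the same drop_variables option is exposed on xarray's public entry points. A minimal sketch of that public usage (the file path 'data.nc' is hypothetical; current xarray is assumed):

    import xarray as xr

    # Skip variable 'x' entirely while CF-decoding everything else
    ds = xr.open_dataset('data.nc', drop_variables=['x'])

    # Or decode an already-open, raw dataset after the fact
    raw = xr.open_dataset('data.nc', decode_cf=False)
    decoded = xr.decode_cf(raw, drop_variables=['x'])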
Example 2
 def test_roundtrip_object_dtype(self):
     floats = np.array([0.0, 0.0, 1.0, 2.0, 3.0], dtype=object)
     floats_nans = np.array([np.nan, np.nan, 1.0, 2.0, 3.0], dtype=object)
     letters = np.array(['ab', 'cdef', 'g'], dtype=object)
     letters_nans = np.array(['ab', 'cdef', np.nan], dtype=object)
     all_nans = np.array([np.nan, np.nan], dtype=object)
     original = Dataset({
         'floats': ('a', floats),
         'floats_nans': ('a', floats_nans),
         'letters': ('b', letters),
         'letters_nans': ('b', letters_nans),
         'all_nans': ('c', all_nans),
         'nan': ([], np.nan)
     })
     expected = original.copy(deep=True)
     if isinstance(self, Only32BitTypes):
         # for netCDF3 tests, expect the results to come back as characters
         expected['letters_nans'] = expected['letters_nans'].astype('S')
         expected['letters'] = expected['letters'].astype('S')
     with self.roundtrip(original) as actual:
         try:
             self.assertDatasetIdentical(expected, actual)
         except AssertionError:
             # Most stores use '' for NaNs in strings, but some don't.
             # First try the ideal case (where the store returns exactly
             # the original Dataset), then try a more realistic case.
             # ScipyDataTest, NetCDF3ViaNetCDF4DataTest and NetCDF4DataTest
             # all end up using this case.
             expected['letters_nans'][-1] = ''
             self.assertDatasetIdentical(expected, actual)
Example 3
    def test_groupby(self):
        data = Dataset({'x': ('x', list('abc')),
                        'c': ('x', [0, 1, 0]),
                        'z': (['x', 'y'], np.random.randn(3, 5))})
        groupby = data.groupby('x')
        self.assertEqual(len(groupby), 3)
        expected_groups = {'a': 0, 'b': 1, 'c': 2}
        self.assertEqual(groupby.groups, expected_groups)
        expected_items = [('a', data.indexed(x=0)),
                          ('b', data.indexed(x=1)),
                          ('c', data.indexed(x=2))]
        self.assertEqual(list(groupby), expected_items)

        identity = lambda x: x
        for k in ['x', 'c', 'y']:
            actual = data.groupby(k, squeeze=False).apply(identity)
            self.assertEqual(data, actual)

        data = create_test_data()
        for n, (t, sub) in enumerate(list(data.groupby('dim1'))[:3]):
            self.assertEqual(data['dim1'][n], t)
            self.assertVariableEqual(data['var1'][n], sub['var1'])
            self.assertVariableEqual(data['var2'][n], sub['var2'])
            self.assertVariableEqual(data['var3'][:, n], sub['var3'])

        # TODO: test the other edge cases
        with self.assertRaisesRegexp(ValueError, 'must be 1 dimensional'):
            data.groupby('var1')
        with self.assertRaisesRegexp(ValueError, 'length does not match'):
            data.groupby(data['dim1'][:3])
Example 4
    def test_groupby_sum(self):
        array = self.make_groupby_example_array()
        grouped = array.groupby('abc')

        expected_sum_all = Dataset(
            {'foo': Variable(['abc'], np.array([self.x[:, :9].sum(),
                                                self.x[:, 10:].sum(),
                                                self.x[:, 9:10].sum()]).T),
             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
        self.assertDataArrayAllClose(expected_sum_all, grouped.reduce(np.sum))
        self.assertDataArrayAllClose(expected_sum_all, grouped.sum())

        expected = DataArray([array['y'].values[idx].sum() for idx
                              in [slice(9), slice(10, None), slice(9, 10)]],
                             [['a', 'b', 'c']], ['abc'])
        actual = array['y'].groupby('abc').apply(np.sum)
        self.assertDataArrayAllClose(expected, actual)
        actual = array['y'].groupby('abc').sum()
        self.assertDataArrayAllClose(expected, actual)

        expected_sum_axis1 = Dataset(
            {'foo': (['x', 'abc'], np.array([self.x[:, :9].sum(1),
                                             self.x[:, 10:].sum(1),
                                             self.x[:, 9:10].sum(1)]).T),
             'x': self.ds['x'],
             'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
        self.assertDataArrayAllClose(expected_sum_axis1,
                                     grouped.reduce(np.sum, 'y'))
        self.assertDataArrayAllClose(expected_sum_axis1, grouped.sum('y'))
Example 5
 def setUp(self):
     self.attrs = {'attr1': 'value1', 'attr2': 2929}
     self.x = np.random.random((10, 20))
     self.v = Variable(['x', 'y'], self.x)
     self.va = Variable(['x', 'y'], self.x, self.attrs)
     self.ds = Dataset({'foo': self.v})
     self.dv = self.ds['foo']
Example 6
    def test_roundtrip_strings_with_fill_value(self):
        values = np.array(['ab', 'cdef', np.nan], dtype=object)
        encoding = {'_FillValue': np.string_('X'), 'dtype': np.dtype('S1')}
        original = Dataset({'x': ('t', values, {}, encoding)})
        expected = original.copy(deep=True)
        expected['x'][:2] = values[:2].astype('S')
        with self.roundtrip(original) as actual:
            self.assertDatasetIdentical(expected, actual)

        original = Dataset({'x': ('t', values, {}, {'_FillValue': '\x00'})})
        if not isinstance(self, Only32BitTypes):
            # these stores can save unicode strings
            expected = original.copy(deep=True)
        if isinstance(self, BaseNetCDF4Test):
            # netCDF4 can't keep track of an empty _FillValue for VLEN
            # variables
            expected['x'][-1] = ''
        elif (isinstance(self, (NetCDF3ViaNetCDF4DataTest,
                                NetCDF4ClassicViaNetCDF4DataTest)) or
              (has_netCDF4 and type(self) is GenericNetCDFDataTest)):
            # netCDF4 can't keep track of an empty _FillValue for nc3, either:
            # https://github.com/Unidata/netcdf4-python/issues/273
            expected['x'][-1] = np.string_('')
        with self.roundtrip(original) as actual:
            self.assertDatasetIdentical(expected, actual)
Example 7
 def test_roundtrip_object_dtype(self):
     floats = np.array([0.0, 0.0, 1.0, 2.0, 3.0], dtype=object)
     floats_nans = np.array([np.nan, np.nan, 1.0, 2.0, 3.0], dtype=object)
     letters = np.array(['ab', 'cdef', 'g'], dtype=object)
     letters_nans = np.array(['ab', 'cdef', np.nan], dtype=object)
     all_nans = np.array([np.nan, np.nan], dtype=object)
     original = Dataset({
         'floats': ('a', floats),
         'floats_nans': ('a', floats_nans),
         'letters': ('b', letters),
         'letters_nans': ('b', letters_nans),
         'all_nans': ('c', all_nans),
         'nan': ([], np.nan)
     })
     if PY3 and type(self) is ScipyDataTest:
         # see the note under test_zero_dimensional_variable
         del original['nan']
     expected = original.copy(deep=True)
     expected['letters_nans'][-1] = ''
     if type(self) is not NetCDF4DataTest:
         # for netCDF3 tests, expect the results to come back as characters
         expected['letters_nans'] = expected['letters_nans'].astype('S')
         expected['letters'] = expected['letters'].astype('S')
     with self.roundtrip(original) as actual:
         self.assertDatasetIdentical(expected, actual)
Example 8
    def test_coordinates_encoding(self):
        def equals_latlon(obj):
            return obj == 'lat lon' or obj == 'lon lat'

        original = Dataset({'temp': ('x', [0, 1]), 'precip': ('x', [0, -1])},
                           {'lat': ('x', [2, 3]), 'lon': ('x', [4, 5])})
        with self.roundtrip(original) as actual:
            self.assertDatasetIdentical(actual, original)
        with create_tmp_file() as tmp_file:
            original.to_netcdf(tmp_file)
            with open_dataset(tmp_file, decode_coords=False) as ds:
                self.assertTrue(equals_latlon(ds['temp'].attrs['coordinates']))
                self.assertTrue(equals_latlon(ds['precip'].attrs['coordinates']))
                self.assertNotIn('coordinates', ds.attrs)
                self.assertNotIn('coordinates', ds['lat'].attrs)
                self.assertNotIn('coordinates', ds['lon'].attrs)

        modified = original.drop(['temp', 'precip'])
        with self.roundtrip(modified) as actual:
            self.assertDatasetIdentical(actual, modified)
        with create_tmp_file() as tmp_file:
            modified.to_netcdf(tmp_file)
            with open_dataset(tmp_file, decode_coords=False) as ds:
                self.assertTrue(equals_latlon(ds.attrs['coordinates']))
                self.assertNotIn('coordinates', ds['lat'].attrs)
                self.assertNotIn('coordinates', ds['lon'].attrs)
Example 9
    def test_roundtrip_coordinates(self):
        original = Dataset({'foo': ('x', [0, 1])}, {
            'x': [2, 3],
            'y': ('a', [42]),
            'z': ('x', [4, 5])
        })

        with self.roundtrip(original) as actual:
            self.assertDatasetIdentical(original, actual)

        expected = original.drop('foo')
        with self.roundtrip(expected) as actual:
            self.assertDatasetIdentical(expected, actual)

        expected = original.copy()
        expected.attrs['coordinates'] = 'something random'
        with self.assertRaisesRegexp(ValueError, 'cannot serialize'):
            with self.roundtrip(expected):
                pass

        expected = original.copy(deep=True)
        expected['foo'].attrs['coordinates'] = 'something random'
        with self.assertRaisesRegexp(ValueError, 'cannot serialize'):
            with self.roundtrip(expected):
                pass
Example 10
    def test_encoding_kwarg(self):
        ds = Dataset({'x': ('y', np.arange(10.0))})
        kwargs = dict(encoding={'x': {'dtype': 'f4'}})
        with self.roundtrip(ds, save_kwargs=kwargs) as actual:
            self.assertEqual(actual.x.encoding['dtype'], 'f4')
        self.assertEqual(ds.x.encoding, {})

        kwargs = dict(encoding={'x': {'foo': 'bar'}})
        with self.assertRaisesRegexp(ValueError, 'unexpected encoding'):
            with self.roundtrip(ds, save_kwargs=kwargs) as actual:
                pass

        kwargs = dict(encoding={'x': 'foo'})
        with self.assertRaisesRegexp(ValueError, 'must be castable'):
            with self.roundtrip(ds, save_kwargs=kwargs) as actual:
                pass

        kwargs = dict(encoding={'invalid': {}})
        with self.assertRaises(KeyError):
            with self.roundtrip(ds, save_kwargs=kwargs) as actual:
                pass

        ds = Dataset({'t': pd.date_range('2000-01-01', periods=3)})
        units = 'days since 1900-01-01'
        kwargs = dict(encoding={'t': {'units': units}})
        with self.roundtrip(ds, save_kwargs=kwargs) as actual:
            self.assertEqual(actual.t.encoding['units'], units)
            self.assertDatasetIdentical(actual, ds)
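
Outside the test harness, the same encoding dictionaries go straight to to_netcdf. A minimal sketch (the file name 'encoded.nc' is hypothetical; current xarray is assumed):

    import numpy as np
    import pandas as pd
    import xarray as xr

    ds = xr.Dataset({'x': ('y', np.arange(10.0)),
                     't': ('t', pd.date_range('2000-01-01', periods=3))})
    # Store 'x' as 32-bit floats and encode the times relative to 1900
    ds.to_netcdf('encoded.nc',
                 encoding={'x': {'dtype': 'f4'},
                           't': {'units': 'days since 1900-01-01'}})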
Example 11
 def test_open_and_do_math(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         with open_mfdataset(tmp) as ds:
             actual = 1.0 * ds
             self.assertDatasetAllClose(original, actual)
Example 12
 def test_coordinate(self):
     a = Dataset()
     vec = np.random.random((10, ))
     attributes = {'foo': 'bar'}
     a['x'] = ('x', vec, attributes)
     self.assertTrue('x' in a.coordinates)
     self.assertIsInstance(a.coordinates['x'].as_index, pd.Index)
     self.assertVariableEqual(a.coordinates['x'], a.variables['x'])
     b = Dataset()
     b['x'] = ('x', vec, attributes)
     self.assertVariableEqual(a['x'], b['x'])
     self.assertEqual(a.dimensions, b.dimensions)
     # this should work
     a['x'] = ('x', vec[:5])
     a['z'] = ('x', np.arange(5))
     with self.assertRaises(ValueError):
         # now it shouldn't, since there is a conflicting length
         a['x'] = ('x', vec[:4])
     arr = np.random.random((10, 1))
     scal = np.array(0)
     with self.assertRaises(ValueError):
         a['y'] = ('y', arr)
     with self.assertRaises(ValueError):
         a['y'] = ('y', scal)
     self.assertTrue('y' not in a.dimensions)
Example 13
 def test_roundtrip_object_dtype(self):
     floats = np.array([0.0, 0.0, 1.0, 2.0, 3.0], dtype=object)
     floats_nans = np.array([np.nan, np.nan, 1.0, 2.0, 3.0], dtype=object)
     letters = np.array(['ab', 'cdef', 'g'], dtype=object)
     letters_nans = np.array(['ab', 'cdef', np.nan], dtype=object)
     all_nans = np.array([np.nan, np.nan], dtype=object)
     original = Dataset({'floats': ('a', floats),
                         'floats_nans': ('a', floats_nans),
                         'letters': ('b', letters),
                         'letters_nans': ('b', letters_nans),
                         'all_nans': ('c', all_nans),
                         'nan': ([], np.nan)})
     if PY3 and type(self) is ScipyDataTest:
         # see the note under test_zero_dimensional_variable
         del original['nan']
     expected = original.copy(deep=True)
     if type(self) in [NetCDF3ViaNetCDF4DataTest, ScipyDataTest]:
         # for netCDF3 tests, expect the results to come back as characters
         expected['letters_nans'] = expected['letters_nans'].astype('S')
         expected['letters'] = expected['letters'].astype('S')
     with self.roundtrip(original) as actual:
         try:
             self.assertDatasetIdentical(expected, actual)
         except AssertionError:
             # Most stores use '' for NaNs in strings, but some don't.
             # First try the ideal case (where the store returns exactly
             # the original Dataset), then try a more realistic case.
             # ScipyDataTest, NetCDF3ViaNetCDF4DataTest and NetCDF4DataTest
             # all end up using this case.
             expected['letters_nans'][-1] = ''
             self.assertDatasetIdentical(expected, actual)
Example 14
    def test_pipe_tuple_error(self):
        df = Dataset({'A': ('x', [1, 2, 3])})
        f = lambda x, y: y
        with self.assertRaises(ValueError):
            df.pipe((f, 'y'), x=1, y=0)

        with self.assertRaises(ValueError):
            df.A.pipe((f, 'y'), x=1, y=0)
Example 15
    def test_pipe_tuple(self):
        df = Dataset({'A': ('x', [1, 2, 3])})
        f = lambda x, y: y
        result = df.pipe((f, 'y'), 0)
        self.assertDatasetIdentical(result, df)

        result = df.A.pipe((f, 'y'), 0)
        self.assertDataArrayIdentical(result, df.A)
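
The tuple form mirrors pandas: pipe((f, 'y'), ...) calls f with the piped object bound to the keyword 'y'. A standalone sketch with hypothetical names (current xarray assumed):

    import xarray as xr

    ds = xr.Dataset({'A': ('x', [1, 2, 3])})

    def scale(factor, data):
        # 'data' receives the piped Dataset via the tuple form below
        return data * factor

    result = ds.pipe((scale, 'data'), 2)  # equivalent to scale(2, data=ds)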
Example 16
 def test_preprocess_mfdataset(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         preprocess = lambda ds: ds.assign_coords(z=0)
         expected = preprocess(original)
         with open_mfdataset(tmp, preprocess=preprocess) as actual:
             self.assertDatasetIdentical(expected, actual)
Example 17
 def test_save_mfdataset_roundtrip(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     datasets = [original.isel(x=slice(5)), original.isel(x=slice(5, 10))]
     with create_tmp_file() as tmp1:
         with create_tmp_file() as tmp2:
             save_mfdataset(datasets, [tmp1, tmp2])
             with open_mfdataset([tmp1, tmp2]) as actual:
                 self.assertDatasetIdentical(actual, original)
Example 18
    def test_reduce_argmin(self):
        # regression test for #205
        ds = Dataset({'a': ('x', [0, 1])})
        expected = Dataset({'a': ([], 0)})
        actual = ds.argmin()
        self.assertDatasetIdentical(expected, actual)

        actual = ds.argmin('x')
        self.assertDatasetIdentical(expected, actual)
Example 19
    def test_pipe(self):
        df = Dataset({"A": ("x", [1, 2, 3])})
        f = lambda x, y: x ** y
        result = df.pipe(f, 2)
        expected = Dataset({"A": ("x", [1, 4, 9])})
        self.assertDatasetIdentical(result, expected)

        result = df.A.pipe(f, 2)
        self.assertDataArrayIdentical(result, expected.A)
Example 20
    def test_weakrefs(self):
        example = Dataset({'foo': ('x', np.arange(5.0))})
        expected = example.rename({'foo': 'bar', 'x': 'y'})

        with create_tmp_file() as tmp_file:
            example.to_netcdf(tmp_file, engine='scipy')
            on_disk = open_dataset(tmp_file, engine='pynio')
            actual = on_disk.rename({'foo': 'bar', 'x': 'y'})
            del on_disk  # trigger garbage collection
            self.assertDatasetIdentical(actual, expected)
Example 21
    def test_variable_order(self):
        # doesn't work with scipy or h5py :(
        ds = Dataset()
        ds['a'] = 1
        ds['z'] = 2
        ds['b'] = 3
        ds.coords['c'] = 4

        with self.roundtrip(ds) as actual:
            self.assertEqual(list(ds), list(actual))
Example 22
 def test_lock(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         with open_dataset(tmp, chunks=10) as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertIsInstance(task[-1], type(Lock()))
         with open_mfdataset(tmp) as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertIsInstance(task[-1], type(Lock()))
Example 23
 def test_concat_constant_index(self):
     # GH425
     ds1 = Dataset({'foo': 1.5}, {'y': 1})
     ds2 = Dataset({'foo': 2.5}, {'y': 1})
     expected = Dataset({'foo': ('y', [1.5, 2.5]), 'y': [1, 1]})
     for mode in ['different', 'all', ['foo']]:
         actual = concat([ds1, ds2], 'y', data_vars=mode)
         self.assertDatasetIdentical(expected, actual)
     with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
         concat([ds1, ds2], 'y', data_vars='minimal')
Example 24
    def test_groupby_returns_new_type(self):
        data = Dataset({'z': (['x', 'y'], np.random.randn(3, 5))})

        actual = data.groupby('x').apply(lambda ds: ds['z'])
        expected = data['z']
        self.assertDataArrayIdentical(expected, actual)

        actual = data['z'].groupby('x').apply(lambda x: x.to_dataset())
        expected = data
        self.assertDatasetIdentical(expected, actual)
Example 25
    def test_dims(self):
        arr = self.dv
        self.assertEqual(arr.dims, ('x', 'y'))

        arr.dims = ('w', 'z')
        self.assertEqual(arr.dims, ('w', 'z'))

        x = Dataset({'x': ('x', np.arange(5))})['x']
        x.dims = ('y',)
        self.assertEqual(x.dims, ('y',))
        self.assertEqual(x.name, 'y')
Example 26
 def test_open_dataset(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         with open_dataset(tmp, chunks={'x': 5}) as actual:
             self.assertIsInstance(actual.foo.variable.data, da.Array)
             self.assertEqual(actual.foo.variable.data.chunks, ((5, 5),))
             self.assertDatasetAllClose(original, actual)
         with open_dataset(tmp) as actual:
             self.assertIsInstance(actual.foo.variable.data, np.ndarray)
             self.assertDatasetAllClose(original, actual)
Example 27
 def test_dataset(self):
     original = Dataset({
         't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
         'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}),
         'y': ('t', [5, 10, -999], {'_FillValue': -999})
     })
     expected = Dataset({'foo': ('t', [0, 0, 0], {'units': 'bar'})},
                        {'t': pd.date_range('2000-01-01', periods=3),
                         'y': ('t', [5.0, 10.0, np.nan])})
     actual = conventions.decode_cf(original)
     self.assertDatasetIdentical(expected, actual)
Example 28
 def test_concat_coords(self):
     data = Dataset({'foo': ('x', np.random.randn(10))})
     expected = data.assign_coords(c=('x', [0] * 5 + [1] * 5))
     objs = [data.isel(x=slice(5)).assign_coords(c=0),
             data.isel(x=slice(5, None)).assign_coords(c=1)]
     for coords in ['different', 'all', ['c']]:
         actual = concat(objs, dim='x', coords=coords)
         self.assertDatasetIdentical(expected, actual)
     for coords in ['minimal', []]:
         with self.assertRaisesRegexp(ValueError, 'not equal across'):
             concat(objs, dim='x', coords=coords)
Example 29
 def test_squeeze(self):
     data = Dataset({'foo': (['x', 'y', 'z'], [[[1], [2]]])})
     for args in [[], [['x']], [['x', 'z']]]:
         def get_args(v):
             return [set(args[0]) & set(v.dimensions)] if args else []
         expected = Dataset({k: v.squeeze(*get_args(v))
                             for k, v in data.variables.items()})
         self.assertDatasetIdentical(expected, data.squeeze(*args))
     # invalid squeeze
     with self.assertRaisesRegexp(ValueError, 'cannot select a dimension'):
         data.squeeze('y')
Example 30
    def test_dataset_repr_with_netcdf4_datetimes(self):
        # regression test for #347
        attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'}
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'unable to decode time')
            ds = decode_cf(Dataset({'time': ('time', [0, 1], attrs)}))
            self.assertIn('(time) object', repr(ds))

        attrs = {'units': 'days since 1900-01-01'}
        ds = decode_cf(Dataset({'time': ('time', [0, 1], attrs)}))
        self.assertIn('(time) datetime64[ns]', repr(ds))
Example 31
    def test_simultaneous_compute(self):
        ds = Dataset({'foo': ('x', range(5)),
                      'bar': ('x', range(5))}).chunk()

        count = [0]
        def counting_get(*args, **kwargs):
            count[0] += 1
            return dask.get(*args, **kwargs)

        with dask.set_options(get=counting_get):
            ds.load()
        self.assertEqual(count[0], 1)
Example 32
 def test_init(self):
     var1 = Variable('x', 2 * np.arange(100))
     var2 = Variable('x', np.arange(1000))
     var3 = Variable(['x', 'y'], np.arange(1000).reshape(100, 10))
     with self.assertRaisesRegexp(ValueError, 'but already exists'):
         Dataset({'a': var1, 'b': var2})
     with self.assertRaisesRegexp(ValueError, 'must be defined with 1-d'):
         Dataset({'a': var1, 'x': var3})
     # verify handling of DataArrays
     expected = Dataset({'x': var1, 'z': var3})
     actual = Dataset({'z': expected['z']})
     self.assertDatasetIdentical(expected, actual)
Example 33
 def test_lazy_load(self):
     store = InaccessibleVariableDataStore()
     store.set_dimension('dim', 10)
     store.set_variable('dim', XArray(('dim'),
                                       np.arange(10)))
     store.set_variable('var', XArray(('dim'),
                                       np.random.uniform(size=10)))
     ds = Dataset()
     ds = ds.load_store(store, decode_cf=False)
     self.assertRaises(UnexpectedDataAccess, lambda: ds['var'].data)
     ds = ds.load_store(store, decode_cf=True)
     self.assertRaises(UnexpectedDataAccess, lambda: ds['var'].data)
Example 34
    def test_simultaneous_compute(self):
        ds = Dataset({'foo': ('x', range(5)),
                      'bar': ('x', range(5))}).reblock()

        count = np.array(0)
        def counting_get(*args, **kwargs):
            count[...] += 1
            return dask.get(*args, **kwargs)

        with dask.set_options(get=counting_get):
            ds.load_data()
        self.assertEqual(count, 1)
Example 35
    def test_reset_coords(self):
        data = DataArray(np.zeros((3, 4)),
                         {'bar': ('x', ['a', 'b', 'c']),
                          'baz': ('y', range(4))},
                         dims=['x', 'y'], name='foo')

        actual = data.reset_coords()
        expected = Dataset({
            'foo': (['x', 'y'], np.zeros((3, 4))),
            'bar': ('x', ['a', 'b', 'c']),
            'baz': ('y', range(4))
        })
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(['bar', 'baz'])
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords('bar')
        expected = Dataset(
            {
                'foo': (['x', 'y'], np.zeros((3, 4))),
                'bar': ('x', ['a', 'b', 'c'])
            }, {'baz': ('y', range(4))})
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(['bar'])
        self.assertDatasetIdentical(actual, expected)

        actual = data.reset_coords(drop=True)
        expected = DataArray(np.zeros((3, 4)), dims=['x', 'y'], name='foo')
        self.assertDataArrayIdentical(actual, expected)

        actual = data.copy()
        actual.reset_coords(drop=True, inplace=True)
        self.assertDataArrayIdentical(actual, expected)

        actual = data.reset_coords('bar', drop=True)
        expected = DataArray(np.zeros((3, 4)), {'baz': ('y', range(4))},
                             dims=['x', 'y'],
                             name='foo')
        self.assertDataArrayIdentical(actual, expected)

        with self.assertRaisesRegexp(ValueError, 'cannot reset coord'):
            data.reset_coords(inplace=True)
        with self.assertRaises(KeyError):
            data.reset_coords('foo', drop=True)
        with self.assertRaisesRegexp(ValueError, 'cannot be found'):
            data.reset_coords('not_found')
        with self.assertRaisesRegexp(ValueError, 'cannot remove index'):
            data.reset_coords('y')
Example 36
 def test_lock(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp, format='NETCDF3_CLASSIC')
         with open_dataset(tmp, chunks=10) as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertIsInstance(task[-1], type(Lock()))
         with open_mfdataset(tmp) as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertIsInstance(task[-1], type(Lock()))
         with open_mfdataset(tmp, engine='scipy') as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertNotIsInstance(task[-1], type(Lock()))
Example 37
 def test_open_dataset(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         with open_dataset(tmp, chunks={'x': 5}) as actual:
             self.assertIsInstance(actual.foo.variable.data, da.Array)
             self.assertEqual(actual.foo.variable.data.chunks, ((5, 5), ))
             self.assertDatasetIdentical(original, actual)
         with open_dataset(tmp, chunks=5) as actual:
             self.assertDatasetIdentical(original, actual)
         with open_dataset(tmp) as actual:
             self.assertIsInstance(actual.foo.variable.data, np.ndarray)
             self.assertDatasetIdentical(original, actual)
Example 38
    def test_to_and_from_dataframe(self):
        x = np.random.randn(10)
        y = np.random.randn(10)
        t = list('abcdefghij')
        ds = Dataset(
            OrderedDict([
                ('a', ('t', x)),
                ('b', ('t', y)),
                ('t', ('t', t)),
            ]))
        expected = pd.DataFrame(np.array([x, y]).T,
                                columns=['a', 'b'],
                                index=pd.Index(t, name='t'))
        actual = ds.to_dataframe()
        # use the .equals method to check all DataFrame metadata
        assert expected.equals(actual), (expected, actual)

        # check roundtrip
        self.assertDatasetIdentical(ds, Dataset.from_dataframe(actual))

        # test a case with a MultiIndex
        w = np.random.randn(2, 3)
        ds = Dataset({'w': (('x', 'y'), w)})
        ds['y'] = ('y', list('abc'))
        exp_index = pd.MultiIndex.from_arrays(
            [[0, 0, 0, 1, 1, 1], ['a', 'b', 'c', 'a', 'b', 'c']],
            names=['x', 'y'])
        expected = pd.DataFrame(w.reshape(-1), columns=['w'], index=exp_index)
        actual = ds.to_dataframe()
        self.assertTrue(expected.equals(actual))

        # check roundtrip
        self.assertDatasetIdentical(ds, Dataset.from_dataframe(actual))
Example 39
def test_dataset():
    # need to create all the dimensions that GCMDataset likes
    # oceanic parameters, cartesian coordinates, doubly periodic
    H = 5000.
    Lx = 4e6
    Ly = 3e6
    Nz = 10
    Nx = 25
    Ny = 20
    dz = H / Nz
    dx = Lx / Nx
    dy = Ly / Ny

    ds = Dataset()
    ds.attrs['H'] = H
    ds.attrs['Lx'] = Lx
    ds.attrs['Ly'] = Ly
    ds.attrs['Nz'] = Nz
    ds.attrs['Nx'] = Nx
    ds.attrs['Ny'] = Ny
    ds.attrs['dz'] = dz
    ds.attrs['dx'] = dx
    ds.attrs['dy'] = dy
    # vertical grid
    ds['Z'] = ('Z', dz/2 + dz*np.arange(Nz))
    ds['Zp1'] = ('Zp1', dz*np.arange(Nz+1))
    ds['Zl'] = ('Zl', dz*np.arange(Nz))
    ds['Zu'] = ('Zu', dz + dz*np.arange(Nz))
    # vertical spacing
    ds['drF'] = ('Z', np.full(Nz, dz))
    ds['drC'] = ('Zp1', np.hstack([dz/2, np.full(Nz-1, dz), dz/2]))
    # horizontal grid
    ds['X'] = ('X', dx/2 + dx*np.arange(Nx))
    ds['Xp1'] = ('Xp1', dx*np.arange(Nx))
    ds['Y'] = ('Y', dy/2 + dy*np.arange(Ny))
    ds['Yp1'] = ('Yp1', dy*np.arange(Ny))
    xc, yc = np.meshgrid(ds.X, ds.Y)
    xg, yg = np.meshgrid(ds.Xp1, ds.Yp1)
    ds['XC'] = (('Y','X'), xc)
    ds['YC'] = (('Y','X'), yc)
    ds['XG'] = (('Yp1','Xp1'), xg)
    ds['YG'] = (('Yp1','Xp1'), yg)
    # horizontal spacing
    ds['dxC'] = (('Y','Xp1'), np.full((Ny,Nx), dx))
    ds['dyC'] = (('Yp1','X'), np.full((Ny,Nx), dy))
    ds['dxG'] = (('Yp1','X'), np.full((Ny,Nx), dx))
    ds['dyG'] = (('Y','Xp1'), np.full((Ny,Nx), dy))

    return ds
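
A quick sanity check of the grid this helper builds (hypothetical usage, assuming the function is importable as defined above):

    ds = test_dataset()
    assert ds['XC'].shape == (ds.attrs['Ny'], ds.attrs['Nx'])
    # the vertical spacings should add up to the full depth H
    assert float(ds['drF'].sum()) == ds.attrs['H']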
Example 40
    def test_unselect(self):
        data = create_test_data()

        self.assertEqual(data, data.unselect())

        expected = Dataset({k: data[k] for k in data if k != 'time'})
        actual = data.unselect('time')
        self.assertEqual(expected, actual)

        expected = Dataset({k: data[k] for k in ['dim2', 'dim3', 'time']})
        actual = data.unselect('dim1')
        self.assertEqual(expected, actual)

        with self.assertRaisesRegexp(ValueError, 'does not exist in this'):
            data.unselect('not_found_here')
Example 41
    def test_concat_errors(self):
        data = create_test_data()
        split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))]

        with self.assertRaisesRegexp(ValueError, 'must supply at least one'):
            concat([], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'are not coordinates'):
            concat([data, data], 'new_dim', coords=['not_found'])

        with self.assertRaisesRegexp(ValueError, 'global attributes not'):
            data0, data1 = deepcopy(split_data)
            data1.attrs['foo'] = 'bar'
            concat([data0, data1], 'dim1', compat='identical')
        self.assertDatasetIdentical(
            data, concat([data0, data1], 'dim1', compat='equals'))

        with self.assertRaisesRegexp(ValueError, 'encountered unexpected'):
            data0, data1 = deepcopy(split_data)
            data1['foo'] = ('bar', np.random.randn(10))
            concat([data0, data1], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
            data0, data1 = deepcopy(split_data)
            data1['dim2'] = 2 * data1['dim2']
            concat([data0, data1], 'dim1', coords='minimal')

        with self.assertRaisesRegexp(ValueError, 'it is not 1-dimensional'):
            concat([data0, data1], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'compat.* invalid'):
            concat(split_data, 'dim1', compat='foobar')

        with self.assertRaisesRegexp(ValueError, 'unexpected value for'):
            concat([data, data], 'new_dim', coords='foobar')

        with self.assertRaisesRegexp(
                ValueError, 'coordinate in some datasets but not others'):
            concat([Dataset({'x': 0}), Dataset({'x': [1]})], dim='z')

        with self.assertRaisesRegexp(
                ValueError, 'coordinate in some datasets but not others'):
            concat([Dataset({'x': 0}), Dataset({}, {'x': 1})], dim='z')

        with self.assertRaisesRegexp(ValueError, 'no longer a valid'):
            concat([data, data], 'new_dim', mode='different')
        with self.assertRaisesRegexp(ValueError, 'no longer a valid'):
            concat([data, data], 'new_dim', concat_over='different')
Example 42
 @contextlib.contextmanager  # the `with self.roundtrip(...)` calls above rely on this
 def roundtrip(self, data, decode_cf=True):
     store = CFEncodedInMemoryStore()
     data.dump_to_store(store)
     if decode_cf:
         yield conventions.decode_cf(store)
     else:
         yield Dataset.load_store(store)
Example 43
def create_test_data(seed=None):
    rs = np.random.RandomState(seed)
    _vars = {'var1': ['dim1', 'dim2'],
             'var2': ['dim1', 'dim2'],
             'var3': ['dim3', 'dim1']}
    _dims = {'dim1': 8, 'dim2': 9, 'dim3': 10}

    obj = Dataset()
    obj['time'] = ('time', pd.date_range('2000-01-01', periods=20))
    obj['dim1'] = ('dim1', np.arange(_dims['dim1']))
    obj['dim2'] = ('dim2', 0.5 * np.arange(_dims['dim2']))
    obj['dim3'] = ('dim3', list('abcdefghij'))
    for v, dims in sorted(_vars.items()):
        data = rs.normal(size=tuple(_dims[d] for d in dims))
        obj[v] = (dims, data, {'foo': 'variable'})
    obj.coords['numbers'] = ('dim3', [0, 1, 2, 0, 0, 1, 1, 2, 2, 3])
    return obj
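
This factory underpins several of the tests above (test_groupby, test_concat_errors, test_write_store). A few properties one can expect from its output, as a sketch:

    ds = create_test_data(seed=42)
    assert set(ds.data_vars) == {'var1', 'var2', 'var3'}
    assert ds['var1'].shape == (8, 9)               # dims ('dim1', 'dim2')
    assert ds.coords['numbers'].dims == ('dim3',)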
Example 44
    def test_groupby(self):
        data = Dataset({'x': ('x', list('abc')),
                        'c': ('x', [0, 1, 0]),
                        'z': (['x', 'y'], np.random.randn(3, 5))})
        groupby = data.groupby('x')
        self.assertEqual(len(groupby), 3)
        expected_groups = {'a': 0, 'b': 1, 'c': 2}
        self.assertEqual(groupby.groups, expected_groups)
        expected_items = [('a', data.isel(x=0)),
                          ('b', data.isel(x=1)),
                          ('c', data.isel(x=2))]
        self.assertEqual(list(groupby), expected_items)

        identity = lambda x: x
        for k in ['x', 'c', 'y']:
            actual = data.groupby(k, squeeze=False).apply(identity)
            self.assertEqual(data, actual)
Example 45
    def test_open_mfdataset(self):
        original = Dataset({'foo': ('x', np.random.randn(10))})
        with create_tmp_file() as tmp1:
            with create_tmp_file() as tmp2:
                original.isel(x=slice(5)).to_netcdf(tmp1)
                original.isel(x=slice(5, 10)).to_netcdf(tmp2)
                with open_mfdataset([tmp1, tmp2]) as actual:
                    self.assertIsInstance(actual.foo.variable.data, da.Array)
                    self.assertEqual(actual.foo.variable.data.chunks,
                                     ((5, 5),))
                    self.assertDatasetAllClose(original, actual)
                with open_mfdataset([tmp1, tmp2], chunks={'x': 3}) as actual:
                    self.assertEqual(actual.foo.variable.data.chunks,
                                     ((3, 2, 3, 2),))

        with self.assertRaisesRegexp(IOError, 'no files to open'):
            open_mfdataset('foo-bar-baz-*.nc')
Example 46
 def test_write_store(self):
     expected = create_test_data()
     with self.create_store() as store:
         expected.dump_to_store(store)
         # the test data contains times.  In case the store
         # cf_encodes them we need to cf_decode them.
         actual = Dataset.load_store(store, cf_decoder)
     self.assertDatasetAllClose(expected, actual)
Example 47
 def test_zero_dimensional_variable(self):
     expected = create_test_data()
     expected['xray_awesomeness'] = ([], np.array(1.e9),
                                     {'units': 'units of awesome'})
     with self.create_store() as store:
         expected.dump_to_store(store)
         actual = Dataset.load_store(store)
     self.assertDatasetAllClose(expected, actual)