Example #1
0
    def test_rename(self):
        data = create_test_data()
        newnames = {'var1': 'renamed_var1', 'dim2': 'renamed_dim2'}
        renamed = data.rename(newnames)

        variables = OrderedDict(data.variables)
        for k, v in iteritems(newnames):
            variables[v] = variables.pop(k)

        for k, v in iteritems(variables):
            dims = list(v.dims)
            for name, newname in iteritems(newnames):
                if name in dims:
                    dims[dims.index(name)] = newname

            self.assertVariableEqual(Variable(dims, v.values, v.attrs),
                                     renamed.variables[k])
            self.assertEqual(v.encoding, renamed.variables[k].encoding)
            self.assertEqual(type(v), type(renamed.variables[k]))

        self.assertTrue('var1' not in renamed.variables)
        self.assertTrue('dim2' not in renamed.variables)

        with self.assertRaisesRegexp(ValueError, "cannot rename 'not_a_var'"):
            data.rename({'not_a_var': 'nada'})

        # verify that we can rename a variable without accessing the data
        var1 = data['var1']
        data['var1'] = (var1.dims, InaccessibleArray(var1.values))
        renamed = data.rename(newnames)
        with self.assertRaises(UnexpectedDataAccess):
            renamed['renamed_var1'].values
Example #2
0
    def test_open_encodings(self):
        # Create a netCDF file with explicit time units
        # and make sure it makes it into the encodings
        # and survives a round trip
        with create_tmp_file() as tmp_file:
            with nc4.Dataset(tmp_file, 'w') as ds:
                ds.createDimension('time', size=10)
                ds.createVariable('time', np.int32, dimensions=('time', ))
                units = 'days since 1999-01-01'
                ds.variables['time'].setncattr('units', units)
                ds.variables['time'][:] = np.arange(10) + 4

            expected = Dataset()

            time = pd.date_range('1999-01-05', periods=10)
            encoding = {'units': units, 'dtype': np.dtype('int32')}
            expected['time'] = ('time', time, {}, encoding)

            with open_dataset(tmp_file) as actual:
                self.assertVariableEqual(actual['time'], expected['time'])
                actual_encoding = dict(
                    (k, v) for k, v in iteritems(actual['time'].encoding)
                    if k in expected['time'].encoding)
                self.assertDictEqual(actual_encoding,
                                     expected['time'].encoding)
Example #3
0
    def test_open_encodings(self):
        # Create a netCDF file with explicit time units
        # and make sure it makes it into the encodings
        # and survives a round trip
        with create_tmp_file() as tmp_file:
            with nc4.Dataset(tmp_file, 'w') as ds:
                ds.createDimension('time', size=10)
                ds.createVariable('time', np.int32, dimensions=('time',))
                units = 'days since 1999-01-01'
                ds.variables['time'].setncattr('units', units)
                ds.variables['time'][:] = np.arange(10) + 4

            expected = Dataset()

            time = pd.date_range('1999-01-05', periods=10)
            encoding = {'units': units, 'dtype': np.dtype('int32')}
            expected['time'] = ('time', time, {}, encoding)

            actual = open_dataset(tmp_file)

            self.assertVariableEqual(actual['time'], expected['time'])
            actual_encoding = dict((k, v) for k, v
                                   in iteritems(actual['time'].encoding)
                                   if k in expected['time'].encoding)
            self.assertDictEqual(actual_encoding, expected['time'].encoding)
Example #4
0
 def rectify_dim_order(dataset):
     # return a new dataset with all variable dimensions tranposed into
     # the order in which they are found in `data`
     return Dataset(dict((k, v.transpose(*data[k].dims))
                         for k, v in iteritems(dataset.data_vars)),
                    dataset.coords,
                    attrs=dataset.attrs)
Example #5
0
def null_wrap(ds):
    """
    Given a data store this wraps each variable in a NullWrapper so that
    it appears to be out of memory.
    """
    variables = dict((k, Variable(v.dims, NullWrapper(v.values), v.attrs))
                     for k, v in iteritems(ds))
    return InMemoryDataStore(variables=variables, attributes=ds.attrs)
Example #6
0
def null_wrap(ds):
    """
    Given a data store this wraps each variable in a NullWrapper so that
    it appears to be out of memory.
    """
    variables = dict((k, Variable(v.dims, NullWrapper(v.values), v.attrs))
                     for k, v in iteritems(ds))
    return InMemoryDataStore(variables=variables, attributes=ds.attrs)
Example #7
0
    def test_concat(self):
        # TODO: simplify and split this test case

        # drop the third dimension to keep things relatively understandable
        data = create_test_data().drop('dim3')

        split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))]
        self.assertDatasetIdentical(data, concat(split_data, 'dim1'))

        def rectify_dim_order(dataset):
            # return a new dataset with all variable dimensions tranposed into
            # the order in which they are found in `data`
            return Dataset(dict((k, v.transpose(*data[k].dims))
                                for k, v in iteritems(dataset.data_vars)),
                           dataset.coords,
                           attrs=dataset.attrs)

        for dim in ['dim1', 'dim2']:
            datasets = [g for _, g in data.groupby(dim, squeeze=False)]
            self.assertDatasetIdentical(data, concat(datasets, dim))
            self.assertDatasetIdentical(data, concat(datasets, data[dim]))
            self.assertDatasetIdentical(
                data, concat(datasets, data[dim], coords='minimal'))

            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            concat_over = [
                k for k, v in iteritems(data.coords)
                if dim in v.dims and k != dim
            ]
            actual = concat(datasets, data[dim], coords=concat_over)
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

            actual = concat(datasets, data[dim], coords='different')
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

        # make sure the coords argument behaves as expected
        data.coords['extra'] = ('dim4', np.arange(3))
        for dim in ['dim1', 'dim2']:
            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            actual = concat(datasets, data[dim], coords='all')
            expected = np.array(
                [data['extra'].values for _ in range(data.dims[dim])])
            self.assertArrayEqual(actual['extra'].values, expected)

            actual = concat(datasets, data[dim], coords='different')
            self.assertDataArrayEqual(data['extra'], actual['extra'])
            actual = concat(datasets, data[dim], coords='minimal')
            self.assertDataArrayEqual(data['extra'], actual['extra'])

        # verify that the dim argument takes precedence over
        # concatenating dataset variables of the same name
        dim = (2 * data['dim1']).rename('dim1')
        datasets = [g for _, g in data.groupby('dim1', squeeze=False)]
        expected = data.copy()
        expected['dim1'] = dim
        self.assertDatasetIdentical(expected, concat(datasets, dim))
Example #8
0
    def test_concat(self):
        # TODO: simplify and split this test case

        # drop the third dimension to keep things relatively understandable
        data = create_test_data().drop('dim3')

        split_data = [data.isel(dim1=slice(3)),
                      data.isel(dim1=slice(3, None))]
        self.assertDatasetIdentical(data, concat(split_data, 'dim1'))

        def rectify_dim_order(dataset):
            # return a new dataset with all variable dimensions tranposed into
            # the order in which they are found in `data`
            return Dataset(dict((k, v.transpose(*data[k].dims))
                                for k, v in iteritems(dataset.data_vars)),
                           dataset.coords, attrs=dataset.attrs)

        for dim in ['dim1', 'dim2']:
            datasets = [g for _, g in data.groupby(dim, squeeze=False)]
            self.assertDatasetIdentical(data, concat(datasets, dim))
            self.assertDatasetIdentical(
                data, concat(datasets, data[dim]))
            self.assertDatasetIdentical(
                data, concat(datasets, data[dim], coords='minimal'))

            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            concat_over = [k for k, v in iteritems(data.coords)
                           if dim in v.dims and k != dim]
            actual = concat(datasets, data[dim], coords=concat_over)
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

            actual = concat(datasets, data[dim], coords='different')
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

        # make sure the coords argument behaves as expected
        data.coords['extra'] = ('dim4', np.arange(3))
        for dim in ['dim1', 'dim2']:
            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            actual = concat(datasets, data[dim], coords='all')
            expected = np.array([data['extra'].values
                                 for _ in range(data.dims[dim])])
            self.assertArrayEqual(actual['extra'].values, expected)

            actual = concat(datasets, data[dim], coords='different')
            self.assertDataArrayEqual(data['extra'], actual['extra'])
            actual = concat(datasets, data[dim], coords='minimal')
            self.assertDataArrayEqual(data['extra'], actual['extra'])

        # verify that the dim argument takes precedence over
        # concatenating dataset variables of the same name
        dim = (2 * data['dim1']).rename('dim1')
        datasets = [g for _, g in data.groupby('dim1', squeeze=False)]
        expected = data.copy()
        expected['dim1'] = dim
        self.assertDatasetIdentical(expected, concat(datasets, dim))
Example #9
0
 def test_squeeze(self):
     data = Dataset({'foo': (['x', 'y', 'z'], [[[1], [2]]])})
     for args in [[], [['x']], [['x', 'z']]]:
         def get_args(v):
             return [set(args[0]) & set(v.dims)] if args else []
         expected = Dataset(dict((k, v.squeeze(*get_args(v)))
                                 for k, v in iteritems(data.variables)))
         expected.set_coords(data.coords, inplace=True)
         self.assertDatasetIdentical(expected, data.squeeze(*args))
     # invalid squeeze
     with self.assertRaisesRegexp(ValueError, 'cannot select a dimension'):
         data.squeeze('y')
Example #10
0
    def test_compression_encoding(self):
        data = create_test_data()
        data['var2'].encoding.update({'zlib': True,
                                      'chunksizes': (5, 5),
                                      'least_significant_digit': 2})
        with self.roundtrip(data) as actual:
            for k, v in iteritems(data['var2'].encoding):
                self.assertEqual(v, actual['var2'].encoding[k])

        # regression test for #156
        expected = data.isel(dim1=0).reset_coords()
        with self.roundtrip(expected) as actual:
            self.assertDatasetEqual(expected, actual)
Example #11
0
    def test_compression_encoding(self):
        data = create_test_data()
        data['var2'].encoding.update({'zlib': True,
                                      'chunksizes': (5, 5),
                                      'fletcher32': True,
                                      'original_shape': data.var2.shape})
        with self.roundtrip(data) as actual:
            for k, v in iteritems(data['var2'].encoding):
                self.assertEqual(v, actual['var2'].encoding[k])

        # regression test for #156
        expected = data.isel(dim1=0)
        with self.roundtrip(expected) as actual:
            self.assertDatasetEqual(expected, actual)
Example #12
0
    def test_compression_encoding(self):
        data = create_test_data()
        data['var2'].encoding.update({
            'zlib': True,
            'chunksizes': (5, 5),
            'fletcher32': True
        })
        with self.roundtrip(data) as actual:
            for k, v in iteritems(data['var2'].encoding):
                self.assertEqual(v, actual['var2'].encoding[k])

        # regression test for #156
        expected = data.isel(dim1=0)
        with self.roundtrip(expected) as actual:
            self.assertDatasetEqual(expected, actual)
Example #13
0
    def test_isel(self):
        data = create_test_data()
        slicers = {'dim1': slice(None, None, 2), 'dim2': slice(0, 2)}
        ret = data.isel(**slicers)

        # Verify that only the specified dimension was altered
        self.assertItemsEqual(data.dims, ret.dims)
        for d in data.dims:
            if d in slicers:
                self.assertEqual(ret.dims[d],
                                 np.arange(data.dims[d])[slicers[d]].size)
            else:
                self.assertEqual(data.dims[d], ret.dims[d])
        # Verify that the data is what we expect
        for v in data.variables:
            self.assertEqual(data[v].dims, ret[v].dims)
            self.assertEqual(data[v].attrs, ret[v].attrs)
            slice_list = [slice(None)] * data[v].values.ndim
            for d, s in iteritems(slicers):
                if d in data[v].dims:
                    inds = np.nonzero(np.array(data[v].dims) == d)[0]
                    for ind in inds:
                        slice_list[ind] = s
            expected = data[v].values[slice_list]
            actual = ret[v].values
            np.testing.assert_array_equal(expected, actual)

        with self.assertRaises(ValueError):
            data.isel(not_a_dim=slice(0, 2))

        ret = data.isel(dim1=0)
        self.assertEqual({'time': 20, 'dim2': 9, 'dim3': 10}, ret.dims)
        self.assertItemsEqual(data, ret)
        self.assertItemsEqual(data.coords, ret.coords)
        self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1'])

        ret = data.isel(time=slice(2), dim1=0, dim2=slice(5))
        self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dims)
        self.assertItemsEqual(data, ret)
        self.assertItemsEqual(data.coords, ret.coords)
        self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1'])

        ret = data.isel(time=0, dim1=0, dim2=slice(5))
        self.assertItemsEqual({'dim2': 5, 'dim3': 10}, ret.dims)
        self.assertItemsEqual(data, ret)
        self.assertItemsEqual(data.coords, ret.coords)
        self.assertItemsEqual(data.indexes,
                              list(ret.indexes) + ['dim1', 'time'])
Example #14
0
 def test_properties(self):
     self.assertVariableEqual(self.dv.variable, self.v)
     self.assertArrayEqual(self.dv.values, self.v.values)
     for attr in ['dims', 'dtype', 'shape', 'size', 'ndim', 'attrs']:
         self.assertEqual(getattr(self.dv, attr), getattr(self.v, attr))
     self.assertEqual(len(self.dv), len(self.v))
     self.assertVariableEqual(self.dv, self.v)
     self.assertItemsEqual(list(self.dv.coords), list(self.ds.coords))
     for k, v in iteritems(self.dv.coords):
         self.assertArrayEqual(v, self.ds.coords[k])
     with self.assertRaises(AttributeError):
         self.dv.dataset = self.ds
     self.assertIsInstance(self.ds['x'].to_index(), pd.Index)
     with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
         self.ds['foo'].to_index()
     with self.assertRaises(AttributeError):
         self.dv.variable = self.v
Example #15
0
 def test_properties(self):
     self.assertVariableEqual(self.dv.variable, self.v)
     self.assertArrayEqual(self.dv.values, self.v.values)
     for attr in ['dims', 'dtype', 'shape', 'size', 'ndim', 'attrs']:
         self.assertEqual(getattr(self.dv, attr), getattr(self.v, attr))
     self.assertEqual(len(self.dv), len(self.v))
     self.assertVariableEqual(self.dv, self.v)
     self.assertItemsEqual(list(self.dv.coords), list(self.ds.coords))
     for k, v in iteritems(self.dv.coords):
         self.assertArrayEqual(v, self.ds.coords[k])
     with self.assertRaises(AttributeError):
         self.dv.dataset
     self.assertIsInstance(self.ds['x'].to_index(), pd.Index)
     with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
         self.ds['foo'].to_index()
     with self.assertRaises(AttributeError):
         self.dv.variable = self.v
Example #16
0
    def test_reduce(self):
        data = create_test_data()

        self.assertEqual(len(data.mean().coords), 0)

        actual = data.max()
        expected = Dataset(dict((k, v.max())
                                for k, v in iteritems(data)))
        self.assertDatasetEqual(expected, actual)

        self.assertDatasetEqual(data.min(dim=['dim1']),
                                data.min(dim='dim1'))

        for reduct, expected in [('dim2', ['dim1', 'dim3', 'time']),
                                 (['dim2', 'time'], ['dim1', 'dim3']),
                                 (('dim2', 'time'), ['dim1', 'dim3']),
                                 ((), ['dim1', 'dim2', 'dim3', 'time'])]:
            actual = data.min(dim=reduct).dims
            print(reduct, actual, expected)
            self.assertItemsEqual(actual, expected)

        self.assertDatasetEqual(data.mean(dim=[]), data)
Example #17
0
    def test_concat(self):
        data = create_test_data()

        split_data = [data.isel(dim1=slice(10)),
                      data.isel(dim1=slice(10, None))]
        self.assertDatasetIdentical(data, concat(split_data, 'dim1'))

        def rectify_dim_order(dataset):
            # return a new dataset with all variable dimensions tranposed into
            # the order in which they are found in `data`
            return Dataset(dict((k, v.transpose(*data[k].dims))
                                for k, v in iteritems(dataset)),
                           dataset.coords, attrs=dataset.attrs)

        for dim in ['dim1', 'dim2', 'dim3']:
            datasets = [g for _, g in data.groupby(dim, squeeze=False)]
            self.assertDatasetIdentical(data, concat(datasets, dim))
            self.assertDatasetIdentical(
                data, concat(datasets, data[dim]))
            self.assertDatasetIdentical(
                data, concat(datasets, data[dim], mode='minimal'))

            datasets = [g for _, g in data.groupby(dim, squeeze=True)]
            concat_over = [k for k, v in iteritems(data.variables)
                           if dim in v.dims and k != dim]
            actual = concat(datasets, data[dim], concat_over=concat_over)
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

            actual = concat(datasets, data[dim], mode='different')
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

        # Now add a new variable that doesn't depend on any of the current
        # dims and make sure the mode argument behaves as expected
        data['var4'] = ('dim4', np.arange(data.dims['dim3']))
        for dim in ['dim1', 'dim2', 'dim3']:
            datasets = [g for _, g in data.groupby(dim, squeeze=False)]
            actual = concat(datasets, data[dim], mode='all')
            expected = np.array([data['var4'].values
                                 for _ in range(data.dims[dim])])
            self.assertArrayEqual(actual['var4'].values, expected)

            actual = concat(datasets, data[dim], mode='different')
            self.assertDataArrayEqual(data['var4'], actual['var4'])
            actual = concat(datasets, data[dim], mode='minimal')
            self.assertDataArrayEqual(data['var4'], actual['var4'])

        # verify that the dim argument takes precedence over
        # concatenating dataset variables of the same name
        dim = (2 * data['dim1']).rename('dim1')
        datasets = [g for _, g in data.groupby('dim1', squeeze=False)]
        expected = data.copy()
        expected['dim1'] = dim
        self.assertDatasetIdentical(expected, concat(datasets, dim))

        # TODO: factor this into several distinct tests
        data = create_test_data()
        split_data = [data.isel(dim1=slice(10)),
                      data.isel(dim1=slice(10, None))]

        with self.assertRaisesRegexp(ValueError, 'must supply at least one'):
            concat([], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'not all elements in'):
            concat(split_data, 'dim1', concat_over=['not_found'])

        with self.assertRaisesRegexp(ValueError, 'global attributes not'):
            data0, data1 = deepcopy(split_data)
            data1.attrs['foo'] = 'bar'
            concat([data0, data1], 'dim1', compat='identical')
        self.assertDatasetIdentical(
            data, concat([data0, data1], 'dim1', compat='equals'))

        with self.assertRaisesRegexp(ValueError, 'encountered unexpected'):
            data0, data1 = deepcopy(split_data)
            data1['foo'] = ('bar', np.random.randn(10))
            concat([data0, data1], 'dim1')

        with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
            data0, data1 = deepcopy(split_data)
            data1['dim2'] = 2 * data1['dim2']
            concat([data0, data1], 'dim1')
Example #18
0
 def rectify_dim_order(dataset):
     # return a new dataset with all variable dimensions tranposed into
     # the order in which they are found in `data`
     return Dataset(dict((k, v.transpose(*data[k].dims))
                         for k, v in iteritems(dataset.data_vars)),
                    dataset.coords, attrs=dataset.attrs)