def test_rename(self):
    """Check ``Dataset.rename`` against hand-built expected variables.

    The expected variables are derived from the original dataset by
    re-keying them and translating their dimension names; values,
    encoding and type are then compared with the renamed dataset.  The
    test also checks the error for an unknown name and that renaming
    does not access the underlying data.
    """
    data = create_test_data()
    newnames = {'var1': 'renamed_var1', 'dim2': 'renamed_dim2'}
    renamed = data.rename(newnames)

    # same variables as the original, stored under their new names
    expected_vars = OrderedDict(data.variables)
    for old_name, new_name in iteritems(newnames):
        expected_vars[new_name] = expected_vars.pop(old_name)

    for key, var in iteritems(expected_vars):
        # translate renamed dimensions, leave the others untouched
        expected_dims = [newnames.get(d, d) for d in var.dims]
        renamed_var = renamed.variables[key]

        self.assertVariableEqual(
            Variable(expected_dims, var.values, var.attrs), renamed_var)
        self.assertEqual(var.encoding, renamed_var.encoding)
        self.assertEqual(type(var), type(renamed_var))

    # the old names must be gone from the renamed dataset
    for old_name in ('var1', 'dim2'):
        self.assertTrue(old_name not in renamed.variables)

    with self.assertRaisesRegexp(ValueError, "cannot rename 'not_a_var'"):
        data.rename({'not_a_var': 'nada'})

    # verify that we can rename a variable without accessing the data
    original = data['var1']
    data['var1'] = (original.dims, InaccessibleArray(original.values))
    renamed = data.rename(newnames)
    with self.assertRaises(UnexpectedDataAccess):
        renamed['renamed_var1'].values
def test_open_encodings(self):
    """Check that explicit time units end up in the variable encoding.

    A netCDF file with an int32 ``time`` variable and a ``units``
    attribute is written with netCDF4, reopened with ``open_dataset``,
    and the decoded variable and the relevant encoding entries are
    compared with a hand-built expected dataset.
    """
    units = 'days since 1999-01-01'

    with create_tmp_file() as tmp_file:
        with nc4.Dataset(tmp_file, 'w') as ds:
            ds.createDimension('time', size=10)
            ds.createVariable('time', np.int32, dimensions=('time',))
            time_var = ds.variables['time']
            time_var.setncattr('units', units)
            time_var[:] = np.arange(10) + 4

        # offsets 4..13 days after 1999-01-01 start on 1999-01-05
        expected = Dataset()
        expected_time = pd.date_range('1999-01-05', periods=10)
        expected_encoding = {'units': units, 'dtype': np.dtype('int32')}
        expected['time'] = ('time', expected_time, {}, expected_encoding)

        with open_dataset(tmp_file) as actual:
            self.assertVariableEqual(actual['time'], expected['time'])
            # only compare the encoding keys the expected variable defines
            actual_encoding = {}
            for key, value in iteritems(actual['time'].encoding):
                if key in expected['time'].encoding:
                    actual_encoding[key] = value
            self.assertDictEqual(actual_encoding,
                                 expected['time'].encoding)
def test_open_encodings(self):
    """Check that explicit time units end up in the variable encoding.

    Creates a netCDF file with explicit time units, reopens it with
    ``open_dataset`` and makes sure the units make it into the encoding
    and survive the round trip.
    """
    with create_tmp_file() as tmp_file:
        with nc4.Dataset(tmp_file, 'w') as ds:
            ds.createDimension('time', size=10)
            ds.createVariable('time', np.int32, dimensions=('time',))
            units = 'days since 1999-01-01'
            ds.variables['time'].setncattr('units', units)
            ds.variables['time'][:] = np.arange(10) + 4

        expected = Dataset()

        time = pd.date_range('1999-01-05', periods=10)
        encoding = {'units': units, 'dtype': np.dtype('int32')}
        expected['time'] = ('time', time, {}, encoding)

        # Open the dataset as a context manager so it is closed before
        # the enclosing block disposes of the temporary file; a bare
        # ``open_dataset`` call left the handle open.
        with open_dataset(tmp_file) as actual:
            self.assertVariableEqual(actual['time'], expected['time'])
            # only compare the encoding keys the expected variable defines
            actual_encoding = dict(
                (k, v) for k, v in iteritems(actual['time'].encoding)
                if k in expected['time'].encoding)
            self.assertDictEqual(actual_encoding,
                                 expected['time'].encoding)
def rectify_dim_order(dataset):
    """Return a new dataset with data variables transposed to match `data`.

    Each data variable of ``dataset`` is transposed into the dimension
    order of the same-named variable in ``data``.  ``data`` is not a
    parameter; it is looked up in the surrounding scope.  The coordinates
    and attributes of ``dataset`` are passed on to the new ``Dataset``.
    """
    transposed = {}
    for name, var in iteritems(dataset.data_vars):
        transposed[name] = var.transpose(*data[name].dims)
    return Dataset(transposed, dataset.coords, attrs=dataset.attrs)
def null_wrap(ds):
    """
    Wrap every variable of a data store in a NullWrapper.

    Given a data store, build an InMemoryDataStore whose variables keep
    the original dims and attrs but hold their values inside a
    NullWrapper, so that each variable appears to be out of memory.
    The store's own attrs are carried over as the new store's attributes.
    """
    wrapped = {}
    for name, var in iteritems(ds):
        wrapped[name] = Variable(var.dims, NullWrapper(var.values),
                                 var.attrs)
    return InMemoryDataStore(variables=wrapped, attributes=ds.attrs)
def test_concat(self):
    """Exercise ``concat`` on pieces of the test dataset.

    Covers concatenating two slices along ``dim1``, concatenating the
    groups produced by ``groupby`` (squeezed and unsqueezed) along
    ``dim1`` and ``dim2`` with the different ``coords`` options, and a
    dimension argument that shares its name with a dataset variable.
    """
    # TODO: simplify and split this test case

    # drop the third dimension to keep things relatively understandable
    data = create_test_data().drop('dim3')

    head = data.isel(dim1=slice(3))
    tail = data.isel(dim1=slice(3, None))
    self.assertDatasetIdentical(data, concat([head, tail], 'dim1'))

    def rectify_dim_order(dataset):
        # return a new dataset with all variable dimensions transposed
        # into the order in which they are found in `data`
        transposed = {}
        for name, var in iteritems(dataset.data_vars):
            transposed[name] = var.transpose(*data[name].dims)
        return Dataset(transposed, dataset.coords, attrs=dataset.attrs)

    for dim in ['dim1', 'dim2']:
        groups = [group for _, group in data.groupby(dim, squeeze=False)]
        # the dimension may be given by name or as the coordinate itself
        for concat_dim in (dim, data[dim]):
            self.assertDatasetIdentical(data, concat(groups, concat_dim))
        self.assertDatasetIdentical(
            data, concat(groups, data[dim], coords='minimal'))

        groups = [group for _, group in data.groupby(dim, squeeze=True)]
        concat_over = [name for name, coord in iteritems(data.coords)
                       if name != dim and dim in coord.dims]
        for coords in (concat_over, 'different'):
            actual = concat(groups, data[dim], coords=coords)
            self.assertDatasetIdentical(data, rectify_dim_order(actual))

    # make sure the coords argument behaves as expected
    data.coords['extra'] = ('dim4', np.arange(3))
    for dim in ['dim1', 'dim2']:
        groups = [group for _, group in data.groupby(dim, squeeze=True)]

        # 'all' stacks the extra coordinate once per group
        actual = concat(groups, data[dim], coords='all')
        stacked = np.array([data['extra'].values
                            for _ in range(data.dims[dim])])
        self.assertArrayEqual(actual['extra'].values, stacked)

        # the other modes leave it as it was
        for coords in ('different', 'minimal'):
            actual = concat(groups, data[dim], coords=coords)
            self.assertDataArrayEqual(data['extra'], actual['extra'])

    # verify that the dim argument takes precedence over
    # concatenating dataset variables of the same name
    doubled = (2 * data['dim1']).rename('dim1')
    groups = [group for _, group in data.groupby('dim1', squeeze=False)]
    expected = data.copy()
    expected['dim1'] = doubled
    self.assertDatasetIdentical(expected, concat(groups, doubled))
def test_concat(self):
    """Check that ``concat`` reassembles the test dataset from its parts.

    The dataset (with ``dim3`` dropped) is split by slicing and by
    ``groupby`` along ``dim1`` and ``dim2``; the pieces are concatenated
    with each ``coords`` option and compared with the original.  A final
    check passes a dimension whose name matches a dataset variable.
    """
    # TODO: simplify and split this test case

    # drop the third dimension to keep things relatively understandable
    data = create_test_data().drop('dim3')

    def groups_along(dim, squeeze):
        # the per-label groups of `data` along `dim`, without the labels
        return [g for _, g in data.groupby(dim, squeeze=squeeze)]

    def rectify_dim_order(dataset):
        # return a new dataset with all variable dimensions transposed
        # into the order in which they are found in `data`
        reordered = dict(
            (k, v.transpose(*data[k].dims))
            for k, v in iteritems(dataset.data_vars))
        return Dataset(reordered, dataset.coords, attrs=dataset.attrs)

    pieces = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))]
    self.assertDatasetIdentical(data, concat(pieces, 'dim1'))

    for dim in ['dim1', 'dim2']:
        unsqueezed = groups_along(dim, squeeze=False)
        self.assertDatasetIdentical(data, concat(unsqueezed, dim))
        self.assertDatasetIdentical(data, concat(unsqueezed, data[dim]))
        self.assertDatasetIdentical(
            data, concat(unsqueezed, data[dim], coords='minimal'))

        squeezed = groups_along(dim, squeeze=True)
        # every coordinate (other than `dim` itself) that spans `dim`
        concat_over = []
        for k, v in iteritems(data.coords):
            if dim in v.dims and k != dim:
                concat_over.append(k)

        result = concat(squeezed, data[dim], coords=concat_over)
        self.assertDatasetIdentical(data, rectify_dim_order(result))

        result = concat(squeezed, data[dim], coords='different')
        self.assertDatasetIdentical(data, rectify_dim_order(result))

    # make sure the coords argument behaves as expected
    data.coords['extra'] = ('dim4', np.arange(3))
    for dim in ['dim1', 'dim2']:
        squeezed = groups_along(dim, squeeze=True)

        result = concat(squeezed, data[dim], coords='all')
        repeated = np.array(
            [data['extra'].values for _ in range(data.dims[dim])])
        self.assertArrayEqual(result['extra'].values, repeated)

        result = concat(squeezed, data[dim], coords='different')
        self.assertDataArrayEqual(data['extra'], result['extra'])

        result = concat(squeezed, data[dim], coords='minimal')
        self.assertDataArrayEqual(data['extra'], result['extra'])

    # verify that the dim argument takes precedence over
    # concatenating dataset variables of the same name
    new_dim = (2 * data['dim1']).rename('dim1')
    unsqueezed = groups_along('dim1', squeeze=False)
    expected = data.copy()
    expected['dim1'] = new_dim
    self.assertDatasetIdentical(expected, concat(unsqueezed, new_dim))
def test_squeeze(self):
    """Check ``Dataset.squeeze`` against squeezing each variable.

    For each argument list, the expected dataset is built by squeezing
    every variable over the requested dimensions it actually has, then
    restoring the coordinates.  Squeezing ``'y'`` must raise ValueError.
    """
    data = Dataset({'foo': (['x', 'y', 'z'], [[[1], [2]]])})

    for args in [[], [['x']], [['x', 'z']]]:
        squeezed = {}
        for name, var in iteritems(data.variables):
            if args:
                # restrict the requested dims to those of this variable
                squeezed[name] = var.squeeze(set(args[0]) & set(var.dims))
            else:
                squeezed[name] = var.squeeze()
        expected = Dataset(squeezed)
        expected.set_coords(data.coords, inplace=True)
        self.assertDatasetIdentical(expected, data.squeeze(*args))

    # invalid squeeze
    with self.assertRaisesRegexp(ValueError, 'cannot select a dimension'):
        data.squeeze('y')
def test_compression_encoding(self):
    """Check that compression settings in the encoding survive a roundtrip.

    Also round-trips a dataset indexed at ``dim1=0`` with its coordinates
    reset (regression test for #156).
    """
    data = create_test_data()
    compression = {'zlib': True,
                   'chunksizes': (5, 5),
                   'least_significant_digit': 2}
    data['var2'].encoding.update(compression)

    with self.roundtrip(data) as actual:
        # every encoding entry set on the original must come back equal
        for key, value in iteritems(data['var2'].encoding):
            self.assertEqual(value, actual['var2'].encoding[key])

    # regression test for #156
    expected = data.isel(dim1=0).reset_coords()
    with self.roundtrip(expected) as actual:
        self.assertDatasetEqual(expected, actual)
def test_compression_encoding(self):
    """Check that compression-related encoding survives a roundtrip.

    ``var2`` gets zlib, chunk size, fletcher32 and original-shape entries
    in its encoding; after a roundtrip each entry must be unchanged.  A
    dataset indexed at ``dim1=0`` is round-tripped as well (regression
    test for #156).
    """
    data = create_test_data()
    var2_encoding = data['var2'].encoding
    var2_encoding.update({
        'zlib': True,
        'chunksizes': (5, 5),
        'fletcher32': True,
        'original_shape': data.var2.shape,
    })

    with self.roundtrip(data) as roundtripped:
        for key, value in iteritems(data['var2'].encoding):
            self.assertEqual(value, roundtripped['var2'].encoding[key])

    # regression test for #156
    expected = data.isel(dim1=0)
    with self.roundtrip(expected) as roundtripped:
        self.assertDatasetEqual(expected, roundtripped)
def test_compression_encoding(self):
    """Check that zlib/chunksizes/fletcher32 encoding survives a roundtrip.

    A dataset indexed at ``dim1=0`` is round-tripped as well (regression
    test for #156).
    """
    data = create_test_data()
    settings = {'zlib': True, 'chunksizes': (5, 5), 'fletcher32': True}
    data['var2'].encoding.update(settings)

    with self.roundtrip(data) as result:
        # compare every encoding entry of the original, not just the
        # three that were set above
        for name, original_value in iteritems(data['var2'].encoding):
            self.assertEqual(original_value,
                             result['var2'].encoding[name])

    # regression test for #156
    expected = data.isel(dim1=0)
    with self.roundtrip(expected) as result:
        self.assertDatasetEqual(expected, result)
def test_isel(self):
    """Check ``Dataset.isel`` with slice and integer indexers.

    First verifies, for two slice indexers, that only the indexed
    dimensions change size and that every variable's values match the
    same slicing applied directly to its array.  Then checks that an
    unknown dimension raises ValueError, and that integer indexers drop
    the indexed dimension from ``dims`` and ``indexes`` while keeping the
    same variables and coordinates.
    """
    data = create_test_data()
    slicers = {'dim1': slice(None, None, 2), 'dim2': slice(0, 2)}
    ret = data.isel(**slicers)

    # Verify that only the specified dimension was altered
    self.assertItemsEqual(data.dims, ret.dims)
    for d in data.dims:
        if d in slicers:
            self.assertEqual(ret.dims[d],
                             np.arange(data.dims[d])[slicers[d]].size)
        else:
            self.assertEqual(data.dims[d], ret.dims[d])

    # Verify that the data is what we expect
    for v in data.variables:
        self.assertEqual(data[v].dims, ret[v].dims)
        self.assertEqual(data[v].attrs, ret[v].attrs)
        slice_list = [slice(None)] * data[v].values.ndim
        for axis, d in enumerate(data[v].dims):
            if d in slicers:
                slice_list[axis] = slicers[d]
        # NumPy requires a tuple for multidimensional indexing; a list
        # of slices is no longer interpreted as one index per axis.
        expected = data[v].values[tuple(slice_list)]
        actual = ret[v].values
        np.testing.assert_array_equal(expected, actual)

    with self.assertRaises(ValueError):
        data.isel(not_a_dim=slice(0, 2))

    ret = data.isel(dim1=0)
    self.assertEqual({'time': 20, 'dim2': 9, 'dim3': 10}, ret.dims)
    self.assertItemsEqual(data, ret)
    self.assertItemsEqual(data.coords, ret.coords)
    self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1'])

    ret = data.isel(time=slice(2), dim1=0, dim2=slice(5))
    self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dims)
    self.assertItemsEqual(data, ret)
    self.assertItemsEqual(data.coords, ret.coords)
    self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1'])

    ret = data.isel(time=0, dim1=0, dim2=slice(5))
    # assertEqual (not assertItemsEqual) so the sizes are checked too,
    # matching the two dims checks above.
    self.assertEqual({'dim2': 5, 'dim3': 10}, ret.dims)
    self.assertItemsEqual(data, ret)
    self.assertItemsEqual(data.coords, ret.coords)
    self.assertItemsEqual(data.indexes,
                          list(ret.indexes) + ['dim1', 'time'])
def test_properties(self):
    """Check that the data array mirrors its variable and dataset.

    Compares ``self.dv`` with ``self.v`` (variable, values, basic
    attributes, length) and its coordinates with those of ``self.ds``,
    checks ``to_index`` on a 1-d and a non-1-d array, and checks that
    assigning ``dataset`` or ``variable`` raises AttributeError.
    """
    dv, v, ds = self.dv, self.v, self.ds

    self.assertVariableEqual(dv.variable, v)
    self.assertArrayEqual(dv.values, v.values)
    for name in ['dims', 'dtype', 'shape', 'size', 'ndim', 'attrs']:
        self.assertEqual(getattr(dv, name), getattr(v, name))
    self.assertEqual(len(dv), len(v))
    self.assertVariableEqual(dv, v)

    # same coordinate names, and equal values for each of them
    self.assertItemsEqual(list(dv.coords), list(ds.coords))
    for coord_name, coord in iteritems(dv.coords):
        self.assertArrayEqual(coord, ds.coords[coord_name])

    with self.assertRaises(AttributeError):
        dv.dataset = ds

    self.assertIsInstance(ds['x'].to_index(), pd.Index)
    with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
        ds['foo'].to_index()

    with self.assertRaises(AttributeError):
        dv.variable = v
def test_properties(self):
    """Check the basic properties exposed by the data array.

    ``self.dv`` must agree with ``self.v`` on variable, values, the
    listed attributes and length, and with ``self.ds`` on coordinates.
    Reading ``dataset`` and assigning ``variable`` must both raise
    AttributeError; ``to_index`` must give a pandas Index for ``'x'`` and
    raise ValueError for ``'foo'``.
    """
    array = self.dv
    variable = self.v
    dataset = self.ds

    self.assertVariableEqual(array.variable, variable)
    self.assertArrayEqual(array.values, variable.values)

    mirrored = ['dims', 'dtype', 'shape', 'size', 'ndim', 'attrs']
    for attr_name in mirrored:
        self.assertEqual(getattr(array, attr_name),
                         getattr(variable, attr_name))
    self.assertEqual(len(array), len(variable))
    self.assertVariableEqual(array, variable)

    self.assertItemsEqual(list(array.coords), list(dataset.coords))
    for key, coord in iteritems(array.coords):
        self.assertArrayEqual(coord, dataset.coords[key])

    # merely reading the attribute is expected to fail
    with self.assertRaises(AttributeError):
        array.dataset

    self.assertIsInstance(dataset['x'].to_index(), pd.Index)
    with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
        dataset['foo'].to_index()

    with self.assertRaises(AttributeError):
        array.variable = variable
def test_reduce(self):
    """Check dataset-wide reductions and the dimensions they leave.

    Verifies that ``mean()`` leaves no coordinates, that ``max()`` equals
    reducing each variable individually, that ``dim`` accepts a string or
    a list, which dimensions remain after ``min`` over several ``dim``
    arguments, and that reducing over no dimensions returns the data
    unchanged.
    """
    data = create_test_data()

    self.assertEqual(len(data.mean().coords), 0)

    actual = data.max()
    expected = Dataset(dict((k, v.max()) for k, v in iteritems(data)))
    self.assertDatasetEqual(expected, actual)

    self.assertDatasetEqual(data.min(dim=['dim1']),
                            data.min(dim='dim1'))

    # (dims to reduce over, dims expected to remain afterwards)
    cases = [
        ('dim2', ['dim1', 'dim3', 'time']),
        (['dim2', 'time'], ['dim1', 'dim3']),
        (('dim2', 'time'), ['dim1', 'dim3']),
        ((), ['dim1', 'dim2', 'dim3', 'time']),
    ]
    # Distinct loop names keep the `expected` dataset above from being
    # shadowed; the leftover debug print was dropped.
    for reduct, remaining_dims in cases:
        actual_dims = data.min(dim=reduct).dims
        self.assertItemsEqual(actual_dims, remaining_dims)

    self.assertDatasetEqual(data.mean(dim=[]), data)
def test_concat(self):
    """Exercise ``concat`` with the ``mode``/``concat_over`` arguments.

    Covers reassembling the test dataset from slices and from
    ``groupby`` groups along each dimension, the handling of a variable
    that depends on none of the concatenated dimensions, a dimension
    argument that shares its name with a dataset variable, and the
    ValueError messages for invalid input.
    """
    dim_names = ['dim1', 'dim2', 'dim3']

    data = create_test_data()

    split_data = [data.isel(dim1=slice(10)),
                  data.isel(dim1=slice(10, None))]
    self.assertDatasetIdentical(data, concat(split_data, 'dim1'))

    def rectify_dim_order(dataset):
        # return a new dataset with all variable dimensions transposed
        # into the order in which they are found in `data`
        transposed = {}
        for name, var in iteritems(dataset):
            transposed[name] = var.transpose(*data[name].dims)
        return Dataset(transposed, dataset.coords, attrs=dataset.attrs)

    for dim in dim_names:
        groups = [group for _, group in data.groupby(dim, squeeze=False)]
        # the dimension may be given by name or as the variable itself
        for concat_dim in (dim, data[dim]):
            self.assertDatasetIdentical(data, concat(groups, concat_dim))
        self.assertDatasetIdentical(
            data, concat(groups, data[dim], mode='minimal'))

        groups = [group for _, group in data.groupby(dim, squeeze=True)]
        concat_over = [name for name, var in iteritems(data.variables)
                       if name != dim and dim in var.dims]
        actual = concat(groups, data[dim], concat_over=concat_over)
        self.assertDatasetIdentical(data, rectify_dim_order(actual))

        actual = concat(groups, data[dim], mode='different')
        self.assertDatasetIdentical(data, rectify_dim_order(actual))

    # Now add a new variable that doesn't depend on any of the current
    # dims and make sure the mode argument behaves as expected
    data['var4'] = ('dim4', np.arange(data.dims['dim3']))
    for dim in dim_names:
        groups = [group for _, group in data.groupby(dim, squeeze=False)]

        # 'all' stacks the new variable once per group
        actual = concat(groups, data[dim], mode='all')
        stacked = np.array([data['var4'].values
                            for _ in range(data.dims[dim])])
        self.assertArrayEqual(actual['var4'].values, stacked)

        # the other modes leave it as it was
        for mode in ('different', 'minimal'):
            actual = concat(groups, data[dim], mode=mode)
            self.assertDataArrayEqual(data['var4'], actual['var4'])

    # verify that the dim argument takes precedence over
    # concatenating dataset variables of the same name
    doubled = (2 * data['dim1']).rename('dim1')
    groups = [group for _, group in data.groupby('dim1', squeeze=False)]
    expected = data.copy()
    expected['dim1'] = doubled
    self.assertDatasetIdentical(expected, concat(groups, doubled))

    # TODO: factor this into several distinct tests
    data = create_test_data()

    split_data = [data.isel(dim1=slice(10)),
                  data.isel(dim1=slice(10, None))]

    with self.assertRaisesRegexp(ValueError, 'must supply at least one'):
        concat([], 'dim1')

    with self.assertRaisesRegexp(ValueError, 'not all elements in'):
        concat(split_data, 'dim1', concat_over=['not_found'])

    with self.assertRaisesRegexp(ValueError, 'global attributes not'):
        left, right = deepcopy(split_data)
        right.attrs['foo'] = 'bar'
        concat([left, right], 'dim1', compat='identical')
    # the same pair must still concatenate under compat='equals'
    self.assertDatasetIdentical(
        data, concat([left, right], 'dim1', compat='equals'))

    with self.assertRaisesRegexp(ValueError, 'encountered unexpected'):
        left, right = deepcopy(split_data)
        right['foo'] = ('bar', np.random.randn(10))
        concat([left, right], 'dim1')

    with self.assertRaisesRegexp(ValueError, 'not equal across datasets'):
        left, right = deepcopy(split_data)
        right['dim2'] = 2 * right['dim2']
        concat([left, right], 'dim1')