def test_decode_cf_with_dask(self):
    """decode_cf on a chunked dataset keeps every non-index variable lazy.

    Removed a leftover ``print(decoded)`` debug statement that polluted
    test output.
    """
    import dask.array as da

    original = Dataset(
        {
            "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
            "foo": ("t", [0, 0, 0], {"coordinates": "y", "units": "bar"}),
            "bar": ("string2", [b"a", b"b"]),
            "baz": (("x"), [b"abc"], {"_Encoding": "utf-8"}),
            "y": ("t", [5, 10, -999], {"_FillValue": -999}),
        }
    ).chunk()
    decoded = conventions.decode_cf(original)
    # Decoding must stay lazy: everything except index coordinates should
    # still be backed by dask arrays.
    assert all(
        isinstance(var.data, da.Array)
        for name, var in decoded.variables.items()
        if name not in decoded.indexes
    )
    assert_identical(decoded, conventions.decode_cf(original).compute())
def test_decode_cf_with_drop_variables(self) -> None:
    """drop_variables accepts both an iterable and a bare string."""
    ds_in = Dataset(
        {
            "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
            "x": ("x", [9, 8, 7], {"units": "km"}),
            "foo": (
                ("t", "x"),
                [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                {"units": "bar"},
            ),
            "y": ("t", [5, 10, -999], {"_FillValue": -999}),
        }
    )
    # "x" is dropped; "t" decodes to datetimes and "y" gets its fill
    # value masked to NaN.
    ds_want = Dataset(
        {
            "t": pd.date_range("2000-01-01", periods=3),
            "foo": (
                ("t", "x"),
                [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                {"units": "bar"},
            ),
            "y": ("t", [5, 10, np.nan]),
        }
    )
    got_from_tuple = conventions.decode_cf(ds_in, drop_variables=("x",))
    got_from_str = conventions.decode_cf(ds_in, drop_variables="x")
    assert_identical(ds_want, got_from_tuple)
    assert_identical(ds_want, got_from_str)
def test_decode_cf_with_dask(self):
    """decode_cf on a chunked dataset keeps every non-index variable lazy.

    Removed a leftover ``print(decoded)`` debug statement that polluted
    test output.
    """
    import dask.array as da

    original = Dataset({
        't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
        'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}),
        'bar': ('string2', [b'a', b'b']),
        'baz': (('x'), [b'abc'], {'_Encoding': 'utf-8'}),
        'y': ('t', [5, 10, -999], {'_FillValue': -999}),
    }).chunk()
    decoded = conventions.decode_cf(original)
    # Decoding must stay lazy: everything except index coordinates should
    # still be backed by dask arrays.
    assert all(
        isinstance(var.data, da.Array)
        for name, var in decoded.variables.items()
        if name not in decoded.indexes)
    assert_identical(decoded, conventions.decode_cf(original).compute())
def test_decode_cf_time_kwargs(self) -> None:
    """decode_times / decode_timedelta independently control time decoding."""
    raw = {
        "coords": {
            "timedelta": {
                "data": np.array([1, 2, 3], dtype="int64"),
                "dims": "timedelta",
                "attrs": {"units": "days"},
            },
            "time": {
                "data": np.array([1, 2, 3], dtype="int64"),
                "dims": "time",
                "attrs": {"units": "days since 2000-01-01"},
            },
        },
        "dims": {"time": 3, "timedelta": 3},
        "data_vars": {
            "a": {"dims": ("time", "timedelta"), "data": np.ones((3, 3))},
        },
    }
    ds = Dataset.from_dict(raw)

    # Default: both kinds of time variables are decoded.
    out = conventions.decode_cf(ds)
    assert out.timedelta.dtype == np.dtype("m8[ns]")
    assert out.time.dtype == np.dtype("M8[ns]")

    # decode_times=False: neither is decoded.
    out = conventions.decode_cf(ds, decode_times=False)
    assert out.timedelta.dtype == np.dtype("int64")
    assert out.time.dtype == np.dtype("int64")

    # Times only.
    out = conventions.decode_cf(ds, decode_times=True, decode_timedelta=False)
    assert out.timedelta.dtype == np.dtype("int64")
    assert out.time.dtype == np.dtype("M8[ns]")

    # Timedeltas only.
    out = conventions.decode_cf(ds, decode_times=False, decode_timedelta=True)
    assert out.timedelta.dtype == np.dtype("m8[ns]")
    assert out.time.dtype == np.dtype("int64")
def test_decode_cf_with_drop_variables(self):
    """drop_variables accepts both an iterable and a bare string."""
    ds_in = Dataset({
        't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
        'x': ("x", [9, 8, 7], {'units': 'km'}),
        'foo': (('t', 'x'),
                [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                {'units': 'bar'}),
        'y': ('t', [5, 10, -999], {'_FillValue': -999}),
    })
    # NOTE: the expected dataset still carries a default integer 'x'
    # coordinate even though the 'x' variable itself is dropped.
    ds_want = Dataset({
        't': pd.date_range('2000-01-01', periods=3),
        'x': ("x", [0, 1, 2]),
        'foo': (('t', 'x'),
                [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                {'units': 'bar'}),
        'y': ('t', [5, 10, np.nan]),
    })
    got_from_tuple = conventions.decode_cf(ds_in, drop_variables=("x",))
    got_from_str = conventions.decode_cf(ds_in, drop_variables="x")
    self.assertDatasetIdentical(ds_want, got_from_tuple)
    self.assertDatasetIdentical(ds_want, got_from_str)
def test_dataset_repr_with_netcdf4_datetimes(self) -> None:
    """Regression test for #347: repr shows the dtype of decoded times."""
    # A 'noleap' calendar is expected to decode to object dtype.
    noleap_attrs = {"units": "days since 0001-01-01", "calendar": "noleap"}
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", "unable to decode time")
        ds = decode_cf(Dataset({"time": ("time", [0, 1], noleap_attrs)}))
    assert "(time) object" in repr(ds)

    # A standard calendar decodes to datetime64[ns].
    std_attrs = {"units": "days since 1900-01-01"}
    ds = decode_cf(Dataset({"time": ("time", [0, 1], std_attrs)}))
    assert "(time) datetime64[ns]" in repr(ds)
def test_dataset_repr_with_netcdf4_datetimes(self):
    """Regression test for #347: repr shows the dtype of decoded times."""
    # A 'noleap' calendar is expected to decode to object dtype.
    noleap_attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'}
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'unable to decode time')
        ds = decode_cf(Dataset({'time': ('time', [0, 1], noleap_attrs)}))
    assert '(time) object' in repr(ds)

    # A standard calendar decodes to datetime64[ns].
    std_attrs = {'units': 'days since 1900-01-01'}
    ds = decode_cf(Dataset({'time': ('time', [0, 1], std_attrs)}))
    assert '(time) datetime64[ns]' in repr(ds)
def test_decode_cf_with_dask(self):
    """decode_cf on a chunked dataset keeps every non-index variable lazy.

    Removed a leftover ``print(decoded)`` debug statement that polluted
    test output.
    """
    import dask.array as da

    original = Dataset({
        't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
        'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}),
        'bar': ('string2', [b'a', b'b']),
        'baz': (('x'), [b'abc'], {'_Encoding': 'utf-8'}),
        'y': ('t', [5, 10, -999], {'_FillValue': -999}),
    }).chunk()
    decoded = conventions.decode_cf(original)
    # Decoding must stay lazy: everything except index coordinates should
    # still be backed by dask arrays.
    assert all(isinstance(var.data, da.Array)
               for name, var in decoded.variables.items()
               if name not in decoded.indexes)
    assert_identical(decoded, conventions.decode_cf(original).compute())
def test_multidimensional_coordinates(self):
    """Regression test for GH1763: per-variable 'coordinates' attributes
    survive an encode round-trip when 2D coordinates share some (but not
    all) dimensions, and nothing leaks into the global attrs."""
    z1 = np.zeros((1, 5, 3))
    z2 = np.zeros((1, 6, 3))
    z3 = np.zeros((1, 5, 4))
    orig = Dataset({
        'lon1': (['x1', 'y1'], z1.squeeze(0), {}),
        'lon2': (['x2', 'y1'], z2.squeeze(0), {}),
        'lon3': (['x1', 'y2'], z3.squeeze(0), {}),
        'lat1': (['x1', 'y1'], z1.squeeze(0), {}),
        'lat2': (['x2', 'y1'], z2.squeeze(0), {}),
        'lat3': (['x1', 'y2'], z3.squeeze(0), {}),
        'foo1': (['time', 'x1', 'y1'], z1, {'coordinates': 'lon1 lat1'}),
        'foo2': (['time', 'x2', 'y1'], z2, {'coordinates': 'lon2 lat2'}),
        'foo3': (['time', 'x1', 'y2'], z3, {'coordinates': 'lon3 lat3'}),
        'time': ('time', [0.], {'units': 'hours since 2017-01-01'}),
    })
    orig = conventions.decode_cf(orig)
    # Encode the coordinates, as they would be in a netCDF output file.
    enc, attrs = conventions.encode_dataset_coordinates(orig)
    # Each data variable must get back exactly its own lon/lat pair.
    for suffix in ('1', '2', '3'):
        coords = enc['foo' + suffix].attrs.get('coordinates', '')
        assert set(coords.split()) == set(['lat' + suffix, 'lon' + suffix])
    # Nothing should have been promoted to a global 'coordinates' attr.
    assert 'coordinates' not in attrs
def test_decode_cf_with_drop_variables(self):
    """drop_variables accepts both an iterable and a bare string."""
    ds_in = Dataset({
        't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
        'x': ("x", [9, 8, 7], {'units': 'km'}),
        'foo': (('t', 'x'),
                [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                {'units': 'bar'}),
        'y': ('t', [5, 10, -999], {'_FillValue': -999}),
    })
    # "x" is dropped; "t" decodes to datetimes and "y" gets its fill
    # value masked to NaN.
    ds_want = Dataset({
        't': pd.date_range('2000-01-01', periods=3),
        'foo': (('t', 'x'),
                [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                {'units': 'bar'}),
        'y': ('t', [5, 10, np.nan]),
    })
    got_from_tuple = conventions.decode_cf(ds_in, drop_variables=("x",))
    got_from_str = conventions.decode_cf(ds_in, drop_variables="x")
    self.assertDatasetIdentical(ds_want, got_from_tuple)
    self.assertDatasetIdentical(ds_want, got_from_str)
def test_decode_coordinates(self) -> None:
    """Regression test for GH610: the 'coordinates' attr is moved from
    attrs into encoding during decoding."""
    ds = Dataset(
        {"foo": ("t", [1, 2], {"coordinates": "x"}), "x": ("t", [4, 5])}
    )
    decoded = conventions.decode_cf(ds)
    assert decoded.foo.encoding["coordinates"] == "x"
def test_multidimensional_coordinates(self) -> None:
    """Regression test for GH1763: per-variable 'coordinates' attributes
    survive an encode round-trip when 2D coordinates share some (but not
    all) dimensions, and nothing leaks into the global attrs."""
    z1 = np.zeros((1, 5, 3))
    z2 = np.zeros((1, 6, 3))
    z3 = np.zeros((1, 5, 4))
    orig = Dataset(
        {
            "lon1": (["x1", "y1"], z1.squeeze(0), {}),
            "lon2": (["x2", "y1"], z2.squeeze(0), {}),
            "lon3": (["x1", "y2"], z3.squeeze(0), {}),
            "lat1": (["x1", "y1"], z1.squeeze(0), {}),
            "lat2": (["x2", "y1"], z2.squeeze(0), {}),
            "lat3": (["x1", "y2"], z3.squeeze(0), {}),
            "foo1": (["time", "x1", "y1"], z1, {"coordinates": "lon1 lat1"}),
            "foo2": (["time", "x2", "y1"], z2, {"coordinates": "lon2 lat2"}),
            "foo3": (["time", "x1", "y2"], z3, {"coordinates": "lon3 lat3"}),
            "time": ("time", [0.0], {"units": "hours since 2017-01-01"}),
        }
    )
    orig = conventions.decode_cf(orig)
    # Encode the coordinates, as they would be in a netCDF output file.
    enc, attrs = conventions.encode_dataset_coordinates(orig)
    # Each data variable must get back exactly its own lon/lat pair.
    for i in (1, 2, 3):
        coords = enc[f"foo{i}"].attrs.get("coordinates", "")
        assert set(coords.split()) == {f"lat{i}", f"lon{i}"}
    # Nothing should have been promoted to a global 'coordinates' attr.
    assert "coordinates" not in attrs
def test_dataset_repr_with_netcdf4_datetimes(self):
    """Regression test for #347: repr shows the dtype of decoded times."""
    # A 'noleap' calendar is expected to decode to object dtype.
    noleap_attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'}
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'unable to decode time')
        ds = decode_cf(Dataset({'time': ('time', [0, 1], noleap_attrs)}))
    self.assertIn('(time) object', repr(ds))

    # A standard calendar decodes to datetime64[ns].
    std_attrs = {'units': 'days since 1900-01-01'}
    ds = decode_cf(Dataset({'time': ('time', [0, 1], std_attrs)}))
    self.assertIn('(time) datetime64[ns]', repr(ds))

    # Constructing the lazy datetime array should not warn (GH1111);
    # escalating warnings to errors makes any warning fail the test.
    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        conventions.DecodedCFDatetimeArray(
            np.asarray([722624]), "days since 0001-01-01")
def test_decode_dask_times(self) -> None:
    """Decoding then chunking must equal chunking then decoding."""
    ds = Dataset.from_dict(
        {
            "coords": {},
            "dims": {"time": 5},
            "data_vars": {
                "average_T1": {
                    "dims": ("time",),
                    "attrs": {"units": "days since 1958-01-01 00:00:00"},
                    "data": [87659.0, 88024.0, 88389.0, 88754.0, 89119.0],
                }
            },
        }
    )
    chunk_then_decode = conventions.decode_cf(ds.chunk())
    decode_then_chunk = conventions.decode_cf(ds).chunk()
    assert_identical(chunk_then_decode, decode_then_chunk)
def test_decode_coordinates(self):
    """Regression test for GH610: the 'coordinates' attr is moved from
    attrs into encoding during decoding."""
    ds = Dataset({
        'foo': ('t', [1, 2], {'coordinates': 'x'}),
        'x': ('t', [4, 5]),
    })
    decoded = conventions.decode_cf(ds)
    self.assertEqual(decoded.foo.encoding['coordinates'], 'x')
def test_decode_cf_datetime_transition_to_invalid(self):
    """Dates past the datetime64[ns] range decode to datetime objects."""
    # Build a dataset whose time coordinate has not yet been decoded.
    from datetime import datetime

    ds = Dataset(coords={'time': [0, 266 * 365]})
    ds.time.attrs = dict(units='days since 2000-01-01 00:00:00')
    decoded = conventions.decode_cf(ds)
    # 266 * 365 days lands in year 2265, beyond datetime64[ns] coverage.
    want = [datetime(2000, 1, 1, 0, 0), datetime(2265, 10, 28, 0, 0)]
    self.assertArrayEqual(decoded.time.values, want)
def test_dataset(self):
    """decode_cf handles times, fill values and 'coordinates' together."""
    ds_in = Dataset({
        't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
        'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}),
        'y': ('t', [5, 10, -999], {'_FillValue': -999}),
    })
    # 't' decodes to datetimes, 'y' becomes a coordinate with its fill
    # value masked, and 'foo' loses the consumed 'coordinates' attr.
    ds_want = Dataset(
        {'foo': ('t', [0, 0, 0], {'units': 'bar'})},
        {'t': pd.date_range('2000-01-01', periods=3),
         'y': ('t', [5.0, 10.0, np.nan])},
    )
    got = conventions.decode_cf(ds_in)
    assert_identical(ds_want, got)
def test_decode_cf_datetime_transition_to_invalid(self):
    """Dates past the datetime64[ns] range decode to datetime objects."""
    # Build a dataset whose time coordinate has not yet been decoded.
    from datetime import datetime

    ds = Dataset(coords={'time': [0, 266 * 365]})
    ds.time.attrs = dict(units='days since 2000-01-01 00:00:00')
    decoded = conventions.decode_cf(ds)
    # 266 * 365 days lands in year 2265, beyond datetime64[ns] coverage.
    want = [datetime(2000, 1, 1, 0, 0), datetime(2265, 10, 28, 0, 0)]
    self.assertArrayEqual(decoded.time.values, want)
def test_decode_cf_datetime_transition_to_invalid(self):
    """Dates past the datetime64[ns] range decode to datetime objects."""
    # Build a dataset whose time coordinate has not yet been decoded.
    from datetime import datetime

    ds = Dataset(coords={'time': [0, 266 * 365]})
    ds.time.attrs = dict(units='days since 2000-01-01 00:00:00')
    with warnings.catch_warnings():
        # The out-of-range date emits an expected decode warning.
        warnings.filterwarnings('ignore', 'unable to decode time')
        decoded = conventions.decode_cf(ds)
    # 266 * 365 days lands in year 2265, beyond datetime64[ns] coverage.
    want = [datetime(2000, 1, 1, 0, 0), datetime(2265, 10, 28, 0, 0)]
    assert_array_equal(decoded.time.values, want)
def maybe_decode_store(store, lock=False):
    """Apply CF decoding to *store* and return the resulting dataset.

    TODO: think on all the decode options, especially fill values —
    mask_and_scale, decode_times, concat_characters, decode_coords and
    drop_variables are not forwarded to ``decode_cf`` yet.
    """
    ds = conventions.decode_cf(store)
    # TODO: this is where we would apply caching
    return ds
def test_decode_cf_with_dask(self):
    """Decoding a chunked dataset keeps the fill-masked variable lazy."""
    import dask

    chunked = Dataset({
        't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
        'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}),
        'y': ('t', [5, 10, -999], {'_FillValue': -999}),
    }).chunk({'t': 1})
    decoded = conventions.decode_cf(chunked)
    # The masked 'y' variable must still be a dask collection.
    assert dask.is_dask_collection(decoded.y.data)
def test_dataset(self) -> None:
    """decode_cf handles times, fill values and 'coordinates' together."""
    ds_in = Dataset(
        {
            "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
            "foo": ("t", [0, 0, 0], {"coordinates": "y", "units": "bar"}),
            "y": ("t", [5, 10, -999], {"_FillValue": -999}),
        }
    )
    # 't' decodes to datetimes, 'y' becomes a coordinate with its fill
    # value masked, and 'foo' loses the consumed 'coordinates' attr.
    ds_want = Dataset(
        {"foo": ("t", [0, 0, 0], {"units": "bar"})},
        {
            "t": pd.date_range("2000-01-01", periods=3),
            "y": ("t", [5.0, 10.0, np.nan]),
        },
    )
    got = conventions.decode_cf(ds_in)
    assert_identical(ds_want, got)
def test_invalid_coordinates(self):
    """Regression test for GH308: a 'coordinates' attr naming a missing
    variable leaves the dataset unchanged."""
    ds = Dataset({'foo': ('t', [1, 2], {'coordinates': 'invalid'})})
    decoded = conventions.decode_cf(ds)
    self.assertDatasetIdentical(ds, decoded)
def test_invalid_coordinates(self) -> None:
    """Regression test for GH308: a 'coordinates' attr naming a missing
    variable leaves the dataset unchanged."""
    ds = Dataset({"foo": ("t", [1, 2], {"coordinates": "invalid"})})
    decoded = conventions.decode_cf(ds)
    assert_identical(ds, decoded)
def test_invalid_units_raises_eagerly(self):
    """Bad time units must raise at decode time, not lazily.

    Fixed: ``assertRaisesRegexp`` is a deprecated alias that was removed
    in Python 3.12; use ``assertRaisesRegex`` instead.
    """
    ds = Dataset({'time': ('time', [0, 1], {'units': 'foobar since 123'})})
    with self.assertRaisesRegex(ValueError, 'unable to decode time'):
        decode_cf(ds)
def test_invalid_time_units_raises_eagerly(self):
    """Bad time units must raise at decode time, not lazily."""
    bad = Dataset({"time": ("time", [0, 1], {"units": "foobar since 123"})})
    with raises_regex(ValueError, "unable to decode time"):
        decode_cf(bad)
def test_invalid_time_units_raises_eagerly(self) -> None:
    """Bad time units must raise at decode time, not lazily."""
    bad = Dataset({"time": ("time", [0, 1], {"units": "foobar since 123"})})
    with pytest.raises(ValueError, match=r"unable to decode time"):
        decode_cf(bad)
def test_invalid_time_units_raises_eagerly(self):
    """Bad time units must raise at decode time, not lazily."""
    bad = Dataset({'time': ('time', [0, 1], {'units': 'foobar since 123'})})
    with raises_regex(ValueError, 'unable to decode time'):
        decode_cf(bad)
def test_decode_coordinates(self):
    """Regression test for GH610: the 'coordinates' attr is moved from
    attrs into encoding during decoding."""
    ds = Dataset({
        'foo': ('t', [1, 2], {'coordinates': 'x'}),
        'x': ('t', [4, 5]),
    })
    decoded = conventions.decode_cf(ds)
    assert decoded.foo.encoding['coordinates'] == 'x'
def test_invalid_coordinates(self):
    """Regression test for GH308: a 'coordinates' attr naming a missing
    variable leaves the dataset unchanged."""
    ds = Dataset({'foo': ('t', [1, 2], {'coordinates': 'invalid'})})
    decoded = conventions.decode_cf(ds)
    assert_identical(ds, decoded)