Exemple #1
0
    def test_decode_cf_with_dask(self):
        """Check that decoding a chunked dataset keeps its variables lazy.

        Every variable of the decoded dataset that is not an index must be
        backed by a dask array, and the decoded dataset must be identical to
        a second decoding of the same input after ``compute()``.
        """
        import dask.array as da

        time_var = ("t", [0, 1, 2], {"units": "days since 2000-01-01"})
        foo_var = ("t", [0, 0, 0], {"coordinates": "y", "units": "bar"})
        bar_var = ("string2", [b"a", b"b"])
        baz_var = ("x", [b"abc"], {"_Encoding": "utf-8"})
        y_var = ("t", [5, 10, -999], {"_FillValue": -999})
        original = Dataset({
            "t": time_var,
            "foo": foo_var,
            "bar": bar_var,
            "baz": baz_var,
            "y": y_var,
        }).chunk()

        decoded = conventions.decode_cf(original)

        # Variables that appear in ``decoded.indexes`` are left out of the
        # laziness check.
        non_index_vars = [
            var
            for name, var in decoded.variables.items()
            if name not in decoded.indexes
        ]
        assert all(isinstance(var.data, da.Array) for var in non_index_vars)
        assert_identical(decoded, conventions.decode_cf(original).compute())
 def test_decode_cf_with_drop_variables(self) -> None:
     """Check that ``drop_variables`` removes ``x`` from the decoded dataset.

     The option is exercised twice, once as a one-element tuple and once as
     a bare string, and both results are compared with the same expectation.
     """
     raw_variables = {
         "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
         "x": ("x", [9, 8, 7], {"units": "km"}),
         "foo": (("t", "x"), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], {"units": "bar"}),
         "y": ("t", [5, 10, -999], {"_FillValue": -999}),
     }
     original = Dataset(raw_variables)

     # The expectation has no "x" variable; "t" holds a date range and the
     # fill value in "y" is replaced by NaN.
     expected_variables = {
         "t": pd.date_range("2000-01-01", periods=3),
         "foo": (("t", "x"), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], {"units": "bar"}),
         "y": ("t", [5, 10, np.nan]),
     }
     expected = Dataset(expected_variables)

     # Decode with both spellings first, then compare each result.
     decoded_variants = [
         conventions.decode_cf(original, drop_variables=spec)
         for spec in (("x",), "x")
     ]
     for decoded in decoded_variants:
         assert_identical(expected, decoded)
 def test_decode_cf_with_dask(self):
     """Check that ``decode_cf`` keeps a chunked dataset dask-backed.

     All decoded variables other than indexes must hold ``dask.array.Array``
     data, and the lazy result must be identical to a computed decoding of
     the same input.
     """
     import dask.array as da

     variables = {
         't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
         'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}),
         'bar': ('string2', [b'a', b'b']),
         'baz': ('x', [b'abc'], {'_Encoding': 'utf-8'}),
         'y': ('t', [5, 10, -999], {'_FillValue': -999}),
     }
     original = Dataset(variables).chunk()
     decoded = conventions.decode_cf(original)

     # Names listed in ``decoded.indexes`` are skipped by this check.
     for name, var in decoded.variables.items():
         if name in decoded.indexes:
             continue
         assert isinstance(var.data, da.Array)

     assert_identical(decoded, conventions.decode_cf(original).compute())
    def test_decode_cf_time_kwargs(self) -> None:
        """Check how ``decode_times`` / ``decode_timedelta`` affect dtypes.

        A dataset with an int64 ``timedelta`` coordinate (units "days") and
        an int64 ``time`` coordinate (units "days since 2000-01-01") is
        decoded with several keyword combinations, and the dtype of both
        coordinates is asserted after each decoding.
        """
        timedelta_coord = {
            "data": np.array([1, 2, 3], dtype="int64"),
            "dims": "timedelta",
            "attrs": {"units": "days"},
        }
        time_coord = {
            "data": np.array([1, 2, 3], dtype="int64"),
            "dims": "time",
            "attrs": {"units": "days since 2000-01-01"},
        }
        ds = Dataset.from_dict(
            {
                "coords": {"timedelta": timedelta_coord, "time": time_coord},
                "dims": {"time": 3, "timedelta": 3},
                "data_vars": {
                    "a": {"dims": ("time", "timedelta"), "data": np.ones((3, 3))},
                },
            }
        )

        raw = np.dtype("int64")
        as_timedelta = np.dtype("m8[ns]")
        as_datetime = np.dtype("M8[ns]")

        # (decode_cf keyword arguments, expected timedelta dtype, expected time dtype)
        cases = [
            ({}, as_timedelta, as_datetime),
            ({"decode_times": False}, raw, raw),
            ({"decode_times": True, "decode_timedelta": False}, raw, as_datetime),
            ({"decode_times": False, "decode_timedelta": True}, as_timedelta, raw),
        ]
        for kwargs, timedelta_dtype, time_dtype in cases:
            dsc = conventions.decode_cf(ds, **kwargs)
            assert dsc.timedelta.dtype == timedelta_dtype
            assert dsc.time.dtype == time_dtype
 def test_decode_cf_with_drop_variables(self):
     """Decode with ``drop_variables`` given as a tuple and as a string.

     Both decodings are compared with an expectation in which ``x`` holds
     ``[0, 1, 2]`` with no attributes instead of the original ``[9, 8, 7]``.
     """
     source_vars = {
         't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
         'x': ("x", [9, 8, 7], {'units': 'km'}),
         'foo': (('t', 'x'), [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                 {'units': 'bar'}),
         'y': ('t', [5, 10, -999], {'_FillValue': -999}),
     }
     original = Dataset(source_vars)

     expected_vars = {
         't': pd.date_range('2000-01-01', periods=3),
         'x': ("x", [0, 1, 2]),
         'foo': (('t', 'x'), [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                 {'units': 'bar'}),
         'y': ('t', [5, 10, np.nan]),
     }
     expected = Dataset(expected_vars)

     # Run both decodings before making any comparison.
     from_tuple = conventions.decode_cf(original, drop_variables=("x", ))
     from_string = conventions.decode_cf(original, drop_variables="x")
     for decoded in (from_tuple, from_string):
         self.assertDatasetIdentical(expected, decoded)
    def test_dataset_repr_with_netcdf4_datetimes(self) -> None:
        """Check the dtype shown for ``time`` in the repr of decoded datasets.

        With a "noleap" calendar starting in year 1 the repr must contain
        "(time) object"; with units starting in 1900 and no calendar it must
        contain "(time) datetime64[ns]".
        """
        # regression test for #347
        noleap_attrs = {"units": "days since 0001-01-01", "calendar": "noleap"}
        with warnings.catch_warnings():
            # Silence the "unable to decode time" warning for this decoding.
            warnings.filterwarnings("ignore", "unable to decode time")
            noleap_ds = decode_cf(Dataset({"time": ("time", [0, 1], noleap_attrs)}))
            assert "(time) object" in repr(noleap_ds)

        standard_attrs = {"units": "days since 1900-01-01"}
        standard_ds = decode_cf(Dataset({"time": ("time", [0, 1], standard_attrs)}))
        assert "(time) datetime64[ns]" in repr(standard_ds)
Exemple #7
0
    def test_dataset_repr_with_netcdf4_datetimes(self):
        """Check which dtype the repr reports for a decoded ``time`` variable.

        The "noleap" calendar case must show "(time) object" and the
        1900-based case must show "(time) datetime64[ns]".
        """
        # regression test for #347
        time_values = ('time', [0, 1])

        noleap = {'units': 'days since 0001-01-01', 'calendar': 'noleap'}
        with warnings.catch_warnings():
            # The decode warning is ignored only inside this block.
            warnings.filterwarnings('ignore', 'unable to decode time')
            decoded_noleap = decode_cf(Dataset({'time': time_values + (noleap,)}))
            assert '(time) object' in repr(decoded_noleap)

        standard = {'units': 'days since 1900-01-01'}
        decoded_standard = decode_cf(Dataset({'time': time_values + (standard,)}))
        assert '(time) datetime64[ns]' in repr(decoded_standard)
    def test_dataset_repr_with_netcdf4_datetimes(self):
        """Verify the ``time`` dtype printed in the repr after decoding.

        First case: "noleap" calendar with units since year 1, expected to
        print as ``object``. Second case: units since 1900, expected to print
        as ``datetime64[ns]``.
        """
        # regression test for #347
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'unable to decode time')
            attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'}
            encoded = Dataset({'time': ('time', [0, 1], attrs)})
            text = repr(decode_cf(encoded))
            assert '(time) object' in text

        attrs = {'units': 'days since 1900-01-01'}
        encoded = Dataset({'time': ('time', [0, 1], attrs)})
        text = repr(decode_cf(encoded))
        assert '(time) datetime64[ns]' in text
Exemple #9
0
 def test_decode_cf_with_dask(self):
     """Check that decoding a chunked dataset does not load its data.

     Each decoded variable that is not an index has to wrap a dask array,
     and the decoded dataset has to be identical to the computed decoding
     of the same chunked input.
     """
     import dask.array as da

     original = Dataset({
         't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
         'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}),
         'bar': ('string2', [b'a', b'b']),
         'baz': ('x', [b'abc'], {'_Encoding': 'utf-8'}),
         'y': ('t', [5, 10, -999], {'_FillValue': -999}),
     }).chunk()

     decoded = conventions.decode_cf(original)
     index_names = decoded.indexes

     # Only variables that are not indexes are checked for dask backing.
     is_lazy = [
         isinstance(var.data, da.Array)
         for name, var in decoded.variables.items()
         if name not in index_names
     ]
     assert all(is_lazy)

     computed = conventions.decode_cf(original).compute()
     assert_identical(decoded, computed)
 def test_multidimensional_coordinates(self):
     """Check the ``coordinates`` attribute written for each data variable.

     After decoding and re-encoding, ``fooN`` must list exactly ``latN`` and
     ``lonN`` in its ``coordinates`` attribute, and the returned global
     attributes must not contain a ``coordinates`` entry.
     """
     # regression test for GH1763
     # Set up test case with coordinates that have overlapping (but not
     # identical) dimensions.
     zeros1 = np.zeros((1, 5, 3))
     zeros2 = np.zeros((1, 6, 3))
     zeros3 = np.zeros((1, 5, 4))
     variables = {
         'lon1': (['x1', 'y1'], zeros1.squeeze(0), {}),
         'lon2': (['x2', 'y1'], zeros2.squeeze(0), {}),
         'lon3': (['x1', 'y2'], zeros3.squeeze(0), {}),
         'lat1': (['x1', 'y1'], zeros1.squeeze(0), {}),
         'lat2': (['x2', 'y1'], zeros2.squeeze(0), {}),
         'lat3': (['x1', 'y2'], zeros3.squeeze(0), {}),
         'foo1': (['time', 'x1', 'y1'], zeros1, {'coordinates': 'lon1 lat1'}),
         'foo2': (['time', 'x2', 'y1'], zeros2, {'coordinates': 'lon2 lat2'}),
         'foo3': (['time', 'x1', 'y2'], zeros3, {'coordinates': 'lon3 lat3'}),
         'time': ('time', [0.], {'units': 'hours since 2017-01-01'}),
     }
     orig = conventions.decode_cf(Dataset(variables))

     # Encode the coordinates, as they would be in a netCDF output file.
     enc, attrs = conventions.encode_dataset_coordinates(orig)

     # Read all three attributes before asserting on any of them.
     written = [
         enc[name].attrs.get('coordinates', '')
         for name in ('foo1', 'foo2', 'foo3')
     ]
     wanted = [{'lat1', 'lon1'}, {'lat2', 'lon2'}, {'lat3', 'lon3'}]

     # Make sure we have the right coordinates for each variable.
     for coords_text, names in zip(written, wanted):
         assert set(coords_text.split()) == names

     # Should not have any global coordinates.
     assert 'coordinates' not in attrs
Exemple #11
0
 def test_decode_cf_with_drop_variables(self):
     """Check ``drop_variables`` passed as a tuple and as a plain string.

     Both decodings must be identical to an expectation that contains no
     ``x`` variable.
     """
     foo_dims = ('t', 'x')
     original = Dataset({
         't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
         'x': ("x", [9, 8, 7], {'units': 'km'}),
         'foo': (foo_dims, [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                 {'units': 'bar'}),
         'y': ('t', [5, 10, -999], {'_FillValue': -999}),
     })
     expected = Dataset({
         't': pd.date_range('2000-01-01', periods=3),
         'foo': (foo_dims, [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                 {'units': 'bar'}),
         'y': ('t', [5, 10, np.nan]),
     })

     # Both decodings happen before the first comparison.
     results = [
         conventions.decode_cf(original, drop_variables=dropped)
         for dropped in (("x",), "x")
     ]
     for result in results:
         self.assertDatasetIdentical(expected, result)
Exemple #12
0
 def test_multidimensional_coordinates(self):
     """Check that encoding assigns the matching lat/lon pair per variable.

     ``foo1``, ``foo2`` and ``foo3`` must each end up with a ``coordinates``
     attribute naming only their own ``lat``/``lon`` variables, and no
     ``coordinates`` key may appear in the returned global attributes.
     """
     # regression test for GH1763
     # Set up test case with coordinates that have overlapping (but not
     # identical) dimensions.
     zeros1 = np.zeros((1, 5, 3))
     zeros2 = np.zeros((1, 6, 3))
     zeros3 = np.zeros((1, 5, 4))
     dims1 = ['x1', 'y1']
     dims2 = ['x2', 'y1']
     dims3 = ['x1', 'y2']
     orig = Dataset({
         'lon1': (dims1, zeros1.squeeze(0), {}),
         'lon2': (dims2, zeros2.squeeze(0), {}),
         'lon3': (dims3, zeros3.squeeze(0), {}),
         'lat1': (dims1, zeros1.squeeze(0), {}),
         'lat2': (dims2, zeros2.squeeze(0), {}),
         'lat3': (dims3, zeros3.squeeze(0), {}),
         'foo1': (['time'] + dims1, zeros1, {'coordinates': 'lon1 lat1'}),
         'foo2': (['time'] + dims2, zeros2, {'coordinates': 'lon2 lat2'}),
         'foo3': (['time'] + dims3, zeros3, {'coordinates': 'lon3 lat3'}),
         'time': ('time', [0.], {'units': 'hours since 2017-01-01'}),
     })
     orig = conventions.decode_cf(orig)

     # Encode the coordinates, as they would be in a netCDF output file.
     enc, attrs = conventions.encode_dataset_coordinates(orig)

     # Make sure we have the right coordinates for each variable.
     first = enc['foo1'].attrs.get('coordinates', '').split()
     second = enc['foo2'].attrs.get('coordinates', '').split()
     third = enc['foo3'].attrs.get('coordinates', '').split()
     assert set(first) == {'lat1', 'lon1'}
     assert set(second) == {'lat2', 'lon2'}
     assert set(third) == {'lat3', 'lon3'}

     # Should not have any global coordinates.
     assert 'coordinates' not in attrs
 def test_decode_coordinates(self) -> None:
     """Check that a ``coordinates`` attribute ends up in the encoding.

     After decoding, ``foo.encoding["coordinates"]`` must equal ``"x"``.
     """
     # regression test for GH610
     variables = {
         "foo": ("t", [1, 2], {"coordinates": "x"}),
         "x": ("t", [4, 5]),
     }
     decoded = conventions.decode_cf(Dataset(variables))
     assert decoded.foo.encoding["coordinates"] == "x"
 def test_multidimensional_coordinates(self) -> None:
     """Check per-variable ``coordinates`` attributes after encoding.

     Each ``fooN`` variable must list exactly ``latN`` and ``lonN``, and the
     global attributes returned by the encoder must have no ``coordinates``
     entry.
     """
     # regression test for GH1763
     # Set up test case with coordinates that have overlapping (but not
     # identical) dimensions.
     zeros1 = np.zeros((1, 5, 3))
     zeros2 = np.zeros((1, 6, 3))
     zeros3 = np.zeros((1, 5, 4))
     # (data, first dimension, second dimension) for suffixes 1, 2 and 3.
     grids = [(zeros1, "x1", "y1"), (zeros2, "x2", "y1"), (zeros3, "x1", "y2")]

     variables = {}
     for prefix in ("lon", "lat"):
         for suffix, (data, xdim, ydim) in enumerate(grids, start=1):
             variables[f"{prefix}{suffix}"] = ([xdim, ydim], data.squeeze(0), {})
     for suffix, (data, xdim, ydim) in enumerate(grids, start=1):
         variables[f"foo{suffix}"] = (
             ["time", xdim, ydim],
             data,
             {"coordinates": f"lon{suffix} lat{suffix}"},
         )
     variables["time"] = ("time", [0.0], {"units": "hours since 2017-01-01"})

     orig = conventions.decode_cf(Dataset(variables))

     # Encode the coordinates, as they would be in a netCDF output file.
     enc, attrs = conventions.encode_dataset_coordinates(orig)

     # Collect every attribute first, then make sure we have the right
     # coordinates for each variable.
     encoded_coords = {
         suffix: enc[f"foo{suffix}"].attrs.get("coordinates", "")
         for suffix in (1, 2, 3)
     }
     for suffix, text in encoded_coords.items():
         assert set(text.split()) == {f"lat{suffix}", f"lon{suffix}"}

     # Should not have any global coordinates.
     assert "coordinates" not in attrs
Exemple #15
0
    def test_dataset_repr_with_netcdf4_datetimes(self):
        """Check repr dtypes for decoded times and a warning-free decode.

        The "noleap" case must print "(time) object", the 1900-based case
        must print "(time) datetime64[ns]", and building a
        ``DecodedCFDatetimeArray`` must not emit any warning.
        """
        # regression test for #347
        noleap_attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'}
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'unable to decode time')
            noleap_ds = decode_cf(Dataset({'time': ('time', [0, 1], noleap_attrs)}))
            self.assertIn('(time) object', repr(noleap_ds))

        plain_attrs = {'units': 'days since 1900-01-01'}
        plain_ds = decode_cf(Dataset({'time': ('time', [0, 1], plain_attrs)}))
        self.assertIn('(time) datetime64[ns]', repr(plain_ds))

        # this should not throw a warning (GH1111)
        with warnings.catch_warnings():
            # Any warning raised inside this block becomes an exception.
            warnings.filterwarnings('error')
            day_numbers = np.asarray([722624])
            conventions.DecodedCFDatetimeArray(day_numbers,
                                               "days since 0001-01-01")
    def test_dataset_repr_with_netcdf4_datetimes(self):
        """Verify the repr of decoded time variables and a silent decode.

        Two datasets are decoded and their repr is searched for the expected
        ``time`` dtype; afterwards a ``DecodedCFDatetimeArray`` is built with
        warnings turned into errors.
        """
        # regression test for #347
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'unable to decode time')
            attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'}
            encoded = Dataset({'time': ('time', [0, 1], attrs)})
            shown = repr(decode_cf(encoded))
            self.assertIn('(time) object', shown)

        attrs = {'units': 'days since 1900-01-01'}
        encoded = Dataset({'time': ('time', [0, 1], attrs)})
        shown = repr(decode_cf(encoded))
        self.assertIn('(time) datetime64[ns]', shown)

        # this should not throw a warning (GH1111)
        with warnings.catch_warnings():
            warnings.filterwarnings('error')
            units = "days since 0001-01-01"
            conventions.DecodedCFDatetimeArray(np.asarray([722624]), units)
 def test_decode_dask_times(self) -> None:
     """Check that chunking before or after decoding gives the same dataset.

     ``decode_cf(original.chunk())`` must be identical to
     ``decode_cf(original).chunk()``.
     """
     average_t1 = {
         "dims": ("time",),
         "attrs": {"units": "days since 1958-01-01 00:00:00"},
         "data": [87659.0, 88024.0, 88389.0, 88754.0, 89119.0],
     }
     original = Dataset.from_dict(
         {
             "coords": {},
             "dims": {"time": 5},
             "data_vars": {"average_T1": average_t1},
         }
     )

     chunked_then_decoded = conventions.decode_cf(original.chunk())
     decoded_then_chunked = conventions.decode_cf(original).chunk()
     assert_identical(chunked_then_decoded, decoded_then_chunked)
 def test_decode_coordinates(self):
     """Check that decoding stores ``coordinates`` in the variable encoding.

     The decoded ``foo`` variable must have ``encoding['coordinates']``
     equal to ``'x'``.
     """
     # regression test for GH610
     foo = ('t', [1, 2], {'coordinates': 'x'})
     x = ('t', [4, 5])
     decoded = conventions.decode_cf(Dataset({'foo': foo, 'x': x}))
     self.assertEqual(decoded.foo.encoding['coordinates'], 'x')
    def test_decode_cf_datetime_transition_to_invalid(self):
        """Decode day offsets 0 and 266 * 365 counted from 2000-01-01.

        The decoded ``time`` values must equal 2000-01-01 and 2265-10-28 as
        ``datetime`` objects.
        """
        # manually create dataset with not-decoded date
        from datetime import datetime

        day_offsets = [0, 266 * 365]
        ds = Dataset(coords={'time': day_offsets})
        ds.time.attrs = {'units': 'days since 2000-01-01 00:00:00'}

        decoded_times = conventions.decode_cf(ds).time.values
        expected = [datetime(2000, 1, 1, 0, 0), datetime(2265, 10, 28, 0, 0)]

        self.assertArrayEqual(decoded_times, expected)
Exemple #20
0
 def test_dataset(self):
     """Check the result of decoding a small dataset.

     The decoded dataset must be identical to one where ``foo`` keeps only
     its ``units`` attribute, and ``t`` (a date range) and ``y`` (with NaN in
     place of -999) are passed as the second ``Dataset`` argument.
     """
     encoded_vars = {
         't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
         'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}),
         'y': ('t', [5, 10, -999], {'_FillValue': -999}),
     }
     original = Dataset(encoded_vars)

     expected_data = {'foo': ('t', [0, 0, 0], {'units': 'bar'})}
     expected_coords = {
         't': pd.date_range('2000-01-01', periods=3),
         'y': ('t', [5.0, 10.0, np.nan]),
     }
     expected = Dataset(expected_data, expected_coords)

     assert_identical(expected, conventions.decode_cf(original))
 def test_dataset(self):
     """Decode a three-variable dataset and compare it with the expectation.

     In the expectation ``foo`` has only the ``units`` attribute, ``t`` is a
     three-day date range and the last value of ``y`` is NaN.
     """
     t_encoded = ('t', [0, 1, 2], {'units': 'days since 2000-01-01'})
     foo_encoded = ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'})
     y_encoded = ('t', [5, 10, -999], {'_FillValue': -999})
     original = Dataset({'t': t_encoded, 'foo': foo_encoded, 'y': y_encoded})

     foo_decoded = ('t', [0, 0, 0], {'units': 'bar'})
     t_decoded = pd.date_range('2000-01-01', periods=3)
     y_decoded = ('t', [5.0, 10.0, np.nan])
     expected = Dataset({'foo': foo_decoded},
                        {'t': t_decoded, 'y': y_decoded})

     actual = conventions.decode_cf(original)
     assert_identical(expected, actual)
Exemple #22
0
    def test_decode_cf_datetime_transition_to_invalid(self):
        """Check decoding of a time axis built from raw day offsets.

        Offsets 0 and 266 * 365 with units "days since 2000-01-01 00:00:00"
        must decode to 2000-01-01 and 2265-10-28.
        """
        # manually create dataset with not-decoded date
        from datetime import datetime

        ds = Dataset(coords={'time': [0, 266 * 365]})
        ds.time.attrs = {'units': 'days since 2000-01-01 00:00:00'}
        ds_decoded = conventions.decode_cf(ds)

        first_date = datetime(2000, 1, 1, 0, 0)
        last_date = datetime(2265, 10, 28, 0, 0)

        self.assertArrayEqual(ds_decoded.time.values, [first_date, last_date])
    def test_decode_cf_datetime_transition_to_invalid(self):
        """Decode day offsets 0 and 266 * 365 counted from 2000-01-01.

        The "unable to decode time" warning is ignored while decoding, and
        the resulting values must equal 2000-01-01 and 2265-10-28.
        """
        # manually create dataset with not-decoded date
        from datetime import datetime

        day_offsets = [0, 266 * 365]
        ds = Dataset(coords={'time': day_offsets})
        ds.time.attrs = {'units': 'days since 2000-01-01 00:00:00'}

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'unable to decode time')
            ds_decoded = conventions.decode_cf(ds)

        decoded_times = ds_decoded.time.values
        expected = [datetime(2000, 1, 1, 0, 0), datetime(2265, 10, 28, 0, 0)]
        assert_array_equal(decoded_times, expected)
Exemple #24
0
    def maybe_decode_store(store, lock=False):
        """Return ``store`` decoded with ``conventions.decode_cf``.

        No decoding options are forwarded, so ``decode_cf`` runs with its
        defaults. ``lock`` is accepted but not used here.
        """
        # TODO: THINK ON ALL THESE OPTIONS. ESPECIALLY FILL
        # Options not forwarded yet: mask_and_scale, decode_times,
        # concat_characters, decode_coords, drop_variables.

        # TODO: this is where we would apply caching
        return conventions.decode_cf(store)
Exemple #25
0
    def test_decode_cf_datetime_transition_to_invalid(self):
        """Check decoding of a time axis built from raw day offsets.

        Decoding runs with the "unable to decode time" warning ignored; the
        decoded values must equal 2000-01-01 and 2265-10-28.
        """
        # manually create dataset with not-decoded date
        from datetime import datetime

        ds = Dataset(coords={'time': [0, 266 * 365]})
        ds.time.attrs = {'units': 'days since 2000-01-01 00:00:00'}

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', 'unable to decode time')
            ds_decoded = conventions.decode_cf(ds)

        first_date = datetime(2000, 1, 1, 0, 0)
        last_date = datetime(2265, 10, 28, 0, 0)
        assert_array_equal(ds_decoded.time.values, [first_date, last_date])
Exemple #26
0
 def test_decode_cf_with_dask(self):
     """Check that ``y`` is still a dask collection after decoding.

     The input is chunked along ``t`` with chunk size 1 before it is passed
     to ``decode_cf``.
     """
     import dask

     variables = {
         't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
         'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}),
         'y': ('t', [5, 10, -999], {'_FillValue': -999}),
     }
     chunked = Dataset(variables).chunk({'t': 1})

     y_data = conventions.decode_cf(chunked).y.data
     assert dask.is_dask_collection(y_data)
 def test_dataset(self) -> None:
     """Check the result of decoding a small dataset.

     The expectation keeps only ``units`` on ``foo`` and passes ``t`` (a
     date range) and ``y`` (NaN instead of -999) as the second ``Dataset``
     argument.
     """
     encoded_vars = {
         "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
         "foo": ("t", [0, 0, 0], {"coordinates": "y", "units": "bar"}),
         "y": ("t", [5, 10, -999], {"_FillValue": -999}),
     }
     expected_data = {"foo": ("t", [0, 0, 0], {"units": "bar"})}
     expected_coords = {
         "t": pd.date_range("2000-01-01", periods=3),
         "y": ("t", [5.0, 10.0, np.nan]),
     }

     original = Dataset(encoded_vars)
     expected = Dataset(expected_data, expected_coords)
     assert_identical(expected, conventions.decode_cf(original))
 def test_invalid_coordinates(self):
     """Check that an unknown name in ``coordinates`` leaves the data as is.

     ``foo`` refers to a variable called ``invalid`` that is not in the
     dataset; the decoded result must be identical to the input.
     """
     # regression test for GH308
     foo = ('t', [1, 2], {'coordinates': 'invalid'})
     original = Dataset({'foo': foo})
     decoded = conventions.decode_cf(original)
     self.assertDatasetIdentical(original, decoded)
 def test_invalid_coordinates(self) -> None:
     """Check that decoding tolerates a ``coordinates`` name that is absent.

     The dataset has no variable called ``invalid``; decoding must return a
     dataset identical to the input.
     """
     # regression test for GH308
     variables = {"foo": ("t", [1, 2], {"coordinates": "invalid"})}
     original = Dataset(variables)
     assert_identical(original, conventions.decode_cf(original))
 def test_invalid_units_raises_eagerly(self):
     """Check that unparseable time units make ``decode_cf`` raise.

     The call itself must raise ``ValueError`` with a message matching
     "unable to decode time".
     """
     ds = Dataset({'time': ('time', [0, 1], {'units': 'foobar since 123'})})
     # assertRaisesRegexp is a deprecated alias that was removed in
     # Python 3.12; assertRaisesRegex is the supported spelling.
     with self.assertRaisesRegex(ValueError, 'unable to decode time'):
         decode_cf(ds)
Exemple #31
0
 def test_invalid_time_units_raises_eagerly(self):
     """Check that ``decode_cf`` raises for units "foobar since 123".

     A ``ValueError`` matching "unable to decode time" is expected from the
     ``decode_cf`` call itself.
     """
     bad_time = ("time", [0, 1], {"units": "foobar since 123"})
     ds = Dataset({"time": bad_time})
     with raises_regex(ValueError, "unable to decode time"):
         decode_cf(ds)
 def test_invalid_time_units_raises_eagerly(self) -> None:
     """Check that unparseable time units raise during ``decode_cf``.

     The expected error is a ``ValueError`` whose message matches
     "unable to decode time".
     """
     bad_attrs = {"units": "foobar since 123"}
     ds = Dataset({"time": ("time", [0, 1], bad_attrs)})
     with pytest.raises(ValueError, match=r"unable to decode time"):
         decode_cf(ds)
Exemple #33
0
 def test_invalid_time_units_raises_eagerly(self):
     """Check that invalid time units raise as soon as ``decode_cf`` runs.

     The units string "foobar since 123" must lead to a ``ValueError``
     matching "unable to decode time".
     """
     bad_units = {'units': 'foobar since 123'}
     ds = Dataset({'time': ('time', [0, 1], bad_units)})
     with raises_regex(ValueError, 'unable to decode time'):
         decode_cf(ds)
Exemple #34
0
 def test_decode_coordinates(self):
     """Check that ``coordinates`` is available in the decoded encoding.

     ``foo`` is created with ``coordinates`` set to ``'x'``; after decoding
     the same value must be found in ``foo.encoding``.
     """
     # regression test for GH610
     variables = {
         'foo': ('t', [1, 2], {'coordinates': 'x'}),
         'x': ('t', [4, 5]),
     }
     original = Dataset(variables)
     foo_encoding = conventions.decode_cf(original).foo.encoding
     assert foo_encoding['coordinates'] == 'x'
Exemple #35
0
 def test_invalid_coordinates(self):
     """Check that a ``coordinates`` name with no matching variable is kept.

     Decoding a dataset whose ``foo`` points at ``invalid`` must give back a
     dataset identical to the input.
     """
     # regression test for GH308
     foo = ('t', [1, 2], {'coordinates': 'invalid'})
     original = Dataset({'foo': foo})
     decoded = conventions.decode_cf(original)
     assert_identical(original, decoded)