def test_getitem_clash_standard_name():
    """Check ``.cf`` item lookup in two datasets built from scratch.

    First, a coordinate named ``area`` whose ``standard_name`` is
    ``cell_area`` must be returned by ``ds.cf["cell_area"]``. Second, in a
    dataset where each bounds variable repeats an attribute of its parent
    variable, ``ds.cf["latitude"]`` must be identical to ``ds["lat"]``.
    """
    area_ds = xr.Dataset()
    area_ds.coords["area"] = xr.DataArray(
        np.ones(10), attrs={"standard_name": "cell_area"}
    )
    assert_identical(
        area_ds.cf["cell_area"], area_ds["area"].reset_coords(drop=True)
    )

    ds = xr.Dataset()
    time_attrs = {"standard_name": "time", "bounds": "time_bounds"}
    ds["time"] = ("time", np.arange(10), time_attrs)
    # The bounds variable carries the same standard name as its parent.
    ds["time_bounds"] = (
        ("time", "bounds"),
        np.ones((10, 2)),
        {"standard_name": "time"},
    )
    lat_attrs = {"units": "degrees_north", "bounds": "lat_bounds"}
    ds["lat"] = ("lat", np.arange(10), lat_attrs)
    # The bounds variable carries the same units as its parent.
    ds["lat_bounds"] = (
        ("lat", "bounds"),
        np.ones((10, 2)),
        {"units": "degrees_north"},
    )
    assert_identical(ds["lat"], ds.cf["latitude"])
def test_convert_calendar_missing(source, target, freq):
    """Convert between calendars with ``missing=np.nan`` and check the result.

    The converted time axis must keep the requested frequency and equal a
    freshly generated range in the target calendar. For every frequency
    except ``"M"``, the values that only exist because of ``missing`` must
    be null and the remaining ones must match a conversion done without
    ``missing``.
    """

    def last_day(calendar):
        # A 360-day calendar has no 31st of December.
        return "2004-12-30" if calendar == "360_day" else "2004-12-31"

    time_dims = ("time",)
    src = DataArray(
        date_range("2004-01-01", last_day(source), freq=freq, calendar=source),
        dims=time_dims,
        name="time",
    )
    da_src = DataArray(
        np.linspace(0, 1, src.size), dims=time_dims, coords={"time": src}
    )

    out = convert_calendar(da_src, target, missing=np.nan, align_on="date")
    assert infer_freq(out.time) == freq

    expected = date_range(
        "2004-01-01", last_day(target), freq=freq, calendar=target
    )
    np.testing.assert_array_equal(out.time, expected)

    if freq == "M":
        return

    out_without_missing = convert_calendar(da_src, target, align_on="date")
    is_filled = ~out.time.isin(out_without_missing.time)
    expected_nan = out.isel(time=is_filled)
    assert expected_nan.isnull().all()

    expected_not_nan = out.sel(time=out_without_missing.time)
    assert_identical(expected_not_nan, out_without_missing)
def test_decode_cf_with_drop_variables(self):
    """``decode_cf`` must drop ``x`` whether it is named in a tuple or a str."""

    def foo_variable():
        # Built fresh for each dataset so the two never share objects.
        return (
            ("t", "x"),
            [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
            {"units": "bar"},
        )

    original = Dataset(
        {
            "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
            "x": ("x", [9, 8, 7], {"units": "km"}),
            "foo": foo_variable(),
            "y": ("t", [5, 10, -999], {"_FillValue": -999}),
        }
    )
    expected = Dataset(
        {
            "t": pd.date_range("2000-01-01", periods=3),
            "foo": foo_variable(),
            "y": ("t", [5, 10, np.nan]),
        }
    )

    dropped_by_tuple = conventions.decode_cf(original, drop_variables=("x",))
    dropped_by_name = conventions.decode_cf(original, drop_variables="x")
    assert_identical(expected, dropped_by_tuple)
    assert_identical(expected, dropped_by_name)
def test_args_match_both_dims_args() -> None:
    """Select with a coordinate constraint plus a ``sel`` method argument.

    The constraint mapping names a coordinate of the Dataset and ``drop`` is
    passed as a keyword argument; ``ds.events.sel`` must then return the same
    result as plain ``ds.sel``.
    """
    trajectory = np.exp(np.linspace((-6, -8), (3, 2), 250))
    ds = xr.Dataset(
        data_vars={
            "ball_trajectory": (["frame", "cartesian_coords"], trajectory)
        },
        coords={
            "frame": np.arange(1, 251),
            "cartesian_coords": ["x", "y"],
        },
        attrs={"match_id": 7, "resolution_fps": 25},
    )

    selection: Mapping[Hashable, Any] = {"cartesian_coords": "x"}

    via_accessor = ds.events.sel(selection, drop=True)
    via_xarray = ds.sel(selection, drop=True)
    assert_identical(via_accessor, via_xarray)
def test_decode_cf_with_dask(self):
    """Decoding a chunked Dataset must keep non-index variables as dask arrays.

    The decoded result must also be identical to a second decode of the
    same input after ``compute()``.
    """
    import dask.array as da

    raw_variables = {
        "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
        "foo": ("t", [0, 0, 0], {"coordinates": "y", "units": "bar"}),
        "bar": ("string2", [b"a", b"b"]),
        "baz": ("x", [b"abc"], {"_Encoding": "utf-8"}),
        "y": ("t", [5, 10, -999], {"_FillValue": -999}),
    }
    original = Dataset(raw_variables).chunk()

    decoded = conventions.decode_cf(original)
    print(decoded)

    # Index variables are excluded from the dask check.
    non_index_vars = [
        var
        for name, var in decoded.variables.items()
        if name not in decoded.indexes
    ]
    assert all(isinstance(var.data, da.Array) for var in non_index_vars)

    assert_identical(decoded, conventions.decode_cf(original).compute())
def test_combine_along_y(self, tmp_path_factory, bout_xyt_example_files):
    """Open three example files and compare against a manual concat along y.

    The same layout is then created without writing to disk and must open
    to an identical dataset.
    """
    layout = dict(nxpe=1, nype=3, nt=1, syn_data_type="stepped")

    path = bout_xyt_example_files(
        tmp_path_factory, **layout, write_to_disk=True
    )
    with pytest.warns(UserWarning):
        actual = open_boutdataset(datapath=path, keep_xboundaries=False)

    per_processor = [create_bout_ds(index) for index in (0, 1, 2)]
    expected = concat(per_processor, dim="y", data_vars="minimal")
    expected = expected.set_coords(["t_array", "dx", "dy", "dz"])
    expected = expected.rename(t_array="t")

    loaded = actual.drop_vars(["x", "y", "z"]).load()
    trimmed = expected.drop_vars(
        METADATA_VARS + _BOUT_PER_PROC_VARIABLES, errors="ignore"
    )
    xrt.assert_equal(loaded, trimmed)

    # check creation without writing to disk gives identical result
    fake_ds_list = bout_xyt_example_files(None, **layout)
    with pytest.warns(UserWarning):
        fake = open_boutdataset(datapath=fake_ds_list, keep_xboundaries=False)
    xrt.assert_identical(actual, fake)
def test_decode_cf_with_multiple_missing_values(self) -> None:
    """Decoding an array-valued ``missing_value`` must mask every entry and warn.

    Both values listed in ``missing_value`` are expected to decode to NaN,
    and a warning containing ``"has multiple fill"`` must be emitted.
    """
    original = Variable(["t"], [0, 1, 2], {"missing_value": np.array([0, 1])})
    expected = Variable(["t"], [np.nan, np.nan, 2], {})

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning regardless of filters installed elsewhere;
        # otherwise the list may stay empty and hide the warning under test.
        warnings.simplefilter("always")
        actual = conventions.decode_cf_variable("t", original)
        assert_identical(expected, actual)
        # Scan all recorded warnings rather than assuming ours came first.
        messages = [str(item.message) for item in caught]
        assert any("has multiple fill" in message for message in messages)
def test_interpolate_parallel_toroidal_points_list(self, bout_xyt_example_files):
    """Passing ``toroidal_points`` as a list must match ``isel`` on the full result."""
    example_options = dict(
        lengths=(2, 3, 16, 3),
        nxpe=1,
        nype=3,
        nt=1,
        grid="grid",
        guards={"y": 2},
        topology="single-null",
    )
    dataset_list, grid_ds = bout_xyt_example_files(None, **example_options)

    ds = open_boutdataset(
        datapath=dataset_list,
        gridfilepath=grid_ds,
        geometry="toroidal",
        keep_yboundaries=True,
    )

    full_interpolation = ds["n"].bout.interpolate_parallel()

    points_list = [1, 2]
    truncated_interpolation = ds["n"].bout.interpolate_parallel(
        toroidal_points=points_list
    )

    xrt.assert_identical(
        truncated_interpolation, full_interpolation.isel(zeta=points_list)
    )
def test_squashed_file(self, tmp_path_factory, bout_xyt_example_files):
    """Open a squashed example file and compare with a single reference dataset.

    The same layout is then created without writing to disk and must open
    to an identical dataset.
    """
    layout = dict(nxpe=4, nype=3, nt=1, squashed=True)

    path = bout_xyt_example_files(
        tmp_path_factory, **layout, write_to_disk=True
    )
    with pytest.warns(UserWarning):
        actual = open_boutdataset(datapath=path, keep_xboundaries=False)

    expected = create_bout_ds(lengths=(6, 8, 12, 7))
    expected = expected.set_coords(["t_array", "dx", "dy", "dz"])
    expected = expected.rename(t_array="t")

    ignored_vars = (
        METADATA_VARS
        + _BOUT_PER_PROC_VARIABLES
        + _BOUT_TIME_DEPENDENT_META_VARS
    )
    xrt.assert_equal(
        actual.drop_vars(["x", "y", "z"]).load(),
        expected.drop_vars(ignored_vars, errors="ignore"),
    )

    # check creation without writing to disk gives identical result
    fake_ds_list = bout_xyt_example_files(None, **layout)
    with pytest.warns(UserWarning):
        fake = open_boutdataset(datapath=fake_ds_list, keep_xboundaries=False)
    xrt.assert_identical(actual, fake)
def test_stack(obj):
    """``.cf.stack`` must accept CF names via keyword and via a mapping."""
    expected = obj.stack(latlon=["lat", "lon"])

    stacked_by_keyword = obj.cf.stack(latlon=["latitude", "longitude"])
    assert_identical(expected, stacked_by_keyword)

    stacked_by_mapping = obj.cf.stack({"latlon": ["latitude", "longitude"]})
    assert_identical(expected, stacked_by_mapping)
def test_preserve_unused_keys():
    """``.cf.sel`` must still accept ``time`` after its attributes are cleared."""
    ds = airds.copy(deep=True)
    ds.time.attrs.clear()

    cf_indexers = {"X": 260, "Y": 40, "time": airds.time[:2]}
    actual = ds.cf.sel(**cf_indexers, method="nearest")

    plain_indexers = {"lon": 260, "lat": 40, "time": airds.time[:2]}
    expected = ds.sel(**plain_indexers, method="nearest")

    assert_identical(actual, expected)
def test_string_object_warning(self):
    """Encoding a chunked object-dtype string variable must warn and not alter it."""
    strings = np.array(["foo", "bar"], dtype=object)
    original = Variable(("x",), strings).chunk()

    expected_message = "dask array with dtype=object"
    with pytest.warns(SerializationWarning, match=expected_message):
        encoded = conventions.encode_cf_variable(original)

    assert_identical(original, encoded)
def test_decode_cf_with_dask(self):
    """Decoding a chunked Dataset must keep non-index variables as dask arrays.

    The decoded result must also be identical to a second decode of the
    same input after ``compute()``.
    """
    import dask.array as da

    time_var = ("t", [0, 1, 2], {"units": "days since 2000-01-01"})
    foo_var = ("t", [0, 0, 0], {"coordinates": "y", "units": "bar"})
    bar_var = ("string2", [b"a", b"b"])
    baz_var = ("x", [b"abc"], {"_Encoding": "utf-8"})
    y_var = ("t", [5, 10, -999], {"_FillValue": -999})
    original = Dataset(
        {"t": time_var, "foo": foo_var, "bar": bar_var, "baz": baz_var, "y": y_var}
    ).chunk()

    decoded = conventions.decode_cf(original)
    print(decoded)

    # Index variables are excluded from the dask check.
    is_dask_backed = [
        isinstance(var.data, da.Array)
        for name, var in decoded.variables.items()
        if name not in decoded.indexes
    ]
    assert all(is_dask_backed)

    recomputed = conventions.decode_cf(original).compute()
    assert_identical(decoded, recomputed)
def test_salpha(self, tmp_path_factory, bout_xyt_example_files):
    """Open example files with ``geometry="s-alpha"``, save, and compare.

    The dataset opened from disk must be saveable and must be identical to
    one opened from the same layout created without writing to disk.
    """
    layout = dict(nxpe=3, nype=3, nt=1, syn_data_type="stepped", grid="grid")

    path = bout_xyt_example_files(
        tmp_path_factory, **layout, write_to_disk=True
    )
    grid_file = path.parent.joinpath("grid.nc")
    actual = open_boutdataset(
        datapath=path, geometry="s-alpha", gridfilepath=grid_file
    )

    # check dataset can be saved
    save_dir = tmp_path_factory.mktemp("data")
    actual.bout.save(save_dir.joinpath("boutdata.nc"))

    # check creation without writing to disk gives identical result
    fake_ds_list, fake_grid_ds = bout_xyt_example_files(None, **layout)
    fake = open_boutdataset(
        datapath=fake_ds_list, geometry="s-alpha", gridfilepath=fake_grid_ds
    )
    xrt.assert_identical(actual, fake)
def test_string_object_warning(self):
    """Encoding a chunked object-dtype string variable must warn and not alter it."""
    object_strings = np.array(["foo", "bar"], dtype=object)
    original = Variable(("x",), object_strings).chunk()

    with pytest.warns(
        SerializationWarning, match="dask array with dtype=object"
    ):
        encoded = conventions.encode_cf_variable(original)

    assert_identical(original, encoded)
def test_decode_cf_with_drop_variables(self) -> None:
    """``decode_cf`` must drop ``x`` whether it is named in a tuple or a str."""
    foo_values = [[0, 0, 0], [1, 1, 1], [2, 2, 2]]

    original_vars = {
        "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
        "x": ("x", [9, 8, 7], {"units": "km"}),
        "foo": (("t", "x"), [list(row) for row in foo_values], {"units": "bar"}),
        "y": ("t", [5, 10, -999], {"_FillValue": -999}),
    }
    original = Dataset(original_vars)

    expected_vars = {
        "t": pd.date_range("2000-01-01", periods=3),
        "foo": (("t", "x"), [list(row) for row in foo_values], {"units": "bar"}),
        "y": ("t", [5, 10, np.nan]),
    }
    expected = Dataset(expected_vars)

    actual = conventions.decode_cf(original, drop_variables=("x",))
    actual2 = conventions.decode_cf(original, drop_variables="x")
    assert_identical(expected, actual)
    assert_identical(expected, actual2)
def test_string_object_warning(self):
    """Encoding a chunked object-dtype string variable must warn and not alter it."""
    data = np.array(["foo", "bar"], dtype=object)
    unchunked = Variable(("x",), data)
    original = unchunked.chunk()

    warning_pattern = "dask array with dtype=object"
    with pytest.warns(SerializationWarning, match=warning_pattern):
        encoded = conventions.encode_cf_variable(original)

    assert_identical(original, encoded)
def test_decode_cf_with_conflicting_fill_missing_value():
    """Conflicting fill attributes must raise; matching NaN ones must not.

    Different ``missing_value`` and ``_FillValue`` raise ``ValueError``. When
    both are NaN (as a Python float or as ``np.float32``), decoding succeeds
    and leaves only the ``units`` attribute.
    """
    conflicting = Variable(
        ["t"],
        np.arange(10),
        {"units": "foobar", "missing_value": 0, "_FillValue": 1},
    )
    with raises_regex(ValueError, "_FillValue and missing_value"):
        conventions.decode_cf_variable("t", conflicting)

    expected = Variable(["t"], np.arange(10), {"units": "foobar"})

    for nan_value in (np.nan, np.float32(np.nan)):
        var = Variable(
            ["t"],
            np.arange(10),
            {
                "units": "foobar",
                "missing_value": nan_value,
                "_FillValue": nan_value,
            },
        )
        actual = conventions.decode_cf_variable("t", var)
        assert_identical(actual, expected)
def test_valid_mapping() -> None:
    """Load events with a correct Dataset-to-DataFrame mapping.

    ``ds.events.load`` must return the Dataset with the events and the
    mapping stored in the ``_events`` and ``_ds_df_mapping`` attributes.
    """
    events = pd.DataFrame(
        {
            "event_type": ["pass", "goal"],
            "start_frame": [1, 100],
            "end_frame": [200, 250],
        }
    )

    trajectory = np.exp(np.linspace((-6, -8), (3, 2), 250))
    ds = xr.Dataset(
        data_vars={
            "ball_trajectory": (["frame", "cartesian_coords"], trajectory)
        },
        coords={
            "frame": np.arange(1, 251),
            "cartesian_coords": ["x", "y"],
        },
        attrs={"match_id": 7, "resolution_fps": 25},
    )

    ds_df_mapping = {"frame": ("start_frame", "end_frame")}

    result = ds.assign_attrs(_events=events, _ds_df_mapping=ds_df_mapping)
    loaded = ds.events.load(events, ds_df_mapping)
    assert_identical(loaded, result)
def test_merge_attrs_drop_conflicts(self):
    """``combine_attrs="drop_conflicts"`` must keep only non-conflicting attrs."""
    attrs_per_dataset = [
        {"a": 0, "b": 0, "c": 0},
        {"b": 0, "c": 1, "d": 0},
        {"a": 0, "b": 1, "c": 0, "e": 0},
    ]
    datasets = [xr.Dataset(attrs=attrs) for attrs in attrs_per_dataset]

    actual = xr.merge(datasets, combine_attrs="drop_conflicts")

    # "b" and "c" take different values across the inputs and are dropped.
    expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0})
    assert_identical(actual, expected)
def test_rename(obj):
    """``.cf.rename`` with a CF name must match ``rename`` with the real name.

    Both the mapping form and the keyword form are checked. The variable
    that gets renamed depends on whether ``obj`` is a Dataset.
    """
    if isinstance(obj, Dataset):
        cf_name, real_name = "air_temperature", "air"
    else:
        cf_name, real_name = "longitude", "lon"

    cf_dict = {cf_name: "renamed"}
    xr_dict = {real_name: "renamed"}

    assert_identical(obj.rename(xr_dict), obj.cf.rename(cf_dict))
    assert_identical(obj.rename(**xr_dict), obj.cf.rename(**cf_dict))
def test_decode_cf_with_multiple_missing_values(self):
    """Decoding an array-valued ``missing_value`` must mask every entry and warn.

    Both values listed in ``missing_value`` are expected to decode to NaN,
    and a warning containing ``'has multiple fill'`` must be emitted.
    """
    original = Variable(['t'], [0, 1, 2], {'missing_value': np.array([0, 1])})
    expected = Variable(['t'], [np.nan, np.nan, 2], {})

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning regardless of filters installed elsewhere;
        # otherwise the list may stay empty and hide the warning under test.
        warnings.simplefilter('always')
        actual = conventions.decode_cf_variable('t', original)
        assert_identical(expected, actual)
        # Scan all recorded warnings rather than assuming ours came first.
        messages = [str(item.message) for item in caught]
        assert any('has multiple fill' in message for message in messages)
def test_serialization(self):
    """A rasterio-opened array must survive a netCDF write/read round trip."""
    with create_tmp_geotiff(additional_attrs={}) as (tmp_file, _):
        # Write it to a netcdf and read again (roundtrip)
        with xr.open_rasterio(tmp_file) as rioda, create_tmp_file(
            suffix=".nc"
        ) as tmp_nc_file:
            rioda.to_netcdf(tmp_nc_file)
            with xr.open_dataarray(tmp_nc_file) as ncds:
                assert_identical(rioda, ncds)
def test_coordinates_quantified():
    """``.cf[["latitude"]]`` on a quantified dataset must match the plain one.

    The quantified selection is dequantified before the comparison. The
    test is skipped when ``pint_xarray`` cannot be imported.
    """
    # note: import order is important
    from .. import units  # noqa

    pytest.importorskip("pint_xarray")

    quantified = popds.pint.quantify()
    dequantified_selection = quantified.cf[["latitude"]].pint.dequantify()
    plain_selection = popds.cf[["latitude"]]
    assert_identical(dequantified_selection, plain_selection)
def test_kwargs_expand_key_to_multiple_keys():
    """A CF axis key must expand to every matching dimension.

    ``X`` and ``Y`` are expected to map to ``x1``/``x2`` and ``y1``/``y2``
    for ``isel``, and ``X`` to both ``x`` dimensions for ``mean``.
    """
    indexed_by_axis = multiple.cf.isel(X=5, Y=3)
    indexed_by_name = multiple.isel(x1=5, y1=3, x2=5, y2=3)
    assert_identical(indexed_by_axis, indexed_by_name)

    mean_by_axis = multiple.cf.mean("X")
    mean_by_name = multiple.mean(["x1", "x2"])
    assert_identical(mean_by_axis, mean_by_name)
def test_drop_dims(ds):
    """``.cf.drop_dims`` must accept both an axis name and a coordinate name."""
    # Add data_var and coord to test _get_dims
    ds["lon_var"] = ds["lon"]
    ds = ds.assign_coords(lon_coord=ds["lon"])

    # Axis and coordinate
    for cf_name in ("X", "longitude"):
        expected = ds.drop_dims("lon")
        actual = ds.cf.drop_dims(cf_name)
        assert_identical(expected, actual)
def test_merge_datasets(self):
    """Merging single-variable subsets, or a dataset with itself, must round-trip."""
    data = create_test_data()

    subsets = [data[["var1"]], data[["var2"]]]
    merged_subsets = xr.merge(subsets)
    assert_identical(merged_subsets, data[["var1", "var2"]])

    merged_with_self = xr.merge([data, data])
    assert_identical(merged_with_self, data)
def test_merge_no_conflicts_broadcast(self):
    """Merging a 1-d ``x`` with a scalar ``x`` must give the 1-d ``[0]`` result.

    Checked both when the scalar is NaN and when the 1-d value is NaN.
    """
    expected = xr.Dataset({"x": ("y", [0])})

    scalar_is_nan = [xr.Dataset({"x": ("y", [0])}), xr.Dataset({"x": np.nan})]
    assert_identical(expected, xr.merge(scalar_is_nan))

    vector_is_nan = [xr.Dataset({"x": ("y", [np.nan])}), xr.Dataset({"x": 0})]
    assert_identical(expected, xr.merge(vector_is_nan))
def test_load_all():
    """``xemc3.load.all`` must reproduce the stored ``.nc`` reference dataset.

    Attributes present on the loaded result but absent from the reference
    are stripped before the comparison.
    """
    bd = get_data()
    result = xemc3.load.all(bd)
    expected = xr.open_dataset(bd + ".nc")

    # Remove new attributes, so we don't have to regenerate the data that often
    for name in [*result, *result.coords]:
        # Collect the keys first so the attrs dict is not mutated mid-iteration.
        new_attrs = [
            key for key in result[name].attrs if key not in expected[name].attrs
        ]
        for key in new_attrs:
            del result[name].attrs[key]

    assert_identical(result, expected)
def test_kwargs_methods(obj):
    """``.cf.isel`` with ``T`` must match ``isel`` with ``time``.

    Both the keyword form and the mapping form are checked, and each pair
    of ``isel`` calls runs inside ``raise_if_dask_computes()``.
    """
    first_two = slice(2)

    with raise_if_dask_computes():
        by_name = obj.isel(time=first_two)
        by_axis = obj.cf.isel(T=first_two)
    assert_identical(by_name, by_axis)

    with raise_if_dask_computes():
        by_name = obj.isel({"time": first_two})
        by_axis = obj.cf.isel({"T": first_two})
    assert_identical(by_name, by_axis)
def test_escribir_leer_arch_resultados(símismo):
    """Save each model's results to a file and read them back.

    For every model in ``símismo.modelos`` and each of the ``.json`` and
    ``.csv`` formats, the results are written under ``<model key>_prb``,
    read back through ``EnvolturaMDS.leer_arch_resultados`` for the variable
    ``'Lago'``, and compared with ``mod.leer_resultados('Lago')``.

    The written file is removed even when the read-back or the comparison
    fails, so a failing sub-test does not leave a stale file behind.
    """
    for ll, mod in símismo.modelos.items():
        for frmt in ['.json', '.csv']:
            with símismo.subTest(mod=ll, frmt=frmt):
                arch = ll + '_prb'
                mod.guardar_resultados(nombre=arch, frmt=frmt)
                # Cleanup starts only after the save succeeded, so a failed
                # save is not masked by a FileNotFoundError from os.remove.
                try:
                    leídos = EnvolturaMDS.leer_arch_resultados(
                        archivo=arch, var='Lago'
                    )
                    refs = mod.leer_resultados('Lago')
                    xrt.assert_identical(leídos, refs)
                finally:
                    os.remove(arch + frmt)
def test_dataarray_getitem():
    """Check ``.cf`` item lookup on an unnamed DataArray.

    A single CF key must return the matching coordinate, while indexing
    with a list of keys must raise ``KeyError``.
    """
    air = airds.air
    air.name = None

    assert_identical(air.cf["longitude"], air["lon"])

    with pytest.raises(KeyError):
        air.cf[["longitude"]]

    # The original statement ended in a stray comma that wrapped the
    # expression in a throwaway tuple; it has been removed.
    with pytest.raises(KeyError):
        air.cf[["longitude", "latitude"]]
def test_dataset(self):
    """``decode_cf`` on a small Dataset must give the expected decoded Dataset.

    Expected result: ``t`` as dates, ``y`` with its fill value replaced by
    NaN, both as coordinates, and ``foo`` without its ``coordinates``
    attribute.
    """
    original = Dataset(
        {
            "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
            "foo": ("t", [0, 0, 0], {"coordinates": "y", "units": "bar"}),
            "y": ("t", [5, 10, -999], {"_FillValue": -999}),
        }
    )

    expected_data_vars = {"foo": ("t", [0, 0, 0], {"units": "bar"})}
    expected_coords = {
        "t": pd.date_range("2000-01-01", periods=3),
        "y": ("t", [5.0, 10.0, np.nan]),
    }
    expected = Dataset(expected_data_vars, expected_coords)

    actual = conventions.decode_cf(original)
    assert_identical(expected, actual)
def test_decode_cf_with_dask(self):
    """Decoding a chunked Dataset must keep non-index variables as dask arrays.

    The decoded result must also be identical to a second decode of the
    same input after ``compute()``.
    """
    import dask.array as da

    unchunked = Dataset(
        {
            "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
            "foo": ("t", [0, 0, 0], {"coordinates": "y", "units": "bar"}),
            "bar": ("string2", [b"a", b"b"]),
            "baz": ("x", [b"abc"], {"_Encoding": "utf-8"}),
            "y": ("t", [5, 10, -999], {"_FillValue": -999}),
        }
    )
    original = unchunked.chunk()

    decoded = conventions.decode_cf(original)
    print(decoded)

    # Index variables are excluded from the dask check.
    checked = {
        name: isinstance(var.data, da.Array)
        for name, var in decoded.variables.items()
        if name not in decoded.indexes
    }
    assert all(checked.values())

    assert_identical(decoded, conventions.decode_cf(original).compute())
def test_decode_cf_with_drop_variables(self):
    """``decode_cf`` must drop ``x`` whether it is named in a tuple or a str."""
    original = Dataset(
        {
            "t": ("t", [0, 1, 2], {"units": "days since 2000-01-01"}),
            "x": ("x", [9, 8, 7], {"units": "km"}),
            "foo": (
                ("t", "x"),
                [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                {"units": "bar"},
            ),
            "y": ("t", [5, 10, -999], {"_FillValue": -999}),
        }
    )
    expected = Dataset(
        {
            "t": pd.date_range("2000-01-01", periods=3),
            "foo": (
                ("t", "x"),
                [[0, 0, 0], [1, 1, 1], [2, 2, 2]],
                {"units": "bar"},
            ),
            "y": ("t", [5, 10, np.nan]),
        }
    )

    with_tuple = conventions.decode_cf(original, drop_variables=("x",))
    with_string = conventions.decode_cf(original, drop_variables="x")

    assert_identical(expected, with_tuple)
    assert_identical(expected, with_string)
def test_decode_cf_with_conflicting_fill_missing_value():
    """Check decoding when both ``missing_value`` and ``_FillValue`` are set.

    Different values for the two attributes are expected to mask both and
    emit a warning containing ``'has multiple fill'``. When both are NaN
    (as a Python float or as ``np.float32``), decoding must leave the data
    untouched and keep only the ``units`` attribute.
    """
    expected = Variable(['t'], [np.nan, np.nan, 2], {'units': 'foobar'})
    var = Variable(
        ['t'],
        np.arange(3),
        {'units': 'foobar', 'missing_value': 0, '_FillValue': 1},
    )
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning regardless of filters installed elsewhere;
        # otherwise the list may stay empty and hide the warning under test.
        warnings.simplefilter('always')
        actual = conventions.decode_cf_variable('t', var)
        assert_identical(actual, expected)
        # Scan all recorded warnings rather than assuming ours came first.
        messages = [str(item.message) for item in caught]
        assert any('has multiple fill' in message for message in messages)

    expected = Variable(['t'], np.arange(10), {'units': 'foobar'})

    var = Variable(
        ['t'],
        np.arange(10),
        {'units': 'foobar', 'missing_value': np.nan, '_FillValue': np.nan},
    )
    actual = conventions.decode_cf_variable('t', var)
    assert_identical(actual, expected)

    var = Variable(
        ['t'],
        np.arange(10),
        {
            'units': 'foobar',
            'missing_value': np.float32(np.nan),
            '_FillValue': np.float32(np.nan),
        },
    )
    actual = conventions.decode_cf_variable('t', var)
    assert_identical(actual, expected)
def test_datetime_to_numeric_cftime():
    """``datetime_to_numeric`` on cftime data must return hours since an offset.

    Three calls are checked: the default offset, an explicit offset taken
    from the second time step, and an explicit ``dtype``.
    """
    times = xr.cftime_range("2000", periods=5, freq="7D")
    da = xr.DataArray(times, coords=[times], dims=["time"])

    def day_offsets(start, stop):
        # Day counts in 7-day steps, on the same coordinates as ``da``.
        return xr.DataArray(np.arange(start, stop, 7), coords=da.coords)

    result = utils.datetime_to_numeric(da, datetime_unit="h")
    assert_identical(result, 24 * day_offsets(0, 35))

    offset = da.isel(time=1)
    result = utils.datetime_to_numeric(da, offset=offset, datetime_unit="h")
    assert_identical(result, 24 * day_offsets(-7, 28))

    dtype = np.float32
    result = utils.datetime_to_numeric(da, datetime_unit="h", dtype=dtype)
    assert_identical(result, 24 * day_offsets(0, 35).astype(dtype))
def test_0d_int32_encoding(self):
    """A 0-d int32 variable with ``encoding["dtype"] == "int64"`` encodes to int64."""
    no_dims = ()
    original = Variable(no_dims, np.int32(0), encoding={"dtype": "int64"})
    expected = Variable(no_dims, np.int64(0))

    actual = conventions.maybe_encode_nonstring_dtype(original)

    assert_identical(expected, actual)
def test_invalid_coordinates(self):
    """An unresolvable ``coordinates`` attribute must leave the Dataset unchanged."""
    # regression test for GH308
    foo = ("t", [1, 2], {"coordinates": "invalid"})
    original = Dataset({"foo": foo})

    actual = conventions.decode_cf(original)

    assert_identical(original, actual)
def assertVariableIdentical(self, v1, v2):
    """Assert that ``v1`` and ``v2`` are identical by calling ``assert_identical``."""
    __tracebackhide__ = True  # noqa: F841
    assert_identical(v1, v2)
def assertDatasetIdentical(self, d1, d2):
    """Assert that ``d1`` and ``d2`` are identical by calling ``assert_identical``."""
    assert_identical(d1, d2)
def assertDataArrayIdentical(self, ar1, ar2):
    """Assert that ``ar1`` and ``ar2`` are identical by calling ``assert_identical``."""
    __tracebackhide__ = True  # noqa: F841
    assert_identical(ar1, ar2)
def assertVariableIdentical(self, v1, v2):
    """Assert that ``v1`` and ``v2`` are identical by calling ``assert_identical``."""
    assert_identical(v1, v2)
def assertDatasetIdentical(self, d1, d2):
    """Assert that ``d1`` and ``d2`` are identical by calling ``assert_identical``."""
    __tracebackhide__ = True  # noqa: F841
    assert_identical(d1, d2)
def assertDataArrayIdentical(self, ar1, ar2):
    """Assert that ``ar1`` and ``ar2`` are identical by calling ``assert_identical``."""
    assert_identical(ar1, ar2)