def test_tolerance(self):
    """Check the ``tolerance`` argument of ``subset_gridpoint``.

    With ``tolerance=1`` the returned values are expected to be entirely
    null. The call with ``tolerance=1e5`` is only exercised; its result is
    not inspected.
    """
    tas = open_dataset(self.nc_poslons).tas
    point = {"lon": -72.5, "lat": 46.2}

    masked = subset.subset_gridpoint(tas, **point, tolerance=1)
    assert masked.isnull().all()

    # Smoke call only: the return value is deliberately discarded.
    subset.subset_gridpoint(tas, **point, tolerance=1e5)
def test_positive_lons(self):
    """Query a dataset from ``self.nc_poslons`` with two longitude conventions.

    The same point is requested once as a negative longitude and once shifted
    by +360; in both cases the returned ``lon`` must match the shifted value
    and the returned ``lat`` must match the requested latitude (1 decimal).
    """
    tas = open_dataset(self.nc_poslons).tas
    lon, lat = -72.4, 46.1
    shifted_lon = lon + 360

    for requested_lon in (lon, shifted_lon):
        out = subset.subset_gridpoint(tas, lon=requested_lon, lat=lat)
        np.testing.assert_almost_equal(out.lon, shifted_lon, 1)
        np.testing.assert_almost_equal(out.lat, lat, 1)
def test_simple(self, lat, lon, add_distance):
    """Subset ``tasmax`` at the given point(s) and check coordinates and layout.

    Parameters
    ----------
    lat, lon
        Requested coordinates; ``len(lat)`` is used to tell a single point
        from several points.
    add_distance
        Forwarded to ``subset_gridpoint``; a ``distance`` coordinate must be
        present exactly when this is truthy.
    """
    tasmax = open_dataset(self.nc_file).tasmax
    out = subset.subset_gridpoint(
        tasmax, lon=lon, lat=lat, add_distance=add_distance
    )

    np.testing.assert_almost_equal(out.lon, lon, 1)
    np.testing.assert_almost_equal(out.lat, lat, 1)

    # A "site" dimension is expected if and only if more than one... strictly:
    # if and only if ``len(lat) != 1``.
    has_site_dim = "site" in out.dims
    is_single_point = len(lat) == 1
    assert has_site_dim != is_single_point

    has_distance = "distance" in out.coords
    assert has_distance == bool(add_distance)
def test_dataset(self):
    """Subset a multi-variable dataset and check both variables stay aligned.

    The dataset is built from ``self.nc_file`` and the path obtained by
    replacing ``"tasmax"`` with ``"tasmin"`` in it.
    """
    paths = [self.nc_file, self.nc_file.replace("tasmax", "tasmin")]
    ds = xr.open_mfdataset(paths, combine="by_coords")
    lon, lat = -72.4, 46.1

    out = subset.subset_gridpoint(ds, lon=lon, lat=lat)

    np.testing.assert_almost_equal(out.lon, lon, 1)
    np.testing.assert_almost_equal(out.lat, lat, 1)
    np.testing.assert_array_equal(out.tasmin.shape, out.tasmax.shape)
def compare_vals(ds, sub, vari, flag_2d=False):
    """Check subsetted values against the original at sampled locations.

    Locations are the non-NaN cells of ``sub[vari]`` at the first time step.
    When more than 70 such cells exist, 70 indices are drawn with
    ``np.random.randint``; otherwise every cell is checked.

    Parameters
    ----------
    ds
        Original dataset.
    sub
        Subsetted dataset to validate against ``ds``.
    vari
        Name of the variable to compare.
    flag_2d : bool
        When true, ``lat``/``lon`` are indexed with two indices and the values
        are fetched through ``subset.subset_gridpoint``; otherwise they are
        selected with ``.sel`` on the ``lon``/``lat`` labels.
    """
    valid = np.where(~np.isnan(sub[vari].isel(time=0)))
    n_valid = len(valid[0])
    picks = (
        np.random.randint(0, n_valid, 70)
        if n_valid > 70
        else np.arange(0, n_valid)
    )

    for row, col in zip(valid[0][picks], valid[1][picks]):
        if not flag_2d:
            lat1 = sub.lat.isel(lat=row)
            lon1 = sub.lon.isel(lon=col)
            np.testing.assert_array_equal(
                sub[vari].sel(lon=lon1, lat=lat1),
                ds[vari].sel(lon=lon1, lat=lat1),
            )
            continue

        lat1 = sub.lat[row, col]
        lon1 = sub.lon[row, col]
        np.testing.assert_array_equal(
            subset.subset_gridpoint(sub, lon=lon1, lat=lat1)[vari],
            subset.subset_gridpoint(ds, lon=lon1, lat=lat1)[vari],
        )
def test_time_simple(self):
    """Subset a grid point together with a start/end year range.

    Longitudes are first shifted by -360. The result must sit at the
    requested point and span exactly the ten years 2050 through 2059.
    """
    tas = open_dataset(self.nc_poslons).tas
    tas = tas.assign_coords(lon=(tas.lon - 360))
    lon, lat = -72.4, 46.1
    yr_st, yr_ed = "2050", "2059"

    out = subset.subset_gridpoint(
        tas, lon=lon, lat=lat, start_date=yr_st, end_date=yr_ed
    )

    np.testing.assert_almost_equal(out.lon, lon, 1)
    np.testing.assert_almost_equal(out.lat, lat, 1)

    years = out.time.dt.year
    np.testing.assert_array_equal(len(np.unique(years)), 10)
    np.testing.assert_array_equal(years.max(), int(yr_ed))
    np.testing.assert_array_equal(years.min(), int(yr_st))
def _subset(resource: ComplexInput): nonlocal count # if not subsetting by time, it's not necessary to decode times time_subset = start_date is not None or end_date is not None dataset = try_opendap(resource, decode_times=time_subset) with lock: count += 1 write_log( process, f"Subsetting file {count} of {n_files} ({getattr(resource, resource.prop)})", subtask_percentage=(count - 1) * 100 // n_files, ) dataset = dataset[variables] if variables else dataset subsetted = subset_gridpoint( dataset, lon=longitudes, lat=latitudes, start_date=start_date, end_date=end_date, ) if 'site' in subsetted.dims: subsetted = subsetted.rename(site='region') else: subsetted = subsetted.expand_dims('region') if not all(subsetted.dims.values()): LOGGER.warning(f"Subset is empty for dataset: {resource.url}") return p = make_subset_file_name(resource) output_filename = Path(process.workdir) / p dataset_to_netcdf(subsetted, output_filename) output_files.append(output_filename)
def test_raise(self):
    """Check that invalid requests to ``subset_gridpoint`` raise.

    Covers an end date before the start date (full dates and year-only
    strings) and a data array whose ``lon``/``lat`` variables were dropped.
    """
    da = open_dataset(self.nc_poslons).tas

    # One ``pytest.raises`` block per call: the context manager exits at the
    # first exception, so a second call placed in the same block would never
    # be executed.
    with pytest.raises(ValueError):
        subset.subset_gridpoint(
            da,
            lon=-72.4,
            lat=46.1,
            start_date="2055-03-15",
            end_date="2055-03-14",
        )
    with pytest.raises(ValueError):
        subset.subset_gridpoint(
            da, lon=-72.4, lat=46.1, start_date="2055", end_date="2052"
        )

    da = open_dataset(self.nc_2dlonlat).tasmax.drop_vars(names=["lon", "lat"])
    with pytest.raises(Exception):
        subset.subset_gridpoint(da, lon=-72.4, lat=46.1)
def test_irregular(self):
    """Exercise ``subset_gridpoint`` on data with 2-D ``lon``/``lat``.

    Scenarios, in order:

    1. a single point (no ``site`` dimension expected);
    2. several points (``site`` dimension expected);
    3. the same points with chunked ``lon``/``lat`` coordinates;
    4. the same data with its dimensions reversed;
    5. a dataset where ``lon``/``lat`` are data variables rather than
       coordinates of ``tasmax``;
    6. a dataset where ``lon``/``lat`` are 1-D variables along ``site``.
    """
    da = open_dataset(self.nc_2dlonlat).tasmax
    lon = -72.4
    lat = 46.1
    out = subset.subset_gridpoint(da, lon=lon, lat=lat)
    np.testing.assert_almost_equal(out.lon, lon, 1)
    np.testing.assert_almost_equal(out.lat, lat, 1)
    assert "site" not in out.dims

    lon = [-72.4, -67.1]
    lat = [46.1, 48.2]
    out = subset.subset_gridpoint(da, lon=lon, lat=lat)
    np.testing.assert_almost_equal(out.lon, lon, 1)
    np.testing.assert_almost_equal(out.lat, lat, 1)
    assert "site" in out.dims

    # dask for lon lat
    # ``chunk`` returns a new object instead of modifying in place, so the
    # chunked arrays have to be attached back to ``da``. The tuple form
    # (dims, data, attrs) keeps the chunked data without dragging along the
    # unchunked sibling coordinate carried by ``da.lon`` / ``da.lat``.
    chunked_lon = da.lon.chunk({"rlon": 10})
    chunked_lat = da.lat.chunk({"rlon": 10})
    da = da.assign_coords(
        lon=(chunked_lon.dims, chunked_lon.data, chunked_lon.attrs),
        lat=(chunked_lat.dims, chunked_lat.data, chunked_lat.attrs),
    )
    out = subset.subset_gridpoint(da, lon=lon, lat=lat)
    np.testing.assert_almost_equal(out.lon, lon, 1)
    np.testing.assert_almost_equal(out.lat, lat, 1)

    # test_irregular transposed:
    da1 = open_dataset(self.nc_2dlonlat).tasmax
    dims = list(reversed(da1.dims))

    daT = xr.DataArray(np.transpose(da1.values), dims=dims)
    for d in daT.dims:
        daT = daT.assign_coords(**{d: da1[d]})
    daT = daT.assign_coords(lon=(["rlon", "rlat"], np.transpose(da1.lon.values)))
    daT = daT.assign_coords(lat=(["rlon", "rlat"], np.transpose(da1.lat.values)))

    out1 = subset.subset_gridpoint(daT, lon=lon, lat=lat)
    np.testing.assert_almost_equal(out1.lon, lon, 1)
    np.testing.assert_almost_equal(out1.lat, lat, 1)
    np.testing.assert_array_equal(out, out1)

    # Dataset with tasmax, lon and lat as data variables
    # (i.e. lon, lat not coords of tasmax)
    daT1 = xr.DataArray(np.transpose(da1.values), dims=dims)
    for d in daT1.dims:
        daT1 = daT1.assign_coords(**{d: da1[d]})
    dsT = xr.Dataset(data_vars=None, coords=daT1.coords)
    dsT["tasmax"] = daT1
    dsT["lon"] = xr.DataArray(np.transpose(da1.lon.values), dims=["rlon", "rlat"])
    dsT["lat"] = xr.DataArray(np.transpose(da1.lat.values), dims=["rlon", "rlat"])

    out2 = subset.subset_gridpoint(dsT, lon=lon, lat=lat)
    np.testing.assert_almost_equal(out2.lon, lon, 1)
    np.testing.assert_almost_equal(out2.lat, lat, 1)
    np.testing.assert_array_equal(out, out2.tasmax)

    # Dataset with lon and lat as 1D arrays
    lon = -60
    lat = -45
    da = xr.DataArray(
        np.random.rand(5, 4),
        dims=("time", "site"),
        coords={"time": np.arange(5), "site": np.arange(4)},
    )
    ds = xr.Dataset(
        data_vars={
            "da": da,
            "lon": ("site", np.linspace(lon, lon + 10, 4)),
            "lat": ("site", np.linspace(lat, lat + 5, 4)),
        }
    )
    gp = subset.subset_gridpoint(ds, lon=lon, lat=lat)
    np.testing.assert_almost_equal(gp.lon, lon)
    np.testing.assert_almost_equal(gp.lat, lat)
    assert gp.site == 0