def test_lateral_fill_4D_3Dmask():
    """Check lateral_fill on a 4D field when the validity mask varies with depth.

    The input is a (time, z_t, nlat, nlon) array built from KMT with an
    artificial hole punched into it. The mask marks a cell valid when its
    level index is smaller than KMT at that point. After filling, every time
    slice of a given level must equal the first time slice of that level,
    and the attributes must survive.
    """
    grid = pop_tools.get_grid('POP_gx3v7')
    kmt = grid.KMT
    n_levels = len(grid.z_t)
    n_lat, n_lon = kmt.shape
    expected_attrs = {'long_name': 'test field', 'units': 'none'}

    # 2D base field: KMT as float, NaN where KMT <= 0, plus a NaN patch to fill
    base = kmt.copy() * 1.
    base = base.where(kmt > 0)
    base.values[20:40, 80:] = np.nan

    # broadcast the base field along time and depth
    time_axis = xr.DataArray(np.ones((3)), dims=('time'))
    depth_axis = xr.DataArray(np.ones((n_levels)), dims=('z_t'))
    da_in = time_axis * depth_axis * base
    da_in.attrs = expected_attrs

    # 3D array holding the level index k at every (k, j, i)
    level_index = xr.DataArray(np.arange(0, n_levels), dims=('z_t'))
    ones_3d = xr.DataArray(np.ones((n_levels, n_lat, n_lon)),
                           dims=('z_t', 'nlat', 'nlon'))
    level_index_3d = level_index * ones_3d

    # a cell is valid only where its level index is less than KMT
    valid_points = level_index_3d.where(level_index_3d < kmt)
    valid_points = xr.where(valid_points.notnull(), True, False)

    da_out = pop_tools.lateral_fill(da_in, valid_points)

    n_time, n_depth = da_out.shape[0], da_out.shape[1]
    for k in range(n_depth):
        reference = da_out[0, k, :, :]
        for t in range(n_time):
            np.testing.assert_array_equal(reference, da_out[t, k, :, :])

    assert da_out.attrs == expected_attrs
def add_coords_regrid_vertical(ds_dst_xy, pop_grid='POP_gx1v7'):
    """Linearly interpolate depth-resolved variables onto the POP ``z_t`` levels.

    Parameters
    ----------
    ds_dst_xy : xarray.Dataset
        Dataset with dimensions named 'lat', 'lon' and 'depth'. The 'depth'
        coordinate is multiplied by 1e2 (m --> cm) before interpolating.
    pop_grid : str
        Grid name passed to ``pop_tools.get_grid``.

    Returns
    -------
    xarray.Dataset
        The POP grid variables (set as coordinates) plus every input variable
        that has a 'depth' dimension, interpolated to ``z_t``. The 'depth'
        coordinate is dropped and 'lat'/'lon' are renamed to 'nlat'/'nlon'.
        Input variables without a 'depth' dimension are not carried over.
    """
    ydim = 'lat'
    xdim = 'lon'
    zdim = 'depth'

    # m --> cm
    ds_dst_xy = ds_dst_xy.assign_coords({zdim: ds_dst_xy[zdim] * 1e2})

    # each grid variable listed once ('dz' used to appear twice)
    grid_vars = ['TLONG', 'TLAT', 'TAREA', 'z_t', 'dz', 'KMT']
    ds_pop = pop_tools.get_grid(pop_grid)
    ds_dst = ds_pop[grid_vars].set_coords(grid_vars)

    for v in ds_dst_xy.data_vars:
        da = ds_dst_xy[v]
        if zdim not in da.dims:
            continue

        with xr.set_options(keep_attrs=True):
            da_out = da.interp(coords={zdim: ds_pop.z_t},
                               method='linear',
                               assume_sorted=True,
                               kwargs={'bounds_error': False})

        # snap values within 1e-10 of zero to exactly zero
        da_out = xr.where(np.isclose(da_out, 0., atol=1e-10), 0., da_out)
        da_out.encoding['_FillValue'] = nc.default_fillvals['f8']
        da_out.encoding['coordinates'] = 'TLONG TLAT z_t'

        ds_dst[v] = da_out
        # xr.where does not reliably carry attrs, so restore them explicitly
        ds_dst[v].attrs = da.attrs

    # grid variables are written without a fill value
    for v in grid_vars:
        ds_dst[v].encoding['_FillValue'] = None

    return ds_dst.drop([zdim]).rename({ydim: 'nlat', xdim: 'nlon'})
def test_get_grid_scrip():
    """The SCRIP-format POP_gx3v7 grid must match the fetched reference file."""
    generated = pop_tools.get_grid('POP_gx3v7', scrip=True)
    reference_path = DATASETS.fetch('POP_gx3v7.nc')
    reference = xr.open_dataset(reference_path)

    matches = ds_compare(generated, reference,
                         assertion='allclose', rtol=1e-14, atol=1e-14)
    assert matches
def test_get_grid():
    """Every grid listed in ``pop_tools.grid_defs`` loads as an xarray Dataset."""
    separator = '-' * 80
    for grid_name in pop_tools.grid_defs.keys():
        print(separator)
        print(grid_name)

        grid_ds = pop_tools.get_grid(grid_name)
        grid_ds.info()
        assert isinstance(grid_ds, xr.Dataset)

        print()
def test_get_grid_scrip():
    """The SCRIP-format POP_gx3v7 grid must match the reference zarr store."""
    generated = pop_tools.get_grid('POP_gx3v7', scrip=True)
    reference_store = f'{testdata_dir}/POP_gx3v7.zarr'
    reference = xr.open_zarr(reference_store)

    matches = ds_compare(generated, reference,
                         assertion='allclose', rtol=1e-14, atol=1e-14)
    assert matches
def test_get_grid_to_netcdf():
    """Every known grid can be written to netCDF in both tested formats.

    Each file is written to the current working directory and removed again,
    even when the write itself fails.
    """
    for grid in pop_tools.grid_defs.keys():
        print('-' * 80)
        print(grid)
        ds = pop_tools.get_grid(grid)
        # `nc_format` rather than `format`, to avoid shadowing the builtin
        for nc_format in ['NETCDF4', 'NETCDF3_64BIT']:
            gridfile = f'{grid}_{nc_format}.nc'
            try:
                ds.to_netcdf(gridfile, format=nc_format)
            finally:
                # portable replacement for `rm -f`: a missing file is not an error
                if os.path.exists(gridfile):
                    os.remove(gridfile)
def compute_regional_integrated_MHT(MHT_vertical, basin, longitude, latitude, gy) -> float:
    '''
    Integrates the vertically integrated MHT in a basin across a given latitude.

    Regions are defined by pop_tools. Documented options are
    ['Black Sea', 'Baltic Sea', 'Red Sea', 'Southern Ocean', 'Pacific Ocean',
    'Indian Ocean', 'Persian Gulf', 'Atlantic Ocean', 'Mediterranean Sea',
    'Lab. Sea & Baffin Bay', 'GIN Seas', 'Arctic Ocean', 'Hudson Bay'].
    NOTE(review): the mask actually loaded below is 'Pacific-Indian-Atlantic',
    so ``basin`` must be a label on that mask's ``region`` coordinate; confirm
    which of the names above it really provides.

    The region mask is taken from the POP_gx1v6 grid and remapped by
    nearest-neighbour onto the (longitude, latitude) grid of the input.

    Parameters
    ----------
    MHT_vertical : array-like, time x lat x lon
        Vertically integrated MHT. It is not modified.
    basin : str
        Region label selected from the region mask.
    longitude, latitude : 1-D array-like
        Coordinates of the last two axes of ``MHT_vertical``. The grid spacing
        is measured between ``longitude[0]`` and ``longitude[1]`` and applied
        to every cell, i.e. uniform longitude spacing is assumed.
    gy : float
        Latitude of interest (where MHT is calculated). The grid latitude
        closest to it is used.

    Returns
    -------
    One integrated value per time step (sum over longitude at the chosen
    latitude, times the cell width in metres). Despite the ``float``
    annotation this has length ``MHT_vertical.shape[0]``. NaNs in the input
    propagate into the sum.
    '''
    # get regions with pop tools
    grid_name = 'POP_gx1v6'
    ds = pop_tools.get_grid(grid_name)
    TLAT = ds.TLAT
    TLONG = ds.TLONG

    # TODO: raise an error if basin is not an option, if MHT_vertical does not
    # have shape (time, len(latitude), len(longitude)), or if the latitude of
    # choice is not in the basin.

    # select grid. region will be ones, all else will be zeros. can view regions at:
    # https://pop-tools.readthedocs.io/en/latest/examples/region-mask.html#Alternative-region-masks
    mask3d = pop_tools.region_mask_3d(grid_name, mask_name='Pacific-Indian-Atlantic')
    mask2d = mask3d.sel(region=basin)

    # interpolate this to the grid we have
    xx, yy = np.meshgrid(longitude, latitude)
    m = griddata((TLONG.values.flatten(), TLAT.values.flatten()),
                 mask2d.values.flatten(), (xx, yy),
                 method='nearest')

    # regions not in our basin become zero; broadcasting over the leading
    # time axis builds a new array instead of overwriting the caller's data
    masked = MHT_vertical * m

    # find latitude closest to the one we asked for. argmin of the distance
    # always yields a valid index, whereas searchsorted returns an insertion
    # point that can be len(latitude) and is not necessarily the nearest.
    idy = int(np.argmin(np.abs(np.asarray(latitude) - gy)))

    # find distance between points at this latitude
    p1 = (latitude[idy], longitude[0])
    p2 = (latitude[idy], longitude[1])
    dx = geodesic(p1, p2).km * 1000  # turn into meters instead of km

    # integrate across the latitude of choice
    MHT = np.sum(masked[:, idy, :] * dx, axis=1)
    return MHT
def test_lateral_fill_2D():
    """lateral_fill on a 2D field fills exactly the valid points and keeps attrs."""
    grid = pop_tools.get_grid('POP_gx3v7')
    expected_attrs = {'long_name': 'test field', 'units': 'none'}

    # KMT as float, NaN where KMT <= 0, plus an extra NaN patch to be filled
    da_in = (grid.KMT.copy() * 1.).where(grid.KMT > 0)
    da_in.values[20:40, 80:] = np.nan
    da_in.attrs = expected_attrs

    ocean_points = grid.KMT > 0
    da_out = pop_tools.lateral_fill(da_in, ocean_points)

    filled_where_valid = da_out.notnull() == ocean_points
    assert filled_where_valid.all()
    assert da_out.attrs == expected_attrs
def _ensure_grid_file(grid_name, clobber):
    """Return the path of the SCRIP grid file for ``grid_name``, creating it if needed.

    The file lives at ``{regrid_dir}/{grid_name}.nc``. An existing file is
    reused unless ``clobber`` is true. Only the POP grids listed below can be
    generated; any other name raises ``ValueError``.
    """
    supported_grids = ('POP_gx1v6', 'POP_gx1v7', 'POP_gx3v7')
    grid_file = f'{regrid_dir}/{grid_name}.nc'

    reuse_existing = os.path.exists(grid_file) and not clobber
    if reuse_existing:
        return grid_file

    # generate file if needed
    if grid_name not in supported_grids:
        raise ValueError('unknown grid')

    scrip_ds = pop_tools.get_grid(grid_name, scrip=True)
    scrip_ds.to_netcdf(grid_file)
    return grid_file
def test_lateral_fill_3D():
    """lateral_fill on a (z_t, nlat, nlon) field gives identical levels and keeps attrs."""
    grid = pop_tools.get_grid('POP_gx3v7')
    expected_attrs = {'long_name': 'test field', 'units': 'none'}

    # KMT as float, NaN where KMT <= 0, plus an extra NaN patch to be filled
    base = (grid.KMT.copy() * 1.).where(grid.KMT > 0)
    base.values[20:40, 80:] = np.nan

    # replicate the 2D field over three vertical levels
    levels = xr.DataArray(np.ones((3)), dims=('z_t'))
    da_in = levels * base
    da_in.attrs = expected_attrs

    ocean_points = grid.KMT > 0
    da_out = pop_tools.lateral_fill(da_in, ocean_points)

    # every level after the first must equal the first
    first_level = da_out[0, :, :]
    for k in range(1, da_out.shape[0]):
        np.testing.assert_array_equal(first_level, da_out[k, :, :])

    assert da_out.attrs == expected_attrs
def _ensure_grid_file(self, clobber, **kwargs):
    """Create ``self.grid_file`` unless it already exists and ``clobber`` is false.

    POP grids are generated with ``pop_tools.get_grid(..., scrip=True)``;
    grid names containing 'latlon' are generated by ``latlon_to_scrip`` with
    the extra keyword arguments. Any other name raises ``ValueError``.
    """
    pop_grid_names = ('POP_gx1v6', 'POP_gx1v7', 'POP_gx3v7')
    target = self.grid_file

    if os.path.exists(target) and not clobber:
        print(f'exists: {self.grid_file}')
        return

    # generate file if needed
    name = self.grid_name
    if name in pop_grid_names:
        scrip_ds = pop_tools.get_grid(name, scrip=True)
    elif 'latlon' in name:
        scrip_ds = latlon_to_scrip(**kwargs)
    else:
        raise ValueError('unknown grid')

    print(f'writing: {self.grid_file}')
    scrip_ds.to_netcdf(self.grid_file)
def test_lateral_fill_4D():
    """lateral_fill on a (time, z_t, nlat, nlon) field gives identical slices and keeps attrs."""
    grid = pop_tools.get_grid('POP_gx3v7')
    expected_attrs = {'long_name': 'test field', 'units': 'none'}

    # KMT as float, NaN where KMT <= 0, plus an extra NaN patch to be filled
    base = (grid.KMT.copy() * 1.0).where(grid.KMT > 0)
    base.values[20:40, 80:] = np.nan

    # replicate the 2D field over three times and five levels
    time_axis = xr.DataArray(np.ones((3)), dims=('time'))
    depth_axis = xr.DataArray(np.ones((5)), dims=('z_t'))
    da_in = time_axis * depth_axis * base
    da_in.attrs = expected_attrs

    ocean_points = grid.KMT > 0
    da_out = pop_tools.lateral_fill(da_in, ocean_points)

    # every (time, level) slice must equal the very first one
    reference = da_out[0, 0, :, :]
    n_time, n_depth = da_out.shape[0], da_out.shape[1]
    for k in range(n_depth):
        for t in range(n_time):
            np.testing.assert_array_equal(reference, da_out[t, k, :, :])

    assert da_out.attrs == expected_attrs
# Notebook-style script: the bare expressions below only display a value
# when run interactively.
import intake

# Open the catalog and keep the historical ocean 'pop.h' entries as a table
catalog = intake.open_esm_datastore('data/campaign-cesm2-cmip6-timeseries.json')
df = catalog.search(experiment='historical', component='ocn', stream='pop.h').df
variables = df.variable.unique()

# Show the variable names that mention 'Fe', 'iron' or 'sed'
[v for v in variables if 'Fe' in v or 'iron' in v.lower() or 'sed' in v.lower()]

### Spin up dask cluster
cluster, client = utils.get_ClusterClient()
cluster.scale(12)  # alternative: adapt(minimum_jobs=0, maximum_jobs=24)
client

### Read in the pop-grid
ds_grid = pop_tools.get_grid('POP_gx1v7')
ds_grid

## Operate on dataset using `xpersist` to cache output
# Unit conversion factors; the breakdowns are read off the names and the
# arithmetic -- confirm against the units of the model output.
nmolcm3_to_nM = 1e3  # presumably nmol/cm^3 -> nmol/L (1 L = 1e3 cm^3)
# presumably nmol -> mol (1e-9), mol -> mmol (1e3), cm^-2 -> m^-2 (1e4), s^-1 -> yr^-1
nmolcm2s_to_mmolm2yr = 1e-9 * 1e3 * 1e4 * 86400 * 365.
µmolm2d_to_mmolm2yr = 1e-3 * 365.  # presumably µmol -> mmol (1e-3), d^-1 -> yr^-1

# Analysis period and the variables to process
time_slice = slice("1990-01-15", "2015-01-15")
varlist = [
    'Fe',
    'IRON_FLUX',
    'Fe_RIV_FLUX',
    'pfeToSed',
]
def test_get_grid_twice():
    """Requesting the same grid twice must yield identical datasets."""
    grid_name = 'POP_gx1v7'
    first = pop_tools.get_grid(grid_name)
    second = pop_tools.get_grid(grid_name)
    xr.testing.assert_identical(first, second)
def test_get_grid(grid):
    """The grid named by ``grid`` loads as an xarray Dataset."""
    print(grid)
    grid_ds = pop_tools.get_grid(grid)
    grid_ds.info()
    is_dataset = isinstance(grid_ds, xr.Dataset)
    assert is_dataset
def zonal_mean_via_fortran(ds, var=None, grid=None, region_mask=None, replace_kmt=False):
    """
    Write ds to a temporary netCDF file, compute zonal mean for a given variable
    based on Keith L's fortran program, read resulting netcdf file, and return
    the new xarray dataset.

    Parameters
    ----------
    ds : xarray.Dataset
        Input data. A copy with its global attributes cleared is written out.
    var : str, optional
        Passed to ``za`` with ``-v``. Must be a single name (no comma) when
        ``replace_kmt`` is true.
    grid : str, optional
        Grid name for ``pop_tools.get_grid``. When omitted, ``ds`` itself is
        used as the grid and KMT file, i.e. it is assumed to contain all
        needed fields.
    region_mask : optional
        Object with a ``to_netcdf`` method; written to a temporary file and
        passed to ``za`` with ``-rmask_file``.
    replace_kmt : bool
        When true and ``grid`` is given, ``KMT`` in the grid is replaced by
        ``compute_kmt(ds[var])`` before the grid file is written.

    Returns
    -------
    xarray.Dataset or None
        The dataset read from the ``za`` output file, or ``None`` when ``za``
        printed anything to stdout (which is treated as an error report).

    Raises
    ------
    ValueError
        If ``replace_kmt`` is true without a single ``var``.
    subprocess.CalledProcessError
        If copying the debug files fails.
    """
    if replace_kmt and (var is None or ',' in var):
        raise ValueError(
            'if "replace_kmt" is True, a single "var" must be specified.')

    za_exe = '/glade/u/home/klindsay/bin/zon_avg/za'
    # Directory receiving debug copies of the za inputs; fall back to the
    # system temp dir instead of raising KeyError when TMPDIR is unset.
    debug_dir = os.environ.get('TMPDIR', tempfile.gettempdir())

    ds_in_file = tempfile.NamedTemporaryFile(suffix='.nc')
    ds_out_file = tempfile.NamedTemporaryFile(suffix='.nc')
    grid_file = None
    rmask_file = None

    try:
        ds = ds.copy()
        # for some reason, za does not like file attrs---perhaps "coordinates"?
        ds.attrs = {}
        ds.to_netcdf(ds_in_file.name)

        if grid is not None:
            grid = pop_tools.get_grid(grid)
            grid_file = tempfile.NamedTemporaryFile(suffix='.nc')
            grid_file_name = grid_file.name
            if replace_kmt:
                grid['KMT'] = compute_kmt(ds[var])
            grid.to_netcdf(grid_file_name)
        else:
            # Assume xarray dataset contains all needed fields
            grid_file_name = ds_in_file.name

        # Set up the call to za with correct options
        za_call = [za_exe]
        if var is not None:
            za_call += ['-v', var]
        if region_mask is not None:
            rmask_file = tempfile.NamedTemporaryFile(suffix='.nc')
            region_mask.to_netcdf(rmask_file.name)
            za_call += ['-rmask_file', rmask_file.name]
        za_call += [
            '-grid_file',
            grid_file_name,
            '-kmt_file',
            grid_file_name,
            '-O',
            '-o',
            ds_out_file.name,  # -O overwrites existing file, -o gives file name
            ds_in_file.name
        ]

        # Use subprocess to call za, allows us to capture stdout and print it
        proc = subprocess.Popen(za_call, stdout=subprocess.PIPE)
        out, _ = proc.communicate()  # stderr is not piped, so it is always None

        # Keep copies of the inputs handed to za for later inspection
        subprocess.check_call(
            ['cp', '-v', ds_in_file.name, f'{debug_dir}/za-in.nc'])
        subprocess.check_call(
            ['cp', '-v', grid_file_name, f'{debug_dir}/za-grid.nc'])
        # the region mask file only exists when a region_mask was supplied
        if rmask_file is not None:
            subprocess.check_call(
                ['cp', '-v', rmask_file.name, f'{debug_dir}/za-rmask.nc'])

        if out:
            print(f'za reported an error:\n{out.decode("utf-8")}')
            print(za_call)
            return None

        # Read in the newly-generated file
        print('za ran successfully, writing netcdf output')
        # NOTE(review): the dataset is opened lazily and its backing temp file
        # is removed in the cleanup below -- confirm callers can still read it.
        ds_out = xr.open_dataset(ds_out_file.name)
        return ds_out
    finally:
        # clean up on every exit path, including the error return above
        for handle in (ds_in_file, ds_out_file, grid_file, rmask_file):
            if handle is not None:
                handle.close()