def xr_average(fyear, tar, modules):
    """xarray-based processing routines for tripolar ocean model output

    Parameters
    ----------
    fyear : str
        Year being processed (YYYY)
    tar : tarfile
        In-memory tarfile object
    modules : dict
        Mappings of netCDF file names inside the tar file to output db file names
    """

    members = [x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.nc")]

    for member in members:
        print(f"{fyear}.{member}.nc")
        data_file = netcdf.extract_from_tar(tar, f"{fyear}.{member}.nc")
        dset = netcdf.in_mem_xr(data_file)

        grid_file = (
            f"{fyear}.ocean_static.nc"
            if netcdf.tar_member_exists(tar, f"{fyear}.ocean_static.nc")
            else f"{fyear}.ocean_month.nc"
        )
        grid_file = netcdf.extract_from_tar(tar, grid_file)
        ds_grid = netcdf.in_mem_xr(grid_file)

        # Retain only time-dependent variables
        variables = list(dset.variables.keys())
        for x in variables:
            if "time" not in dset[x].dims:
                del dset[x]

        _area = "areacello" if "areacello" in list(ds_grid.variables) else "area_t"
        if "wet" in list(ds_grid.variables):
            _wet = ds_grid["wet"]
        else:
            _wet = 1.0
            warnings.warn("Unable to find wet mask")
        _area = ds_grid[_area] * _wet

        for region in ["global", "nh", "sh", "tropics"]:
            _masked_area = xrtools.xr_mask_by_latitude(
                _area, ds_grid.geolat, region=region
            )
            gmeantools.write_sqlite_data(
                f"{fyear}.{region}Ave{modules[member]}.db",
                "area",
                fyear,
                _masked_area.sum().data,
            )
            weights = dset.average_DT.astype("float") * _masked_area
            _dset_weighted = xrtools.xr_weighted_avg(dset, weights)
            xrtools.xr_to_db(
                _dset_weighted, fyear, f"{fyear}.{region}Ave{modules[member]}.db"
            )
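# A minimal sketch of the area- and time-weighted reduction that
# `xrtools.xr_weighted_avg` is assumed to perform above: the weights are the
# product of the time-average interval (average_DT) and the masked cell area,
# and each variable is reduced with xarray's built-in `weighted` helper. The
# helper and variable names below are illustrative, not part of the package API.
import numpy as np
import xarray as xr


def _example_weighted_avg(dset, weights):
    """Hypothetical stand-in for xrtools.xr_weighted_avg."""
    result = xr.Dataset()
    for name, var in dset.data_vars.items():
        # weights broadcast against (time, yh, xh); NaNs are zeroed out
        result[name] = var.weighted(weights.fillna(0.0)).mean()
    return result


if __name__ == "__main__":
    data = xr.DataArray(np.random.rand(2, 4, 5), dims=("time", "yh", "xh"))
    area = xr.DataArray(np.random.rand(4, 5), dims=("yh", "xh"))
    average_dt = xr.DataArray([31.0, 28.0], dims="time")  # days per record
    ds = xr.Dataset({"tos": data})
    print(_example_weighted_avg(ds, average_dt * area))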
def xr_average(fyear, tar, modules):
    """xarray-based processing routines for cubed sphere atmos. output

    Parameters
    ----------
    fyear : str
        Year being processed (YYYY)
    tar : tarfile
        In-memory tarfile object
    modules : dict
        Mappings of netCDF file names inside the tar file to output db file names
    """

    members = [
        x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.tile1.nc")
    ]

    for member in members:
        print(f"{fyear}.{member}.nc")
        data_files = [
            netcdf.extract_from_tar(tar, f"{fyear}.{member}.tile{x}.nc")
            for x in range(1, 7)
        ]
        data_files = [netcdf.in_mem_xr(x) for x in data_files]
        dset = xr.concat(data_files, "tile")

        # Retain only time-dependent variables
        variables = list(dset.variables.keys())
        for x in variables:
            if "time" not in dset[x].dims:
                del dset[x]

        # Aggregate grid spec tiles
        grid_files = [
            netcdf.extract_from_tar(tar, f"{fyear}.grid_spec.tile{x}.nc")
            for x in range(1, 7)
        ]
        grid_files = [netcdf.in_mem_xr(x) for x in grid_files]
        ds_grid = xr.concat(grid_files, "tile")
        dset["area"] = ds_grid["area"]

        for region in ["global", "nh", "sh", "tropics"]:
            _masked_area = xrtools.xr_mask_by_latitude(
                dset.area, ds_grid.grid_latt, region=region
            )
            gmeantools.write_sqlite_data(
                f"{fyear}.{region}Ave{modules[member]}.db",
                "area",
                fyear,
                _masked_area.sum().data,
            )
            weights = dset.average_DT.astype("float") * _masked_area
            _dset_weighted = xrtools.xr_weighted_avg(dset, weights)
            xrtools.xr_to_db(
                _dset_weighted, fyear, f"{fyear}.{region}Ave{modules[member]}.db"
            )
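# Illustrative sketch of the tile aggregation used above: each of the six
# cube-sphere faces is opened as its own Dataset and then stacked along a new
# "tile" dimension with xr.concat, so the area weighting and latitude masking
# can operate on all faces at once. The synthetic 4x4 tiles here are
# placeholders for the real history files.
import numpy as np
import xarray as xr

tiles = [
    xr.Dataset({"t_surf": (("grid_yt", "grid_xt"), np.full((4, 4), 273.15 + n))})
    for n in range(6)
]
stacked = xr.concat(tiles, dim="tile")
print(stacked.t_surf.dims)   # ('tile', 'grid_yt', 'grid_xt')
print(stacked.t_surf.shape)  # (6, 4, 4)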
def generic_driver(fyear, tar, modules, average, static_file=None):
    """Generic driver to run an averaging function on history data

    Parameters
    ----------
    fyear : str
        Year to process (YYYYMMDD)
    tar : tarfile object
        In-memory pointer to history tarfile
    modules : dict
        Dictionary of history nc streams (keys) and output db name (values)
    average : function
        Averaging function to apply to each stream
    static_file : tuple, optional
        Tuple of (primary, backup) static stream names, by default None
    """

    members = [f"{fyear}.{x}.nc" for x in list(modules.keys())]
    members = [tar_member_exists(tar, x) for x in members]

    if any(members):
        if static_file is not None:
            assert isinstance(static_file, tuple), "Static file must be a tuple"
            assert (
                len(static_file) == 2
            ), "Static file tuple must have primary and one backup stream"
            staticname = f"{fyear}.{static_file[0]}.nc"
            grid_file = (
                extract_from_tar(tar, staticname)
                if tar_member_exists(tar, staticname)
                else extract_from_tar(tar, f"{fyear}.{static_file[1]}.nc")
            )
        else:
            grid_file = None

        for module in list(modules.keys()):
            fname = f"{fyear}.{module}.nc"
            if tar_member_exists(tar, fname):
                print(f"{fyear} - {module}")
                fdata = extract_from_tar(tar, fname)
                grid_file = fdata if grid_file is None else grid_file
                average(grid_file, fdata, fyear, "./", modules[module])
                del fdata

        del grid_file
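# Hypothetical usage sketch for generic_driver: the averaging callback is
# expected to accept (grid_file, data_file, fyear, outdir, label), matching the
# call made inside the loop above. The tar path, stream names, and the
# `_noop_average` callback below are placeholders, not part of the package.
import tarfile


def _noop_average(grid_file, data_file, fyear, outdir, label):
    """Placeholder averaging callback that just reports what it was given."""
    print(f"would average {fyear} -> {label} in {outdir}")


if __name__ == "__main__":
    with tarfile.open("00010101.nc.tar") as history_tar:  # hypothetical path
        generic_driver(
            "00010101",
            history_tar,
            {"ocean_month": "Ocean"},
            _noop_average,
            static_file=("ocean_static", "ocean_month"),
        )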
def xr_average(fyear, tar, modules):
    """xarray-based processing routines for lat-lon model output

    Parameters
    ----------
    fyear : str
        Year being processed (YYYY)
    tar : tarfile
        In-memory tarfile object
    modules : dict
        Mappings of netCDF file names inside the tar file to output db file names
    """

    members = [x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.nc")]

    for member in members:
        print(f"{fyear}.{member}.nc")
        data_file = netcdf.extract_from_tar(tar, f"{fyear}.{member}.nc")
        dset = netcdf.in_mem_xr(data_file)

        geolat = np.tile(dset.lat.data[:, None], (1, dset.lon.data.shape[0]))
        geolon = np.tile(dset.lon.data[None, :], (dset.lat.data.shape[0], 1))
        _geolat = xr.DataArray(geolat, coords=(dset.lat, dset.lon))
        _geolon = xr.DataArray(geolon, coords=(dset.lat, dset.lon))
        _area = xr.DataArray(
            gmeantools.standard_grid_cell_area(dset.lat.data, dset.lon.data),
            coords=(dset.lat, dset.lon),
        )

        # Retain only time-dependent variables
        variables = list(dset.variables.keys())
        for x in variables:
            if "time" not in dset[x].dims:
                del dset[x]

        for region in ["global", "nh", "sh", "tropics"]:
            _masked_area = xrtools.xr_mask_by_latitude(_area, _geolat, region=region)
            gmeantools.write_sqlite_data(
                f"{fyear}.{region}Ave{modules[member]}.db",
                "area",
                fyear,
                _masked_area.sum().data,
            )
            weights = dset.average_DT.astype("float") * _masked_area
            _dset_weighted = xrtools.xr_weighted_avg(dset, weights)
            xrtools.xr_to_db(
                _dset_weighted, fyear, f"{fyear}.{region}Ave{modules[member]}.db"
            )
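# The cell areas above come from gmeantools.standard_grid_cell_area; a minimal
# sketch of one common formulation for a regular lat-lon grid is shown below
# (R^2 * dlon * (sin(lat_n) - sin(lat_s)) per cell). This is an assumption
# about the weighting, not the package's actual implementation.
import numpy as np


def _example_cell_area(lat, lon, radius=6371.0e3):
    """Approximate cell areas (m^2) for evenly spaced cell-center lat/lon."""
    dlat = np.deg2rad(np.abs(lat[1] - lat[0]))
    dlon = np.deg2rad(np.abs(lon[1] - lon[0]))
    lat_n = np.deg2rad(lat) + dlat / 2.0
    lat_s = np.deg2rad(lat) - dlat / 2.0
    band = (radius**2) * dlon * (np.sin(lat_n) - np.sin(lat_s))
    return np.tile(band[:, None], (1, lon.shape[0]))


if __name__ == "__main__":
    lat = np.arange(-89.0, 90.0, 2.0)
    lon = np.arange(0.0, 360.0, 2.5)
    area = _example_cell_area(lat, lon)
    # Total should be close to the surface area of the sphere (~5.1e14 m^2)
    print(f"{area.sum():.3e}")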
def generic_cubesphere_driver(fyear, tar, modules, average, grid_spec="grid_spec"):
    """Generic cubesphere data driver

    Parameters
    ----------
    fyear : str
        Year to process (YYYYMMDD)
    tar : tarfile object
        In-memory pointer to history tarfile
    modules : dict
        Dictionary of history nc streams (keys) and output db name (values)
    average : function
        Averaging function to apply to each stream
    grid_spec : str, optional
        Name of the grid spec stream, by default "grid_spec"
    """

    members = [f"{fyear}.{x}.tile1.nc" for x in list(modules.keys())]
    members = [tar_member_exists(tar, x) for x in members]

    if any(members):
        gs_tiles = [
            extract_from_tar(tar, f"{fyear}.{grid_spec}.tile{x}.nc")
            for x in range(1, 7)
        ]

        for module in list(modules.keys()):
            if tar_member_exists(tar, f"{fyear}.{module}.tile1.nc"):
                print(f"{fyear} - {module}")
                data_tiles = [
                    extract_from_tar(tar, f"{fyear}.{module}.tile{x}.nc")
                    for x in range(1, 7)
                ]
                average(gs_tiles, data_tiles, fyear, "./", modules[module])
                del data_tiles

        del gs_tiles
def routines(args, infile):
    """Driver routine for CM4-class models

    Parameters
    ----------
    args : argparse.parser
        Parsed command line arguments
    infile : str, pathlike
        History tar file path
    """

    # -- Open the tarfile
    tar = tarfile.open(infile)

    # -- Set the model year string
    fyear = str(infile.split("/")[-1].split(".")[0])
    print("Processing " + fyear)

    # -- Get list of components to process
    comps = args.component

    # -- Atmospheric Fields
    modules = {
        "atmos_month": "Atmos",
        "atmos_co2_month": "Atmos",
        "atmos_month_aer": "AtmosAer",
        "aerosol_month_cmip": "AeroCMIP",
    }
    if any(comp in comps for comp in ["atmos", "all"]):
        averagers.cubesphere.xr_average(fyear, tar, modules)

    # -- Land Fields
    modules = {"land_month": "Land"}
    if any(comp in comps for comp in ["land", "all"]):
        averagers.land_lm4.xr_average(fyear, tar, modules)

    # -- Ice
    modules = {"ice_month": "Ice"}
    if any(comp in comps for comp in ["ice", "all"]):
        averagers.ice.xr_average(fyear, tar, modules)

    # -- Ocean
    fname = f"{fyear}.ocean_scalar_annual.nc"
    if any(comp in comps for comp in ["ocean", "all"]):
        if tar_member_exists(tar, fname):
            print(f"{fyear}.ocean_scalar_annual.nc")
            fdata = nctools.extract_from_tar(tar, fname, ncfile=True)
            extract_ocean_scalar.mom6(fdata, fyear, "./")
            fdata.close()

    # -- OBGC
    modules = {
        "ocean_cobalt_sfc": "OBGC",
        "ocean_cobalt_misc": "OBGC",
        "ocean_cobalt_tracers_year": "OBGC",
        "ocean_cobalt_tracers_int": "OBGC",
        "ocean_bling": "OBGC",
        "ocean_bling_cmip6_omip_2d": "OBGC",
        "ocean_bling_cmip6_omip_rates_year_z": "OBGC",
        "ocean_bling_cmip6_omip_sfc": "OBGC",
        "ocean_bling_cmip6_omip_tracers_month_z": "OBGC",
        "ocean_bling_cmip6_omip_tracers_year_z": "OBGC",
    }
    if any(comp in comps for comp in ["obgc", "all"]):
        averagers.tripolar.xr_average(fyear, tar, modules)

    # -- AMOC
    if any(comp in comps for comp in ["amoc", "all"]):
        diags.amoc.mom6_amoc(fyear, tar)

    # -- Close out the tarfile handle
    tar.close()
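# Hypothetical invocation sketch for routines(): the function only relies on
# args.component (a list of component names) and a history tar path whose
# basename begins with the model year. The argument setup below mirrors that
# expectation; the tar path is a placeholder.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--component",
        nargs="+",
        default=["all"],
        help="components to process (atmos, land, ice, ocean, obgc, amoc, all)",
    )
    args = parser.parse_args([])  # uses the default ["all"]
    routines(args, "/archive/history/00010101.nc.tar")  # hypothetical path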
def mom6_amoc(fyear, tar, label="Ocean", outdir="./"):
    """Driver for AMOC calculation in MOM6-class models

    Parameters
    ----------
    fyear : str
        Year label (YYYY)
    tar : tarfile
        In-memory history tarfile object
    label : str
        SQLite output stream name
    outdir : str, path-like
        Path to output SQLite file
    """

    member = f"{fyear}.ocean_annual_z.nc"
    static = f"{fyear}.ocean_static.nc"

    annual_file = (
        extract_from_tar(tar, member, ncfile=True)
        if tar_member_exists(tar, member)
        else None
    )
    static_file = (
        extract_from_tar(tar, static, ncfile=True)
        if tar_member_exists(tar, static)
        else None
    )

    if annual_file is not None and static_file is not None:
        # open the Dataset with the transports
        dset = in_mem_xr(annual_file)

        # select first time level from static file
        # editorial comment: why does the static file have a time dimension?
        dset_static = in_mem_xr(static_file).isel(time=0)

        # merge static DataSet with transport DataSet
        for geo_coord in ["geolon_v", "geolat_v", "wet_v"]:
            if geo_coord in dset_static.variables:
                dset[geo_coord] = xr.DataArray(
                    dset_static[geo_coord].values, dims=dset_static[geo_coord].dims
                )

        required_vars = ["geolon_v", "geolat_v", "umo", "vmo"]
        dset_vars = list(dset.variables)

        if list(set(required_vars) - set(dset_vars)) == []:
            # calculate non-rotated y-ward moc array
            moc = xoverturning.calcmoc(dset, basin="atl-arc", verbose=False)

            # max streamfunction between 20N-80N and 500-2500m depth
            maxsfn = moc.sel(yq=slice(20.0, 80.0), z_i=slice(500.0, 2500.0)).max()
            maxsfn = maxsfn.astype(np.float16).values
            print(f" AMOC = {maxsfn}")

            # max streamfunction at 26.5N
            rapidsfn = moc.sel(yq=26.5, method="nearest")
            rapidsfn = rapidsfn.sel(z_i=slice(500.0, 2500.0)).max()
            rapidsfn = rapidsfn.astype(np.float16).values
            print(f" RAPID AMOC = {rapidsfn}")

            # -- Write to sqlite
            gmeantools.write_sqlite_data(
                outdir + "/" + fyear + ".globalAve" + label + ".db",
                "amoc_vh",
                fyear[:4],
                maxsfn,
            )
            gmeantools.write_sqlite_data(
                outdir + "/" + fyear + ".globalAve" + label + ".db",
                "amoc_rapid",
                fyear[:4],
                rapidsfn,
            )

        else:
            warnings.warn(f"{required_vars} are required to calculate AMOC")

    else:
        warnings.warn("AMOC calculation requires ocean_static and ocean_annual_z")
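# Small, self-contained illustration of the streamfunction reductions above:
# given a (z_i, yq) overturning array, the AMOC index is the maximum between
# 20N-80N and 500-2500 m, and the RAPID-style index is the maximum of the
# profile at the latitude nearest 26.5N. The synthetic array stands in for the
# output of xoverturning.calcmoc.
import numpy as np
import xarray as xr

z_i = np.arange(0.0, 5000.0, 250.0)
yq = np.arange(-80.0, 90.0, 1.0)
moc = xr.DataArray(
    np.random.rand(z_i.size, yq.size) * 20.0,
    coords={"z_i": z_i, "yq": yq},
    dims=("z_i", "yq"),
)

amoc_index = moc.sel(yq=slice(20.0, 80.0), z_i=slice(500.0, 2500.0)).max()
rapid_index = moc.sel(yq=26.5, method="nearest").sel(z_i=slice(500.0, 2500.0)).max()
print(float(amoc_index), float(rapid_index))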
def xr_average(fyear, tar, modules):
    """xarray-based processing routines for cubed sphere LM4 land output

    Parameters
    ----------
    fyear : str
        Year being processed (YYYY)
    tar : tarfile
        In-memory tarfile object
    modules : dict
        Mappings of netCDF file names inside the tar file to output db file names
    """

    members = [
        x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.tile1.nc")
    ]

    for member in members:
        print(f"{fyear}.{member}.nc")
        data_files = [
            netcdf.extract_from_tar(tar, f"{fyear}.{member}.tile{x}.nc")
            for x in range(1, 7)
        ]
        data_files = [netcdf.in_mem_xr(x) for x in data_files]
        dset = xr.concat(data_files, "tile")

        # Calculate cell depth
        depth = dset["zhalf_soil"].data
        depth = [depth[x] - depth[x - 1] for x in range(1, len(depth))]
        dset["depth"] = xr.DataArray(depth, dims=("zfull_soil"))
        depth = dset["depth"]

        # Retain only time-dependent variables
        variables = list(dset.variables.keys())
        for x in variables:
            if "time" not in dset[x].dims:
                del dset[x]

        # Load grid data
        grid_files = [
            netcdf.extract_from_tar(tar, f"{fyear}.land_static.tile{x}.nc")
            for x in range(1, 7)
        ]
        grid_files = [netcdf.in_mem_xr(x) for x in grid_files]
        ds_grid = xr.concat(grid_files, "tile")

        # Retain only time-invariant area fields
        grid = xr.Dataset()
        variables = list(ds_grid.variables.keys())
        for x in variables:
            if "area" in x or "frac" in x:
                grid[x] = ds_grid[x]

        # Get list of cell measures
        cell_measures = [
            dset[x].attrs["cell_measures"]
            for x in list(dset.variables)
            if "cell_measures" in list(dset[x].attrs.keys())
        ]
        cell_measures = sorted(list(set(cell_measures)))

        # Create dict of land groups based on cell measures
        land_groups = {}
        for x in cell_measures:
            land_groups[x] = xr.Dataset()

        # Loop over variables and assign them to groups
        variables = list(dset.variables.keys())
        for x in variables:
            if "cell_measures" in list(dset[x].attrs.keys()):
                _measure = dset[x].attrs["cell_measures"]
                dset[x].attrs["measure"] = _measure.split(" ")[-1]
                land_groups[_measure][x] = dset[x]

        # Since natural tile area is time-dependent, ignore for now
        if "area: area_ntrl" in cell_measures:
            cell_measures.remove("area: area_ntrl")
        if "area: glac_area" in cell_measures:
            cell_measures.remove("area: glac_area")

        # Loop over groups
        for measure in cell_measures:
            _dset = land_groups[measure]
            _measure = measure.split(" ")[-1]
            _area = ds_grid[_measure]

            for region in ["global", "nh", "sh", "tropics"]:
                _masked_area = xrtools.xr_mask_by_latitude(
                    _area, ds_grid.geolat_t, region=region
                )
                gmeantools.write_sqlite_data(
                    f"{fyear}.{region}Ave{modules[member]}.db",
                    _measure,
                    fyear,
                    _masked_area.sum().data,
                )
                # _masked_area = _masked_area.fillna(0)

                weights = dset.average_DT.astype("float") * _masked_area

                if _measure == "soil_area":
                    area_x_depth = _masked_area * depth
                    gmeantools.write_sqlite_data(
                        f"{fyear}.{region}Ave{modules[member]}.db",
                        "soil_volume",
                        fyear,
                        area_x_depth.sum().data,
                    )
                    weights = [
                        weights,
                        (weights * depth).transpose(
                            "tile", "time", "zfull_soil", "grid_yt", "grid_xt"
                        ),
                    ]
                    for x in list(_dset.variables):
                        if "zfull_soil" in list(_dset[x].dims):
                            _dset[x].attrs["measure"] = "soil_volume"

                _dset_weighted = xrtools.xr_weighted_avg(_dset, weights)
                xrtools.xr_to_db(
                    _dset_weighted, fyear, f"{fyear}.{region}Ave{modules[member]}.db"
                )
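# Sketch of the layer-thickness calculation used above for soil volume
# weighting: zhalf_soil holds layer interface depths, so successive differences
# give the thickness of each full level (zfull_soil). The interface depths
# below are made up for illustration.
import numpy as np
import xarray as xr

zhalf_soil = np.array([0.0, 0.02, 0.06, 0.14, 0.30])  # interface depths (m)
thickness = [zhalf_soil[x] - zhalf_soil[x - 1] for x in range(1, len(zhalf_soil))]
depth = xr.DataArray(thickness, dims=("zfull_soil"))
print(depth.values)  # [0.02 0.04 0.08 0.16]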
def xr_average(fyear, tar, modules):
    """xarray-based processing routines for sea ice model output

    Parameters
    ----------
    fyear : str
        Year being processed (YYYY)
    tar : tarfile
        In-memory tarfile object
    modules : dict
        Mappings of netCDF file names inside the tar file to output db file names
    """

    members = [x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.nc")]

    for member in members:
        print(f"{fyear}.{member}.nc")
        data_file = netcdf.extract_from_tar(tar, f"{fyear}.{member}.nc")
        dset = netcdf.in_mem_xr(data_file)

        grid_file = (
            f"{fyear}.ice_static.nc"
            if netcdf.tar_member_exists(tar, f"{fyear}.ice_static.nc")
            else f"{fyear}.ice_month.nc"
        )
        grid_file = netcdf.extract_from_tar(tar, grid_file)
        ds_grid = netcdf.in_mem_xr(grid_file)

        # Retain only time-dependent variables; sum CN over ice categories
        variables = list(dset.variables.keys())
        for x in variables:
            if "time" not in dset[x].dims:
                del dset[x]
            if x == "CN":
                dset[x] = dset[x].sum(("ct")).assign_attrs(dset[x].attrs)

        if "CN" in list(dset.variables.keys()):
            concentration = dset["CN"]
        elif "siconc" in list(dset.variables.keys()):
            concentration = dset["siconc"]
        else:
            warnings.warn("Unable to determine sea ice concentration")

        earth_radius = 6371.0e3  # Radius of the Earth in 'm'
        _area = ds_grid["CELL_AREA"] * 4.0 * np.pi * (earth_radius**2)

        # --- todo Add in concentration and extent

        for region in ["global", "nh", "sh"]:
            _masked_area = xrtools.xr_mask_by_latitude(
                _area, ds_grid.GEOLAT, region=region
            )
            gmeantools.write_sqlite_data(
                f"{fyear}.{region}Ave{modules[member]}.db",
                "area",
                fyear,
                _masked_area.sum().data,
            )

            # area-weight but not time_weight
            weights = _masked_area
            _dset = dset.copy()

            ones = (concentration * 0.0) + 1.0
            ice_area = ones.where(concentration > 0.0, 0.0) * _masked_area
            extent = ones.where(concentration > 0.15, 0.0) * _masked_area

            ice_area_attrs = {
                "long_name": "area covered by sea ice",
                "units": "million km2",
            }
            extent_attrs = {"long_name": "sea ice extent", "units": "million km2"}

            for x in list(_dset.variables):
                if tuple(_dset[x].dims)[-3::] == ("time", "yT", "xT"):
                    _dset[x] = (
                        (_dset[x] * weights).sum(("yT", "xT")) / weights.sum()
                    ).assign_attrs(dset[x].attrs)
                    _dset["ice_area"] = (
                        ice_area.sum(("yT", "xT")) * 1.0e-12
                    ).assign_attrs(ice_area_attrs)
                    _dset["extent"] = (
                        extent.sum(("yT", "xT")) * 1.0e-12
                    ).assign_attrs(extent_attrs)
                elif tuple(_dset[x].dims)[-3::] == ("time", "yt", "xt"):
                    _dset[x] = (
                        (_dset[x] * weights).sum(("yt", "xt")) / weights.sum()
                    ).assign_attrs(dset[x].attrs)
                    _dset["ice_area"] = (
                        ice_area.sum(("yt", "xt")) * 1.0e-12
                    ).assign_attrs(ice_area_attrs)
                    _dset["extent"] = (
                        extent.sum(("yt", "xt")) * 1.0e-12
                    ).assign_attrs(extent_attrs)
                else:
                    del _dset[x]

            _dset_max = _dset.max(("time"))
            newvars = {x: x + "_max" for x in list(_dset_max.variables)}
            _dset_max = _dset_max.rename(newvars)

            _dset_min = _dset.min(("time"))
            newvars = {x: x + "_min" for x in list(_dset_min.variables)}
            _dset_min = _dset_min.rename(newvars)

            weights = dset.average_DT.astype("float")
            _dset_weighted = xrtools.xr_weighted_avg(_dset, weights)
            newvars = {x: x + "_mean" for x in list(_dset_weighted.variables)}
            _dset_weighted = _dset_weighted.rename(newvars)

            xrtools.xr_to_db(_dset_weighted, fyear, f"{fyear}.{region}AveIce.db")
            xrtools.xr_to_db(_dset_max, fyear, f"{fyear}.{region}AveIce.db")
            xrtools.xr_to_db(_dset_min, fyear, f"{fyear}.{region}AveIce.db")
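# Sketch of the ice-area and ice-extent definitions applied above: ice area
# sums cell area wherever any ice is present, while extent sums cell area only
# where concentration exceeds 15%. Values are converted to million km2 by the
# 1.0e-12 factor in the averager. The arrays here are synthetic.
import numpy as np
import xarray as xr

concentration = xr.DataArray(np.array([[0.0, 0.05], [0.2, 0.9]]), dims=("yt", "xt"))
cell_area = xr.DataArray(np.full((2, 2), 1.0e9), dims=("yt", "xt"))  # m2

ones = (concentration * 0.0) + 1.0
ice_area = (ones.where(concentration > 0.0, 0.0) * cell_area).sum() * 1.0e-12
extent = (ones.where(concentration > 0.15, 0.0) * cell_area).sum() * 1.0e-12
print(float(ice_area), float(extent))  # 0.003 vs 0.002 million km2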