def xr_average(fyear, tar, modules): """xarray-based processing routines for cubed sphere atmos. output Parameters ---------- fyear : str Year being processed (YYYY) tar : tarfile In-memory tarfile object modules : dict Mappings of netCDF file names inside the tar file to output db file names """ members = [ x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.tile1.nc") ] for member in members: print(f"{fyear}.{member}.nc") data_files = [ netcdf.extract_from_tar(tar, f"{fyear}.{member}.tile{x}.nc") for x in range(1, 7) ] data_files = [netcdf.in_mem_xr(x) for x in data_files] dset = xr.concat(data_files, "tile") # Retain only time-dependent variables variables = list(dset.variables.keys()) for x in variables: if "time" not in dset[x].dims: del dset[x] # Aggregate grid spec tiles grid_files = [ netcdf.extract_from_tar(tar, f"{fyear}.grid_spec.tile{x}.nc") for x in range(1, 7) ] grid_files = [netcdf.in_mem_xr(x) for x in grid_files] ds_grid = xr.concat(grid_files, "tile") dset["area"] = ds_grid["area"] for region in ["global", "nh", "sh", "tropics"]: _masked_area = xrtools.xr_mask_by_latitude( dset.area, ds_grid.grid_latt, region=region ) gmeantools.write_sqlite_data( f"{fyear}.{region}Ave{modules[member]}.db", "area", fyear, _masked_area.sum().data, ) weights = dset.average_DT.astype("float") * _masked_area _dset_weighted = xrtools.xr_weighted_avg(dset, weights) xrtools.xr_to_db( _dset_weighted, fyear, f"{fyear}.{region}Ave{modules[member]}.db" )
def xr_average(fyear, tar, modules): """xarray-based processing routines for lat-lon model output Parameters ---------- fyear : str Year being processed (YYYY) tar : tarfile In-memory tarfile object modules : dict Mappings of netCDF file names inside the tar file to output db file names """ members = [ x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.nc") ] for member in members: print(f"{fyear}.{member}.nc") data_file = netcdf.extract_from_tar(tar, f"{fyear}.{member}.nc") dset = netcdf.in_mem_xr(data_file) grid_file = (f"{fyear}.ocean_static.nc" if netcdf.tar_member_exists( tar, f"{fyear}.ocean_static.nc") else f"{fyear}.ocean_month.nc") grid_file = netcdf.extract_from_tar(tar, grid_file) ds_grid = netcdf.in_mem_xr(grid_file) # Retain only time-dependent variables variables = list(dset.variables.keys()) for x in variables: if "time" not in dset[x].dims: del dset[x] _area = "areacello" if "areacello" in list( ds_grid.variables) else "area_t" if "wet" in list(ds_grid.variables): _wet = ds_grid["wet"] else: _wet = 1.0 warnings.warn("Unable to find wet mask") _area = ds_grid[_area] * _wet for region in ["global", "nh", "sh", "tropics"]: _masked_area = xrtools.xr_mask_by_latitude(_area, ds_grid.geolat, region=region) gmeantools.write_sqlite_data( f"{fyear}.{region}Ave{modules[member]}.db", "area", fyear, _masked_area.sum().data, ) weights = dset.average_DT.astype("float") * _masked_area _dset_weighted = xrtools.xr_weighted_avg(dset, weights) xrtools.xr_to_db(_dset_weighted, fyear, f"{fyear}.{region}Ave{modules[member]}.db")
def xr_average(fyear, tar, modules): """xarray-based processing routines for lat-lon model output Parameters ---------- fyear : str Year being processed (YYYY) tar : tarfile In-memory tarfile object modules : dict Mappings of netCDF file names inside the tar file to output db file names """ members = [ x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.nc") ] for member in members: print(f"{fyear}.{member}.nc") data_file = netcdf.extract_from_tar(tar, f"{fyear}.{member}.nc") dset = netcdf.in_mem_xr(data_file) geolat = np.tile(dset.lat.data[:, None], (1, dset.lon.data.shape[0])) geolon = np.tile(dset.lon.data[None, :], (dset.lat.data.shape[0], 1)) _geolat = xr.DataArray(geolat, coords=((dset.lat, dset.lon))) _geolon = xr.DataArray(geolon, coords=((dset.lat, dset.lon))) _area = xr.DataArray( gmeantools.standard_grid_cell_area(dset.lat.data, dset.lon.data), coords=((dset.lat, dset.lon)), ) # Retain only time-dependent variables variables = list(dset.variables.keys()) for x in variables: if "time" not in dset[x].dims: del dset[x] for region in ["global", "nh", "sh", "tropics"]: _masked_area = xrtools.xr_mask_by_latitude(_area, _geolat, region=region) gmeantools.write_sqlite_data( f"{fyear}.{region}Ave{modules[member]}.db", "area", fyear, _masked_area.sum().data, ) weights = dset.average_DT.astype("float") * _masked_area _dset_weighted = xrtools.xr_weighted_avg(dset, weights) xrtools.xr_to_db(_dset_weighted, fyear, f"{fyear}.{region}Ave{modules[member]}.db")
def xr_to_db(dset, fyear, sqlfile):
    """Writes an xarray dataset to SQLite format

    Parameters
    ----------
    dset : xarray.Dataset
        Input dataset
    fyear : str
        Year label (YYYY)
    sqlfile : str
        Filename of output db file
    """
    for var in list(dset.variables):
        write_sqlite_data(sqlfile, var, str(fyear), str(dset[var].data))
        if "units" in list(dset[var].attrs):
            write_metadata(sqlfile, var, "units", dset[var].units)
        if "long_name" in list(dset[var].attrs):
            write_metadata(sqlfile, var, "long_name", dset[var].long_name)
        if "measure" in list(dset[var].attrs):
            write_metadata(sqlfile, var, "cell_measure", dset[var].measure)
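# Usage sketch only (file name, variable, and attribute values are made up):
# each variable in the dataset becomes one row keyed by the year label, and
# recognised attributes (units, long_name, measure) are stored as metadata.
#
#     import xarray as xr
#
#     example = xr.Dataset()
#     example["t_ref"] = xr.DataArray(
#         287.6,
#         attrs={"units": "K", "long_name": "near-surface temperature", "measure": "area"},
#     )
#     xr_to_db(example, "0001", "0001.globalAveAtmos.db")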
def mom6_amoc(fyear, tar, label="Ocean", outdir="./"): """Driver for AMOC calculation in MOM6-class models Parameters ---------- fyear : str Year label (YYYY) tar : tarfile In-memory history tarfile object label : str SQLite output stream name outdir : str, path-like Path to output SQLite file """ member = f"{fyear}.ocean_annual_z.nc" static = f"{fyear}.ocean_static.nc" annual_file = (extract_from_tar(tar, member, ncfile=True) if tar_member_exists(tar, member) else None) static_file = (extract_from_tar(tar, static, ncfile=True) if tar_member_exists(tar, static) else None) if annual_file is not None and static_file is not None: # open the Dataset with the transports dset = in_mem_xr(annual_file) # select first time level from static file # editorial comment: why does the static file have a time dimension? dset_static = in_mem_xr(static_file).isel(time=0) # merge static DataSet with transport DataSet for geo_coord in ["geolon_v", "geolat_v", "wet_v"]: if geo_coord in dset_static.variables: dset[geo_coord] = xr.DataArray( dset_static[geo_coord].values, dims=dset_static[geo_coord].dims) required_vars = ["geolon_v", "geolat_v", "umo", "vmo"] dset_vars = list(dset.variables) if list(set(required_vars) - set(dset_vars)) == []: # calculate non-rotated y-ward moc array moc = xoverturning.calcmoc(dset, basin="atl-arc", verbose=False) # max streamfunction between 20N-80N and 500-2500m depth maxsfn = moc.sel(yq=slice(20.0, 80.0), z_i=slice(500.0, 2500.0)).max() maxsfn = maxsfn.astype(np.float16).values print(f" AMOC = {maxsfn}") # max streamfunction at 26.5N rapidsfn = moc.sel(yq=26.5, method="nearest") rapidsfn = rapidsfn.sel(z_i=slice(500.0, 2500.0)).max() rapidsfn = rapidsfn.astype(np.float16).values print(f" RAPID AMOC = {rapidsfn}") # -- Write to sqlite gmeantools.write_sqlite_data( outdir + "/" + fyear + ".globalAve" + label + ".db", "amoc_vh", fyear[:4], maxsfn, ) gmeantools.write_sqlite_data( outdir + "/" + fyear + ".globalAve" + label + ".db", "amoc_rapid", fyear[:4], rapidsfn, ) else: warnings.warn(f"{required_vars} are required to calculate AMOC") else: warnings.warn( "AMOC calculation requires ocean_static and ocean_annual_z")
def timing(ascii_file, fyear, outdir, label):
    """Extracts FMS timings

    Parameters
    ----------
    ascii_file : str, path-like
        Path to ascii tar file
    fyear : str
        Year to process (YYYY)
    outdir : str
        Path for output SQLite file
    label : str
        Name of output SQLite file
    """

    def ascii_tar_to_stats_df(ascii_file):
        """Subroutine to read fms.out from ascii tar file, extract the
        clock timings, and return a pandas dataframe

        Parameters
        ----------
        ascii_file : str, path-like
            Path to ascii tar file

        Returns
        -------
        pandas.DataFrame of timings
        """
        member = None
        tar = tf.open(ascii_file)
        for member in tar.getnames():
            if "fms.out" in member:
                break
        txtfile = tar.extractfile(member)
        content = txtfile.readlines()

        x = -1
        for x, line in enumerate(content):
            if "Total runtime" in line.decode("utf-8"):
                break
        tar.close()

        content = content[x::]

        output = io.BytesIO()
        output.write(str.encode("clock,min,max,mean,std\n"))
        for line in content:
            line = line.decode("utf-8")
            if not line.startswith(" "):
                line = line.split()
                line = line[-1::-1]
                label = " ".join(line[8::][-1::-1])
                label = label.replace(" ", "_")
                label = label.replace("-", "_")
                label = label.replace("&", "_")
                for x in ["(", ")", "*", "/", ":"]:
                    label = label.replace(x, "")
                line = [label] + line[0:8][-1::-1][0:4]
                line = ",".join(line) + "\n"
                output.write(str.encode(line))
        output.seek(0)

        df = pd.read_csv(output, delimiter=",")
        # df = df.set_index('clock')

        return df

    df = ascii_tar_to_stats_df(ascii_file)

    clocks = sorted(df["clock"].to_list())
    for clock in clocks:
        for attr in ["mean", "min", "max"]:
            val = df[df["clock"] == clock][attr].values[0]
            # -- Write to sqlite
            gmeantools.write_sqlite_data(
                outdir + "/" + fyear + ".globalAve" + label + ".db",
                clock + "_" + attr,
                fyear[:4],
                val,
            )
def xr_average(fyear, tar, modules): """xarray-based processing routines for cubed sphere LM4 land output Parameters ---------- fyear : str Year being processed (YYYY) tar : tarfile In-memory tarfile object modules : dict Mappings of netCDF file names inside the tar file to output db file names """ members = [ x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.tile1.nc") ] for member in members: print(f"{fyear}.{member}.nc") data_files = [ netcdf.extract_from_tar(tar, f"{fyear}.{member}.tile{x}.nc") for x in range(1, 7) ] data_files = [netcdf.in_mem_xr(x) for x in data_files] dset = xr.concat(data_files, "tile") # Calculate cell depth depth = dset["zhalf_soil"].data depth = [depth[x] - depth[x - 1] for x in range(1, len(depth))] dset["depth"] = xr.DataArray(depth, dims=("zfull_soil")) depth = dset["depth"] # Retain only time-dependent variables variables = list(dset.variables.keys()) for x in variables: if "time" not in dset[x].dims: del dset[x] # Load grid data grid_files = [ netcdf.extract_from_tar(tar, f"{fyear}.land_static.tile{x}.nc") for x in range(1, 7) ] grid_files = [netcdf.in_mem_xr(x) for x in grid_files] ds_grid = xr.concat(grid_files, "tile") # Retain only time-invariant area fields grid = xr.Dataset() variables = list(ds_grid.variables.keys()) for x in variables: if "area" in x or "frac" in x: grid[x] = ds_grid[x] # Get List of cell measures cell_measures = [ dset[x].attrs["cell_measures"] for x in list(dset.variables) if "cell_measures" in list(dset[x].attrs.keys()) ] cell_measures = sorted(list(set(cell_measures))) # Create dict of land groups based on cell measures land_groups = {} for x in cell_measures: land_groups[x] = xr.Dataset() # Loop over variables and assign them to groups variables = list(dset.variables.keys()) for x in variables: if "cell_measures" in list(dset[x].attrs.keys()): _measure = dset[x].attrs["cell_measures"] dset[x].attrs["measure"] = _measure.split(" ")[-1] land_groups[_measure][x] = dset[x] # Since natural tile area is time-dependent, ignore for now if "area: area_ntrl" in cell_measures: cell_measures.remove("area: area_ntrl") if "area: glac_area" in cell_measures: cell_measures.remove("area: glac_area") # Loop over groups for measure in cell_measures: _dset = land_groups[measure] _measure = measure.split(" ")[-1] _area = ds_grid[_measure] for region in ["global", "nh", "sh", "tropics"]: _masked_area = xrtools.xr_mask_by_latitude(_area, ds_grid.geolat_t, region=region) gmeantools.write_sqlite_data( f"{fyear}.{region}Ave{modules[member]}.db", _measure, fyear, _masked_area.sum().data, ) # _masked_area = _masked_area.fillna(0) weights = dset.average_DT.astype("float") * _masked_area if _measure == "soil_area": area_x_depth = _masked_area * depth gmeantools.write_sqlite_data( f"{fyear}.{region}Ave{modules[member]}.db", "soil_volume", fyear, area_x_depth.sum().data, ) weights = [ weights, (weights * depth).transpose("tile", "time", "zfull_soil", "grid_yt", "grid_xt"), ] for x in list(_dset.variables): if "zfull_soil" in list(_dset[x].dims): _dset[x].attrs["measure"] = "soil_volume" _dset_weighted = xrtools.xr_weighted_avg(_dset, weights) xrtools.xr_to_db(_dset_weighted, fyear, f"{fyear}.{region}Ave{modules[member]}.db")
def xr_average(fyear, tar, modules): """xarray-based processing routines for lat-lon model output Parameters ---------- fyear : str Year being processed (YYYY) tar : tarfile In-memory tarfile object modules : dict Mappings of netCDF file names inside the tar file to output db file names """ members = [ x for x in modules if netcdf.tar_member_exists(tar, f"{fyear}.{x}.nc") ] for member in members: print(f"{fyear}.{member}.nc") data_file = netcdf.extract_from_tar(tar, f"{fyear}.ice_month.nc") dset = netcdf.in_mem_xr(data_file) grid_file = (f"{fyear}.ice_static.nc" if netcdf.tar_member_exists( tar, f"{fyear}.ice_static.nc") else f"{fyear}.ice_month.nc") grid_file = netcdf.extract_from_tar(tar, grid_file) ds_grid = netcdf.in_mem_xr(grid_file) # Retain only time-dependent variables variables = list(dset.variables.keys()) for x in variables: if "time" not in dset[x].dims: del dset[x] if x == "CN": dset[x] = dset[x].sum(("ct")).assign_attrs(dset[x].attrs) if "CN" in list(dset.variables.keys()): concentration = dset["CN"] elif "siconc" in list(dset.variables.keys()): concentration = dset["siconc"] else: warnings.warn("Unable to determine sea ice concentation") earth_radius = 6371.0e3 # Radius of the Earth in 'm' _area = ds_grid["CELL_AREA"] * 4.0 * np.pi * (earth_radius**2) # --- todo Add in concentration and extent for region in ["global", "nh", "sh"]: _masked_area = xrtools.xr_mask_by_latitude(_area, ds_grid.GEOLAT, region=region) gmeantools.write_sqlite_data( f"{fyear}.{region}Ave{modules[member]}.db", "area", fyear, _masked_area.sum().data, ) # area-weight but not time_weight weights = _masked_area _dset = dset.copy() ones = (concentration * 0.0) + 1.0 ice_area = ones.where(concentration > 0.0, 0.0) * _masked_area extent = ones.where(concentration > 0.15, 0.0) * _masked_area ice_area_attrs = { "long_name": "area covered by sea ice", "units": "million km2", } extent_attrs = { "long_name": "sea ice extent", "units": "million km2" } for x in list(_dset.variables): if tuple(_dset[x].dims)[-3::] == ("time", "yT", "xT"): _dset[x] = ((_dset[x] * weights).sum(("yT", "xT")) / weights.sum()).assign_attrs(dset[x].attrs) _dset["ice_area"] = (ice_area.sum( ("yT", "xT")) * 1.0e-12).assign_attrs(ice_area_attrs) _dset["extent"] = (extent.sum( ("yT", "xT")) * 1.0e-12).assign_attrs(extent_attrs) elif tuple(_dset[x].dims)[-3::] == ("time", "yt", "xt"): _dset[x] = ((_dset[x] * weights).sum(("yt", "xt")) / weights.sum()).assign_attrs(dset[x].attrs) _dset["ice_area"] = (ice_area.sum( ("yt", "xt")) * 1.0e-12).assign_attrs(ice_area_attrs) _dset["extent"] = (extent.sum( ("yt", "xt")) * 1.0e-12).assign_attrs(extent_attrs) else: del _dset[x] _dset_max = _dset.max(("time")) newvars = {x: x + "_max" for x in list(_dset_max.variables)} _dset_max = _dset_max.rename(newvars) _dset_min = _dset.min(("time")) newvars = {x: x + "_min" for x in list(_dset_min.variables)} _dset_min = _dset_min.rename(newvars) weights = dset.average_DT.astype("float") _dset_weighted = xrtools.xr_weighted_avg(_dset, weights) newvars = {x: x + "_mean" for x in list(_dset_weighted.variables)} _dset_weighted = _dset_weighted.rename(newvars) xrtools.xr_to_db(_dset_weighted, fyear, f"{fyear}.{region}AveIce.db") xrtools.xr_to_db(_dset_max, fyear, f"{fyear}.{region}AveIce.db") xrtools.xr_to_db(_dset_min, fyear, f"{fyear}.{region}AveIce.db")
def process_var(variable, averager=None):
    """Function called by multiprocessing thread to process a variable

    Parameters
    ----------
    variable : RichVariable object
        Input variable to process
    """
    var = None
    _cell_depth = None

    if averager in ["cubesphere", "land_lm4"]:
        # open up data tiles and get variable info
        data_file = [nctools.in_mem_nc(x) for x in variable.data_file]
        varshape = data_file[0].variables[variable.varname].shape
        ndim = len(varshape)
        units = gmeantools.extract_metadata(data_file[0], variable.varname, "units")
        long_name = gmeantools.extract_metadata(
            data_file[0], variable.varname, "long_name"
        )

        if (averager == "land_lm4") and (ndim >= 3):
            cell_measures = gmeantools.extract_metadata(
                data_file[0], variable.varname, "cell_measures"
            )
            area_measure = gmeantools.parse_cell_measures(cell_measures, "area")
            if (area_measure is not None) and (area_measure != "area_ntrl"):
                var = gmeantools.cube_sphere_aggregate(variable.varname, data_file)
                var = np.ma.average(
                    var, axis=0, weights=data_file[0].variables["average_DT"][:]
                )
                _area_weight = variable.area_types[area_measure]
                _cell_depth = variable.cell_depth if ndim == 4 else None

        elif (averager == "cubesphere") and (ndim == 3):
            var = gmeantools.cube_sphere_aggregate(variable.varname, data_file)
            var = np.ma.average(
                var, axis=0, weights=data_file[0].variables["average_DT"][:]
            )
            _area_weight = variable.cell_area

    else:
        fdata = nctools.in_mem_nc(variable.data_file)
        units = gmeantools.extract_metadata(fdata, variable.varname, "units")
        long_name = gmeantools.extract_metadata(fdata, variable.varname, "long_name")
        ndim = len(fdata.variables[variable.varname].shape)

        if (averager == "tripolar") and (ndim >= 3):
            dims = fdata.variables[variable.varname].dimensions
            if (dims[-2::] == ("yh", "xh")) and (ndim == 3):
                var = fdata.variables[variable.varname][:]
            elif (ndim == 4) and (variable.varname[0:9] == "tot_layer"):
                var = fdata.variables[variable.varname][:]
                var = np.ma.sum(var, axis=1).squeeze()
            if var is not None:
                var = np.ma.average(
                    var, axis=0, weights=fdata.variables["average_DT"][:]
                )
                _area_weight = variable.cell_area

        if averager == "ice":
            if ndim == len(variable.cell_area.shape):
                var = fdata.variables[variable.varname][:]

        if averager == "lat-lon" and ndim == 3:
            var = fdata.variables[variable.varname][:]
            var = np.ma.average(var, axis=0, weights=fdata.variables["average_DT"][:])
            _area_weight = variable.cell_area

        if averager == "land-lm3" and ndim >= 3:
            var = fdata[variable.varname][:]
            var = np.ma.average(var, axis=0, weights=fdata["average_DT"][:])

    if var is None:
        return None

    for reg in ["global", "tropics", "nh", "sh"]:
        sqlfile = (
            variable.outdir
            + "/"
            + variable.fyear
            + "."
            + reg
            + "Ave"
            + variable.label
            + ".db"
        )

        if averager == "ice":
            if var.shape != variable.cell_area.shape:
                return None
            # mask by latitude bands
            _v, _area = gmeantools.mask_latitude_bands(
                var, variable.cell_area, variable.geolat, region=reg
            )
            _v = np.ma.sum((_v * _area), axis=(-1, -2)) / np.ma.sum(
                _area, axis=(-1, -2)
            )

            # write out ice annual mean, min, and max
            gmeantools.write_sqlite_data(
                sqlfile, variable.varname + "_max", variable.fyear[:4], np.ma.max(_v)
            )
            gmeantools.write_sqlite_data(
                sqlfile, variable.varname + "_min", variable.fyear[:4], np.ma.min(_v)
            )
            gmeantools.write_sqlite_data(
                sqlfile,
                variable.varname + "_mean",
                variable.fyear[:4],
                np.ma.average(_v, axis=0, weights=variable.average_dt),
            )

            # sum of area not reported for ice diagnostics
            area_sum = None

        # -- Legacy LM3 Land Model
        elif averager == "land-lm3":
            result, summed = gmeantools.legacy_area_mean(
                var,
                variable.cell_area,
                variable.geolat,
                variable.geolon,
                cell_frac=variable.cell_frac,
                soil_frac=variable.soil_frac,
                region=reg,
                varname=variable.varname,
                component="land",
            )
            # use the legacy write method of the LM3 version of the land model
            gmeantools.write_sqlite_data(
                sqlfile,
                variable.varname,
                variable.fyear[:4],
                result,
                summed,
                component="land",
            )
            # legacy global mean format did not have land area
            area_sum = None

        else:
            if (averager == "land_lm4") and (_cell_depth is not None):
                if var.shape[0] != _cell_depth.shape[0]:
                    return None
            result, area_sum = gmeantools.area_mean(
                var,
                _area_weight,
                variable.geolat,
                variable.geolon,
                region=reg,
                cell_depth=_cell_depth,
            )
            if (averager == "land_lm4") and (hasattr(result, "mask")):
                return None
            gmeantools.write_sqlite_data(
                sqlfile, variable.varname, variable.fyear[:4], result
            )

        gmeantools.write_metadata(sqlfile, variable.varname, "units", units)
        gmeantools.write_metadata(sqlfile, variable.varname, "long_name", long_name)

        if averager == "land_lm4":
            area_measure = (
                area_measure.replace("area", "volume") if ndim == 4 else area_measure
            )
            gmeantools.write_metadata(
                sqlfile, variable.varname, "cell_measure", area_measure
            )
            gmeantools.write_sqlite_data(
                sqlfile, area_measure, variable.fyear[:4], area_sum
            )
        else:
            if area_sum is not None:
                gmeantools.write_sqlite_data(
                    sqlfile, "area", variable.fyear[:4], area_sum
                )

    if averager in ["cubesphere", "land_lm4"]:
        _ = [x.close() for x in data_file]
    else:
        fdata.close()

    return None