def calculate(ds, dobs, region="nh"): """Function to calculate sea ice parameters""" # Container dictionaries to hold results model = xr.Dataset() obs = xr.Dataset() # Add coordinates model["GEOLON"] = ds["GEOLON"] model["GEOLAT"] = ds["GEOLAT"] model = model.rename({"GEOLON": "lon", "GEOLAT": "lat"}) obs["lon"] = dobs["lon"] obs["lat"] = dobs["lat"] # Create annual cycle climatology model["ac"] = annual_cycle(ds, "CN") obs["ac"] = annual_cycle(dobs, "sic") # Regrid the observations to the model grid (for plotting) regridded = curv_to_curv(obs, model, reuse_weights=False) # Calculate area and extent if region == "nh": model["area"] = xr.where(ds["GEOLAT"] > 0.0, model["ac"] * ds.AREA, 0.0) model["ext"] = xr.where( (model["ac"] > 0.15) & (ds["GEOLAT"] > 0.0), ds.AREA, 0.0 ) elif region == "sh": model["area"] = xr.where(ds["GEOLAT"] < 0.0, model["ac"] * ds.AREA, 0.0) model["ext"] = xr.where( (model["ac"] > 0.15) & (ds["GEOLAT"] < 0.0), ds.AREA, 0.0 ) else: raise ValueError(f"Unknown region {region}. Option are nh or sh") # Ensure dims are in the correct order model["area"] = model["area"].transpose("month", ...) model["ext"] = model["ext"].transpose("month", ...) # Sum to get model area and extent model["area"] = model["area"].sum(axis=(-2, -1)) * 1.0e-12 model["ext"] = model["ext"].sum(axis=(-2, -1)) * 1.0e-12 # Get obs model and extent obs["area"] = obs["ac"] * dobs.areacello obs["area"] = obs["area"].transpose("month", ...) obs["area"] = obs["area"].sum(axis=(-2, -1)) * 1.0e-12 obs["ext"] = xr.where(obs["ac"] > 0.15, dobs.areacello, 0.0) obs["ext"] = obs["ext"].transpose("month", ...) obs["ext"] = obs["ext"].sum(axis=(-2, -1)) * 1.0e-12 # Get tuple of start year and end years for model and observations model.attrs["time"] = date_range(ds) obs.attrs["time"] = date_range(dobs) return model, obs, regridded
def read(dictArgs, adv_varname="T_ady_2d", dif_varname="T_diffy_2d"): """Read in heat transport data""" infile = dictArgs["infile"] ds = xr.open_mfdataset(infile, combine="by_coords") # advective component of transport advective = ds[adv_varname] # diffusive component of transport if dif_varname in ds.variables: diffusive = ds[dif_varname] else: diffusive = None outputgrid = "nonsymetric" dsV = horizontal_grid(dictArgs, point_type="v", outputgrid=outputgrid) if advective.shape[-2] == (dsV.geolat.shape[0] + 1): print("Symmetric grid detected.<br>") outputgrid = "symetric" dsV = horizontal_grid(dictArgs, point_type="v", outputgrid=outputgrid) geolon_v = dsV.geolon.values geolat_v = dsV.geolat.values yq = dsV.nominal_y.values basin_code = dsV.basin.values # basin masks atlantic_arctic_mask = generate_basin_masks(basin_code, basin="atlantic_arctic") indo_pacific_mask = generate_basin_masks(basin_code, basin="indo_pacific") # date range dates = date_range(ds) return ( geolon_v, geolat_v, yq, basin_code, atlantic_arctic_mask, indo_pacific_mask, advective, diffusive, dates, )
def read(dictArgs):
    """Read model data, reference stress dataset, and static grid file"""

    ds_model = xr.open_mfdataset(dictArgs["infile"], use_cftime=True)
    dates = date_range(ds_model)

    if dictArgs["obsfile"] is not None:
        # priority to user-provided obs file
        ds_ref_tau = xr.open_mfdataset(
            dictArgs["obsfile"], combine="by_coords", decode_times=False
        )
    else:
        # use dataset from catalog, either from command line or default
        cat = open_intake_catalog(dictArgs["platform"], "OMIP2")
        obsdataset = f"{dictArgs['dataset']}_{dictArgs['period']}"
        ds_ref_tau = cat[obsdataset].to_dask()

    ds_static = xr.open_mfdataset(dictArgs["static"])

    # Replace the nominal xq and yq by indices so that Xarray does not get
    # confused. Confusion arises since there are inconsistencies between the
    # static file grid and the model data grid for the last value of yq. We
    # never need xq and yq for actual calculations, so filling these arrays
    # with any value is not going to change any results. But Xarray needs
    # them to be consistent between the two files when taking the curl of
    # the stress.
    ds_model["xq"] = xr.DataArray(np.arange(len(ds_model["xq"])), dims=["xq"])
    ds_model["yq"] = xr.DataArray(np.arange(len(ds_model["yq"])), dims=["yq"])
    ds_ref_tau["xq"] = xr.DataArray(np.arange(len(ds_ref_tau["xq"])), dims=["xq"])
    ds_ref_tau["yq"] = xr.DataArray(np.arange(len(ds_ref_tau["yq"])), dims=["yq"])
    ds_static["xq"] = xr.DataArray(np.arange(len(ds_static["xq"])), dims=["xq"])
    ds_static["yq"] = xr.DataArray(np.arange(len(ds_static["yq"])), dims=["yq"])

    ds_model.attrs = {"date_range": dates}

    return ds_model, ds_ref_tau, ds_static
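
# Hedged sketch (assumption) of the dictArgs keys consumed by the stress
# reader above; all values shown are hypothetical. Note the catalog entry
# is looked up as f"{dataset}_{period}".
_example_stress_dictArgs = {
    "infile": "ocean_monthly.*.nc",  # hypothetical model output path
    "obsfile": None,                 # None -> fall back to the OMIP2 catalog
    "platform": "gfdl",              # hypothetical platform name
    "dataset": "ERA5",               # hypothetical reference product
    "period": "1979-2018",           # hypothetical averaging period
    "static": "ocean_static.nc",     # hypothetical static grid file
}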
def read(dictArgs): """ read data from model and obs files, process data and return it """ if dictArgs["config"] is not None: # use dataset from catalog, either from command line or default cat = open_intake_catalog(dictArgs["platform"], dictArgs["config"]) ds_static = cat["ocean_static_1x1"].to_dask() if dictArgs["static"] is not None: ds_static = xr.open_dataset(dictArgs["static"]) # Compute basin codes codes = generate_basin_codes(ds_static, lon="lon", lat="lat") codes = np.array(codes) # depth coordinate if "deptho" in list(ds_static.variables): depth = ds_static.deptho.to_masked_array() elif "depth" in list(ds_static.variables): depth = ds_static.depth.to_masked_array() else: raise ValueError("Unable to find depth field.") depth = np.where(np.isnan(depth), 0.0, depth) depth = depth * -1.0 dsmodel = xr.open_mfdataset(dictArgs["infile"], combine="by_coords", use_cftime=True) if dictArgs["obsfile"] is not None: # priority to user-provided obs file dsobs = xr.open_mfdataset(dictArgs["obsfile"], combine="by_coords", decode_times=False) else: # use dataset from catalog, either from command line or default cat = open_intake_catalog(dictArgs["platform"], "obs") dsobs = cat[dictArgs["dataset"]].to_dask() # read in model and obs data datamodel = read_data(dsmodel, dictArgs["possible_variable_names"]) dataobs = read_data(dsobs, dictArgs["possible_variable_names"]) # reduce data along time, here mandatory if ("assigned_time" in datamodel.dims) and (len(datamodel["assigned_time"]) > 1): warnings.warn("input dataset has more than one time record, " + "performing non-weighted average") datamodel = simple_average(datamodel, "assigned_time") if ("assigned_time" in dataobs.dims) and len(dataobs["assigned_time"]) > 1: warnings.warn("reference dataset has more than one time record, " + "performing non-weighted average") dataobs = simple_average(dataobs, "assigned_time") datamodel = datamodel.squeeze() dataobs = dataobs.squeeze() # check final data is 3d assert len(datamodel.dims) == 3 assert len(dataobs.dims) == 3 # check consistency of coordinates assert np.allclose(datamodel["assigned_lon"], dataobs["assigned_lon"]) assert np.allclose(datamodel["assigned_lat"], dataobs["assigned_lat"]) # homogeneize coords dataobs = copy_coordinates(datamodel, dataobs, ["assigned_lon", "assigned_lat"]) # restrict model to where obs exists datamodel = datamodel.where(dataobs) # dump values model = datamodel.to_masked_array() obs = dataobs.to_masked_array() x = datamodel["assigned_lon"].values y = datamodel["assigned_lat"].values z = datamodel["assigned_depth"].values # convert z to negative values z = z * -1 # compute area if "areacello" in dsmodel.variables: area = dsmodel["areacello"].values else: if (model.shape[-2], model.shape[-1]) == (180, 360): area = compute_area_regular_grid(dsmodel) else: raise IOError("no cell area provided") # date range dates = date_range(dsmodel) return y, z, depth, area, codes, model, obs, dates
def read(dictArgs): """read data from model and obs files, process data and return it""" dsmodel = xr.open_mfdataset(dictArgs["infile"], combine="by_coords", use_cftime=True) if dictArgs["obsfile"] is not None: # priority to user-provided obs file dsobs = xr.open_mfdataset(dictArgs["obsfile"], combine="by_coords", decode_times=False) else: # use dataset from catalog, either from command line or default cat = open_intake_catalog(dictArgs["platform"], "obs") dsobs = cat[dictArgs["dataset"]].to_dask() # read in model and obs data datamodel = read_data(dsmodel, dictArgs["possible_variable_names"]) dataobs = read_data(dsobs, dictArgs["possible_variable_names"]) # subset data if dictArgs["depth"] is None: dictArgs["depth"] = dictArgs["surface_default_depth"] if dictArgs["depth"] is not None: datamodel = subset_data(datamodel, "assigned_depth", dictArgs["depth"]) dataobs = subset_data(dataobs, "assigned_depth", dictArgs["depth"]) # reduce data along depth (not yet implemented) if "depth_reduce" in dictArgs: if dictArgs["depth_reduce"] == "mean": # do mean pass elif dictArgs["depth_reduce"] == "sum": # do sum pass # reduce data along time, here mandatory if ("assigned_time" in datamodel.dims) and (len(datamodel["assigned_time"]) > 1): warnings.warn("input dataset has more than one time record, " + "performing non-weighted average") datamodel = simple_average(datamodel, "assigned_time") if ("assigned_time" in dataobs.dims) and len(dataobs["assigned_time"]) > 1: warnings.warn("reference dataset has more than one time record, " + "performing non-weighted average") dataobs = simple_average(dataobs, "assigned_time") datamodel = datamodel.squeeze() dataobs = dataobs.squeeze() # check final data is 2d assert len(datamodel.dims) == 2 assert len(dataobs.dims) == 2 # check consistency of coordinates assert np.allclose(datamodel["assigned_lon"], dataobs["assigned_lon"]) assert np.allclose(datamodel["assigned_lat"], dataobs["assigned_lat"]) # homogeneize coords dataobs = copy_coordinates(datamodel, dataobs, ["assigned_lon", "assigned_lat"]) # restrict model to where obs exists datamodel = datamodel.where(dataobs) # dump values model = datamodel.to_masked_array() obs = dataobs.to_masked_array() x = datamodel["assigned_lon"].values y = datamodel["assigned_lat"].values # compute area if "areacello" in dsmodel.variables: area = dsmodel["areacello"].values else: if model.shape == (180, 360): area = compute_area_regular_grid(dsmodel) else: raise IOError("no cell area provided") # date range dates = date_range(dsmodel) return x, y, area, model, obs, dates
def read(dictArgs, vcomp="vmo", ucomp="umo"): """Read required fields to plot MOC in om4labs Parameters ---------- dictArgs : dict Dictionary containing argparse options vcomp : str, optional Name of meridional component of total residual freshwater transport, by default "vmo" ucomp : str, optional Name of zonal component of total residual freshwater transport, by default "umo" Returns ------- xarray.DataSet Two xarray datasets; one containing `umo`, `vmo` and the other containing the grid information """ # initialize an xarray.Dataset to hold the output dset = xr.Dataset() dset_grid = xr.Dataset() # read the infile and get u, v transport components infile = dictArgs["infile"] ds = xr.open_mfdataset(infile, combine="by_coords") dset["umo"] = ds[ucomp] dset["vmo"] = ds[vcomp] # detect symmetric grid outputgrid = "symetric" if is_symmetric(dset) else "nonsymetric" # determine vertical coordinate layer = "z_l" if "z_l" in ds.dims else "rho2_l" if "rho2_l" in ds.dims else None assert layer is not None, "Unrecognized vertical coordinate." # get vertical coordinate edges interface = "z_i" if layer == "z_l" else "rho2_i" if layer == "rho2_l" else None dset[interface] = ds[interface] # save layer and interface info for use later in the workflow dset.attrs["layer"] = layer dset.attrs["interface"] = interface # get horizontal v-cell grid info dsV = horizontal_grid(dictArgs, coords=ds.coords, point_type="v", outputgrid=outputgrid) dset_grid["geolon_v"] = xr.DataArray(dsV.geolon.values, dims=("yq", "xh")) dset_grid["geolat_v"] = xr.DataArray(dsV.geolat.values, dims=("yq", "xh")) dset_grid["wet_v"] = xr.DataArray(dsV.wet.values, dims=("yq", "xh")) # get topography info depth = read_topography(dictArgs, coords=ds.coords, point_type="t") depth = np.where(np.isnan(depth.to_masked_array()), 0.0, depth) dset_grid["deptho"] = xr.DataArray(depth, dims=("yh", "xh")) # dset_grid requires `xq` dset_grid["xq"] = dset.xq # save date range as an attribute dates = date_range(ds) dset.attrs["dates"] = dates return (dset, dset_grid)