def read(dictArgs): """Read required fields for ENSO analysis Parameters ---------- dictArgs : dict Dictionary containing argparse options Returns ------- (xarray.DataArray, xarray.DataArray) """ # Open model array and static file array = xr.open_mfdataset(dictArgs["infile"], use_cftime=True)[dictArgs["varname"]] areacello = xr.open_dataset(dictArgs["static"])["areacello"].fillna(0.0) # Open pre-calculated ENSO spectra from Obs. if dictArgs["obsfile"] is not None: ref1 = xr.open_dataset(dictArgs["obsfile"]) ref1 = ref1["spectrum"] ref1.attrs = {**ref1.attrs, "label": "Reference"} reference = [ref1] else: cat = open_intake_catalog(dictArgs["platform"], "obs") # open reference datasets ref1 = cat["wavelet_NOAA_ERSST_v5_1957_2002"].to_dask() ref2 = cat["wavelet_NOAA_ERSST_v5_1880_2019"].to_dask() # select spectrum variable ref1 = ref1["spectrum"] ref2 = ref2["spectrum"] # set attributes ref1.attrs = {**ref1.attrs, "label": "ERSST v5 1957-2002"} ref2.attrs = {**ref2.attrs, "label": "ERSST v5 1880-2019"} reference = [ref1, ref2] return (array, areacello, reference)
def read(dictArgs):
    """Read model, reference wind stress, and static datasets"""

    ds_model = xr.open_mfdataset(dictArgs["infile"], use_cftime=True)
    dates = date_range(ds_model)

    if dictArgs["obsfile"] is not None:
        # priority to user-provided obs file
        ds_ref_tau = xr.open_mfdataset(
            dictArgs["obsfile"], combine="by_coords", decode_times=False
        )
    else:
        # use dataset from catalog, either from command line or default
        cat = open_intake_catalog(dictArgs["platform"], "OMIP2")
        obsdataset = f"{dictArgs['dataset']}_{dictArgs['period']}"
        ds_ref_tau = cat[obsdataset].to_dask()

    ds_static = xr.open_mfdataset(dictArgs["static"])

    # Replace the nominal xq and yq by indices so that xarray does not get confused.
    # Confusion arises from inconsistencies between the static-file grid and the
    # model-data grid in the last value of yq. We never need xq and yq for actual
    # calculations, so filling these arrays with any values does not change any
    # results, but xarray needs them to be consistent between the two files when
    # taking the curl of the stress.
    ds_model["xq"] = xr.DataArray(np.arange(len(ds_model["xq"])), dims=["xq"])
    ds_model["yq"] = xr.DataArray(np.arange(len(ds_model["yq"])), dims=["yq"])
    ds_ref_tau["xq"] = xr.DataArray(np.arange(len(ds_ref_tau["xq"])), dims=["xq"])
    ds_ref_tau["yq"] = xr.DataArray(np.arange(len(ds_ref_tau["yq"])), dims=["yq"])
    ds_static["xq"] = xr.DataArray(np.arange(len(ds_static["xq"])), dims=["xq"])
    ds_static["yq"] = xr.DataArray(np.arange(len(ds_static["yq"])), dims=["yq"])

    ds_model.attrs = {"date_range": dates}

    return ds_model, ds_ref_tau, ds_static
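To see why the index substitution matters, here is a self-contained sketch (numpy and xarray only) showing how two DataArrays whose yq values disagree in a single entry silently lose that point during arithmetic, and how replacing the coordinate with plain indices restores full overlap.

import numpy as np
import xarray as xr

# Two fields whose yq coordinates differ only in the last value,
# mimicking the static-file vs. model-grid inconsistency noted above.
a = xr.DataArray(np.ones(4), dims=["yq"], coords={"yq": [0.0, 1.0, 2.0, 3.0]})
b = xr.DataArray(np.ones(4), dims=["yq"], coords={"yq": [0.0, 1.0, 2.0, 3.1]})
print((a * b).sizes)  # yq has length 3: xarray aligns on the coordinate intersection

# Overwriting the coordinate with indices makes the arrays consistent.
a["yq"] = np.arange(len(a["yq"]))
b["yq"] = np.arange(len(b["yq"]))
print((a * b).sizes)  # yq has length 4: full overlap restored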
def read(dictArgs): """ read data from model and obs files, process data and return it """ if dictArgs["config"] is not None: # use dataset from catalog, either from command line or default cat = open_intake_catalog(dictArgs["platform"], dictArgs["config"]) ds_static = cat["ocean_static_1x1"].to_dask() if dictArgs["static"] is not None: ds_static = xr.open_dataset(dictArgs["static"]) # Compute basin codes codes = generate_basin_codes(ds_static, lon="lon", lat="lat") codes = np.array(codes) # depth coordinate if "deptho" in list(ds_static.variables): depth = ds_static.deptho.to_masked_array() elif "depth" in list(ds_static.variables): depth = ds_static.depth.to_masked_array() else: raise ValueError("Unable to find depth field.") depth = np.where(np.isnan(depth), 0.0, depth) depth = depth * -1.0 dsmodel = xr.open_mfdataset(dictArgs["infile"], combine="by_coords", use_cftime=True) if dictArgs["obsfile"] is not None: # priority to user-provided obs file dsobs = xr.open_mfdataset(dictArgs["obsfile"], combine="by_coords", decode_times=False) else: # use dataset from catalog, either from command line or default cat = open_intake_catalog(dictArgs["platform"], "obs") dsobs = cat[dictArgs["dataset"]].to_dask() # read in model and obs data datamodel = read_data(dsmodel, dictArgs["possible_variable_names"]) dataobs = read_data(dsobs, dictArgs["possible_variable_names"]) # reduce data along time, here mandatory if ("assigned_time" in datamodel.dims) and (len(datamodel["assigned_time"]) > 1): warnings.warn("input dataset has more than one time record, " + "performing non-weighted average") datamodel = simple_average(datamodel, "assigned_time") if ("assigned_time" in dataobs.dims) and len(dataobs["assigned_time"]) > 1: warnings.warn("reference dataset has more than one time record, " + "performing non-weighted average") dataobs = simple_average(dataobs, "assigned_time") datamodel = datamodel.squeeze() dataobs = dataobs.squeeze() # check final data is 3d assert len(datamodel.dims) == 3 assert len(dataobs.dims) == 3 # check consistency of coordinates assert np.allclose(datamodel["assigned_lon"], dataobs["assigned_lon"]) assert np.allclose(datamodel["assigned_lat"], dataobs["assigned_lat"]) # homogeneize coords dataobs = copy_coordinates(datamodel, dataobs, ["assigned_lon", "assigned_lat"]) # restrict model to where obs exists datamodel = datamodel.where(dataobs) # dump values model = datamodel.to_masked_array() obs = dataobs.to_masked_array() x = datamodel["assigned_lon"].values y = datamodel["assigned_lat"].values z = datamodel["assigned_depth"].values # convert z to negative values z = z * -1 # compute area if "areacello" in dsmodel.variables: area = dsmodel["areacello"].values else: if (model.shape[-2], model.shape[-1]) == (180, 360): area = compute_area_regular_grid(dsmodel) else: raise IOError("no cell area provided") # date range dates = date_range(dsmodel) return y, z, depth, area, codes, model, obs, dates
def read(
    dictArgs,
    tempvar="thetao",
    saltvar="so",
    argo_xcoord="lon",
    argo_ycoord="lat",
    argo_tvar="ptemp",
    argo_svar="salt",
    argo_pvar="pres",
):
    """Read required fields

    Parameters
    ----------
    dictArgs : dict
        Dictionary containing argparse options
    tempvar : str, optional
        Name of potential temperature variable, by default "thetao"
    saltvar : str, optional
        Name of practical salinity variable, by default "so"
    argo_xcoord : str, optional
        Name of longitude coordinate in obs dataset, by default "lon"
    argo_ycoord : str, optional
        Name of latitude coordinate in obs dataset, by default "lat"
    argo_tvar : str, optional
        Name of temperature variable in obs dataset, by default "ptemp"
    argo_svar : str, optional
        Name of salinity variable in obs dataset, by default "salt"
    argo_pvar : str, optional
        Name of pressure variable in obs dataset, by default "pres"

    Returns
    -------
    (xarray.Dataset, xarray.Dataset, xarray.Dataset)
    """

    model_xcoord = dictArgs["model_xcoord"]
    model_ycoord = dictArgs["model_ycoord"]
    model_zcoord = dictArgs["model_zcoord"]

    model = xr.open_mfdataset(dictArgs["infile"], use_cftime=True)
    model = xr.Dataset(
        {
            "temp": model[tempvar].mean(dim="time"),
            "salt": model[saltvar].mean(dim="time"),
        }
    )

    # clean up and standardize
    model = model.squeeze()
    model = model.reset_coords(drop=True)
    model = model.rename({model_xcoord: "lon", model_ycoord: "lat"})

    # ----- read the Argo data
    if dictArgs["argo_file"] is not None:
        argo_dset = xr.open_dataset(dictArgs["argo_file"], decode_times=False)
    else:
        # use dataset from catalog, either from command line or default
        cat = open_intake_catalog(dictArgs["platform"], "obs")
        argo_dset = cat["Argo_Climatology"].to_dask()

    # standardize longitude to run from 0 to 360
    argo_dset = standardize_longitude(argo_dset, argo_xcoord)

    # subset variables
    argo_dset = xr.Dataset(
        {
            "temp": argo_dset[argo_tvar],
            "salt": argo_dset[argo_svar],
            "pres": argo_dset[argo_pvar],
        }
    )

    # clean up and standardize
    argo_dset = argo_dset.squeeze()
    argo_dset = argo_dset.reset_coords(drop=True)
    argo_dset = argo_dset.rename({argo_xcoord: "lon", argo_ycoord: "lat"})

    # ----- read the WOA data
    if dictArgs["obsfile"] is not None:
        # priority to user-provided obs file
        dsobs = xr.open_mfdataset(
            dictArgs["obsfile"], combine="by_coords", decode_times=False
        )
    else:
        # use dataset from catalog, either from command line or default
        cat = open_intake_catalog(dictArgs["platform"], "obs")
        dsobs = cat[dictArgs["dataset"]].to_dask()

    woa = xr.Dataset({"temp": dsobs["ptemp"], "salt": dsobs["salinity"]})
    woa = woa.squeeze()
    woa = woa.reset_coords(drop=True)

    return model, woa, argo_dset
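standardize_longitude is another repo helper not shown here; a plausible minimal sketch of the 0-to-360 wrapping the comment above describes (a hypothetical implementation, not the repo's source) is:

import numpy as np
import xarray as xr

def standardize_longitude_sketch(ds, lon_coord):
    # Hypothetical stand-in for the repo helper: wrap longitudes
    # into [0, 360) and re-sort so the axis is monotonic.
    ds = ds.assign_coords({lon_coord: np.mod(ds[lon_coord], 360.0)})
    return ds.sortby(lon_coord)

ds = xr.Dataset(coords={"lon": np.arange(-180.0, 180.0, 60.0)})
print(standardize_longitude_sketch(ds, "lon")["lon"].values)
# [  0.  60. 120. 180. 240. 300.]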
def plot(dictArgs, yq, trans_global, trans_atlantic, trans_pacific, dates=None):

    # Load observations for plotting
    GW = GWObs()

    cat = open_intake_catalog(dictArgs["platform"], "obs")
    fObs = cat["Trenberth_and_Caron"].to_dask()

    yobs = fObs.ylat.to_masked_array()
    NCEP_Global = fObs.OTn.to_masked_array()
    NCEP_Atlantic = fObs.ATLn.to_masked_array()
    NCEP_IndoPac = fObs.INDPACn.to_masked_array()
    ECMWF_Global = fObs.OTe.to_masked_array()
    ECMWF_Atlantic = fObs.ATLe.to_masked_array()
    ECMWF_IndoPac = fObs.INDPACe.to_masked_array()

    fig = plt.figure(figsize=(6, 10))

    # Global heat transport
    ax1 = plt.subplot(3, 1, 1)
    plt.plot(yq, yq * 0.0, "k", linewidth=0.5)
    plt.plot(yq, trans_global, "r", linewidth=1.5, label="Model")
    GW.gbl.annotate(ax1)
    plt.plot(yobs, NCEP_Global, "k--", linewidth=0.5, label="NCEP")
    plt.plot(yobs, ECMWF_Global, "k.", linewidth=0.5, label="ECMWF")
    plt.ylim(-2.5, 3.0)
    plt.grid(True)
    plt.legend(loc=2, fontsize=10)
    ax1.text(
        0.01,
        1.02,
        "a. Global Poleward Heat Transport",
        ha="left",
        transform=ax1.transAxes,
    )

    if dates is not None:
        assert isinstance(dates, tuple), "Year range should be provided as a tuple."
        datestring = f"Years {dates[0]} - {dates[1]}"
        ax1.text(
            0.98, 1.02, datestring, ha="right", fontsize=10, transform=ax1.transAxes
        )

    # if diffusive is None:
    #     annotatePlot("Warning: Diffusive component of transport is missing.")

    # Atlantic heat transport
    ax2 = plt.subplot(3, 1, 2)
    plt.plot(yq, yq * 0.0, "k", linewidth=0.5)
    trans_atlantic[yq <= -33] = np.nan
    plt.plot(yq, trans_atlantic, "r", linewidth=1.5, label="Model")
    GW.atl.annotate(ax2)
    plt.plot(yobs, NCEP_Atlantic, "k--", linewidth=0.5, label="NCEP")
    plt.plot(yobs, ECMWF_Atlantic, "k.", linewidth=0.5, label="ECMWF")
    plt.ylim(-0.5, 2.0)
    plt.grid(True)
    ax2.text(
        0.01,
        1.02,
        "b. Atlantic Poleward Heat Transport",
        ha="left",
        transform=ax2.transAxes,
    )

    # Indo-Pacific heat transport
    ax3 = plt.subplot(3, 1, 3)
    plt.plot(yq, yq * 0.0, "k", linewidth=0.5)
    trans_pacific[yq <= -33] = np.nan
    plt.plot(yq, trans_pacific, "r", linewidth=1.5, label="Model")
    GW.indpac.annotate(ax3)
    plt.plot(yobs, NCEP_IndoPac, "k--", linewidth=0.5, label="NCEP")
    plt.plot(yobs, ECMWF_IndoPac, "k.", linewidth=0.5, label="ECMWF")
    plt.ylim(-2.5, 1.5)
    plt.grid(True)
    plt.xlabel(r"Latitude [$\degree$N]")
    ax3.text(
        0.01,
        1.02,
        "c. Indo-Pacific Poleward Heat Transport",
        ha="left",
        transform=ax3.transAxes,
    )

    plt.subplots_adjust(hspace=0.3)

    # Annotations
    fig.text(
        0.05,
        0.05,
        r"Trenberth, K. E. and J. M. Caron, 2001: Estimates of Meridional "
        r"Atmosphere and Ocean Heat Transports. J. Climate, 14, 3433-3443.",
        fontsize=6,
    )
    fig.text(
        0.05,
        0.04,
        r"Ganachaud, A. and C. Wunsch, 2000: Improved estimates of global ocean "
        r"circulation, heat transport and mixing from hydrographic data.",
        fontsize=6,
    )
    fig.text(0.05, 0.03, r"Nature, 408, 453-457.", fontsize=6)

    if dictArgs["label"] is not None:
        plt.suptitle(dictArgs["label"])

    # HTplot = heatTrans(advective, diffusive, vmask=m * numpy.roll(m, -1, axis=-2))

    return fig
def read(dictArgs): """Function to read in the data. Returns xarray datasets""" infile = dictArgs["infile"] # Open ice model output and the static file ds = xr.open_mfdataset(infile, combine="by_coords") if "siconc" in ds.variables: ds["CN"] = ds["siconc"] if ds["CN"].max() > 1.0: ds["CN"] = ds["CN"] / 100.0 else: ds["CN"] = ds["CN"].sum(dim="ct") # Detect if we are native grid or 1x1 if (ds.CN.shape[-2] == 180) and (ds.CN.shape[-1] == 360): standard_grid = True else: standard_grid = False if dictArgs["config"] is not None: # use dataset from catalog, either from command line or default cat = open_intake_catalog(dictArgs["platform"], dictArgs["config"]) if standard_grid is True: dstatic = cat["ice_static_1x1"].to_dask() else: dstatic = cat["ice_static"].to_dask() # Override static file if provided if dictArgs["static"] is not None: dstatic = xr.open_dataset(dictArgs["static"]) # Append static fields to the return Dataset if standard_grid is True: _lon = np.array(dstatic["lon"].to_masked_array()) _lat = np.array(dstatic["lat"].to_masked_array()) X, Y = np.meshgrid(_lon, _lat) ds["GEOLON"] = xr.DataArray(X, dims=["lat", "lon"]) ds["GEOLAT"] = xr.DataArray(Y, dims=["lat", "lon"]) _AREA = standard_grid_cell_area(_lat, _lon) _MASK = np.array(dstatic["mask"].fillna(0.0).to_masked_array()) _AREA = _AREA * _MASK ds["CELL_AREA"] = xr.DataArray(_AREA, dims=["lat", "lon"]) ds["AREA"] = xr.DataArray(_AREA, dims=["lat", "lon"]) ds = ds.rename({"lon": "x", "lat": "y"}) else: ds["CELL_AREA"] = dstatic["CELL_AREA"] ds["GEOLON"] = dstatic["GEOLON"] ds["GEOLAT"] = dstatic["GEOLAT"] ds["AREA"] = dstatic["CELL_AREA"] * 4.0 * np.pi * (6.378e6 ** 2) # Get Valid Mask valid_mask = np.where(ds["CELL_AREA"] == 0.0, True, False) # Open observed SIC on 25-km EASE grid (coords already named lat and lon) if dictArgs["obsfile"] is not None: dobs = xr.open_dataset(dictArgs["obsfile"]) else: cat = open_intake_catalog(dictArgs["platform"], "obs") dobs = cat[f"NSIDC_{dictArgs['region'].upper()}_monthly"].to_dask() # Close the static file (no longer used) dstatic.close() return ds, dobs, valid_mask
def read(dictArgs): """read data from model and obs files, process data and return it""" dsmodel = xr.open_mfdataset(dictArgs["infile"], combine="by_coords", use_cftime=True) if dictArgs["obsfile"] is not None: # priority to user-provided obs file dsobs = xr.open_mfdataset(dictArgs["obsfile"], combine="by_coords", decode_times=False) else: # use dataset from catalog, either from command line or default cat = open_intake_catalog(dictArgs["platform"], "obs") dsobs = cat[dictArgs["dataset"]].to_dask() # read in model and obs data datamodel = read_data(dsmodel, dictArgs["possible_variable_names"]) dataobs = read_data(dsobs, dictArgs["possible_variable_names"]) # subset data if dictArgs["depth"] is None: dictArgs["depth"] = dictArgs["surface_default_depth"] if dictArgs["depth"] is not None: datamodel = subset_data(datamodel, "assigned_depth", dictArgs["depth"]) dataobs = subset_data(dataobs, "assigned_depth", dictArgs["depth"]) # reduce data along depth (not yet implemented) if "depth_reduce" in dictArgs: if dictArgs["depth_reduce"] == "mean": # do mean pass elif dictArgs["depth_reduce"] == "sum": # do sum pass # reduce data along time, here mandatory if ("assigned_time" in datamodel.dims) and (len(datamodel["assigned_time"]) > 1): warnings.warn("input dataset has more than one time record, " + "performing non-weighted average") datamodel = simple_average(datamodel, "assigned_time") if ("assigned_time" in dataobs.dims) and len(dataobs["assigned_time"]) > 1: warnings.warn("reference dataset has more than one time record, " + "performing non-weighted average") dataobs = simple_average(dataobs, "assigned_time") datamodel = datamodel.squeeze() dataobs = dataobs.squeeze() # check final data is 2d assert len(datamodel.dims) == 2 assert len(dataobs.dims) == 2 # check consistency of coordinates assert np.allclose(datamodel["assigned_lon"], dataobs["assigned_lon"]) assert np.allclose(datamodel["assigned_lat"], dataobs["assigned_lat"]) # homogeneize coords dataobs = copy_coordinates(datamodel, dataobs, ["assigned_lon", "assigned_lat"]) # restrict model to where obs exists datamodel = datamodel.where(dataobs) # dump values model = datamodel.to_masked_array() obs = dataobs.to_masked_array() x = datamodel["assigned_lon"].values y = datamodel["assigned_lat"].values # compute area if "areacello" in dsmodel.variables: area = dsmodel["areacello"].values else: if model.shape == (180, 360): area = compute_area_regular_grid(dsmodel) else: raise IOError("no cell area provided") # date range dates = date_range(dsmodel) return x, y, area, model, obs, dates