def get_seaflux_data(catalog_name=catalog_name, dest=_dest, n_jobs=1, verbose=False):
    """Download the SeaFlux files listed in a catalog and open them as one dataset.

    The first entry of the catalog read from ``catalog_name`` is downloaded to
    ``dest``; the resulting files are opened together and tagged with product
    attributes. By default the catalog is the yaml file that points to the
    latest SeaFlux data on Zenodo; supply your own yaml file to customise
    which files are fetched.

    Parameters
    ----------
    catalog_name
        Catalog (yaml) passed to ``fetch_data.read_catalog``.
    dest
        Download destination; stored under the entry's ``dest`` key.
    n_jobs, verbose
        Forwarded unchanged to ``fetch_data.download``.

    Returns
    -------
    The dataset returned by ``xarray.open_mfdataset`` with the attributes
    ``product_name``, ``product_version``, ``date_accessed`` and ``contact``
    plus everything in the entry's ``meta`` mapping.
    """
    from datetime import datetime as dt

    import fetch_data as fd
    import xarray as xr

    from . import config
    from .utils import preprocess

    catalog = fd.read_catalog(catalog_name)

    # only the first catalog entry is used
    entry_names = list(catalog.keys())
    entry = catalog[entry_names[0]]
    entry["dest"] = dest

    downloaded = fd.download(**entry, n_jobs=n_jobs, verbose=verbose)
    dataset = xr.open_mfdataset(downloaded, preprocess=preprocess())

    # dict(...) keeps the TypeError if ``meta`` repeats one of the fixed keys
    attributes = dict(
        product_name="SeaFlux",
        product_version=config.version,
        date_accessed=dt.now().strftime("%Y-%m-%d"),
        contact=config.contact,
        **entry["meta"],
    )
    return dataset.assign_attrs(**attributes)
def test_read_catalog():
    """The example catalog must be read as a non-empty dictionary."""
    catalog = fd.read_catalog("./tests/example_catalog.yml")

    assert isinstance(catalog, dict)
    assert catalog != {}
def test_make_readme():
    """Create a download README from the entries of the example catalog.

    Every entry is given a ``name`` derived from its catalog key and is then
    passed, as keyword arguments, to ``fd.core.create_download_readme``.
    The test contains no assertions: it passes when no call raises.
    """
    fname = "./tests/example_catalog.yml"

    cat = fd.read_catalog(fname)
    for key in cat:
        # title-like name from the key, e.g. "some_key" -> "SOME KEY"
        cat[key]["name"] = key.upper().replace("_", " ")
        # NOTE(review): presumably writes a file called README.txt; where it
        # ends up depends on create_download_readme - confirm it is cleaned up
        fd.core.create_download_readme("README.txt", **cat[key])
def get_nies_fnn(entry):
    """Download the NIES-FNN fCO2 files and return them converted to pCO2.

    The yearly files are downloaded, their ``month`` dimension is replaced by
    a proper monthly ``time`` coordinate, and the ``fco2`` variable is
    converted with ``fCO2_to_pCO2`` using the ``sst`` variable from the
    ``oisst_v2`` auxiliary entry and the ERA5 ``sp`` variable divided by 100.

    Parameters
    ----------
    entry : dict
        Catalog entry. It is passed as keyword arguments to ``download`` and
        must also provide ``url`` and ``meta``, which are copied into the
        attributes of the result.

    Returns
    -------
    xarray.DataArray
        pCO2 on the coordinates of the fCO2 data, with ``units="uatm"``,
        ``source`` and the ``meta`` items as attributes, and two history
        notes describing the processing.

    Notes
    -----
    ``RuntimeWarning`` is silenced process-wide by this function, and the
    auxiliary catalog path ``../data/aux_data.yml`` is hard-coded.
    """
    from warnings import filterwarnings

    from fetch_data import read_catalog

    from ..fco2_pco2_conversion import fCO2_to_pCO2
    from .aux_vars import download_era5_slp, download_sst_ice
    from .utils import add_history

    filterwarnings("ignore", category=RuntimeWarning)

    def decode_time(xds):
        """Swap the ``month`` dimension for monthly ``time`` stamps.

        The year is not stored in the file, so it is parsed from the source
        file name (pattern ``flux.%Y.ver``).
        """
        import pandas as pd
        from datetime_matcher import DatetimeMatcher

        fname = xds.encoding["source"]
        matched = DatetimeMatcher().extract_datetime("flux.%Y.ver", fname)
        if matched is None:
            # fail early with a readable message instead of building a
            # date range from a missing year
            raise ValueError(
                f"could not find a year matching 'flux.%Y.ver' in {fname!r}"
            )
        year = pd.Timestamp(matched).year

        # Twelve month-start stamps for `year`. Using `periods` gives the same
        # result as the former `closed="left"` range up to the next year, but
        # does not rely on the `closed` keyword that pandas 2.0 removed.
        time = pd.date_range(str(year), periods=12, freq="1MS")

        xds = xds.rename(month="time").assign_coords(time=time)
        xds = add_history(xds, "decode times manually")
        return xds

    flist = download(**entry)
    xda = xr.open_mfdataset(flist, preprocess=preprocess(decode_time)).fco2

    aux_cat = read_catalog("../data/aux_data.yml")

    # restrict the auxiliary fields to the period covered by the fCO2 data
    t0, t1 = [str(s) for s in xda.time.values[[0, -1]]]
    sst = xr.open_dataset(download_sst_ice(aux_cat["oisst_v2"]))["sst"].sel(
        time=slice(t0, t1)
    )
    # NOTE(review): division by 100 presumably converts Pa to hPa - confirm
    # against the units fCO2_to_pCO2 expects
    msl = xr.open_dataset(download_era5_slp())["sp"].sel(time=slice(t0, t1)) / 100

    pco2 = xr.DataArray(
        fCO2_to_pCO2(xda, sst, msl),
        coords=xda.coords,
        dims=xda.dims,
        attrs=dict(units="uatm", source=entry["url"], **entry["meta"]),
    )

    pco2 = add_history(
        pco2, "re-shaped data from [year month lat lon] to [time lat lon]."
    )
    pco2 = add_history(pco2, "converted fCO2 to pCO2 using OISST v2.1, and ERA5 MSLP")

    return pco2
def __init__(self, catalog_fname, verbose=True):
    """
    Set up an object that downloads, reads, homogenises and combines
    surface ocean pCO2 products.

    The catalog holds the urls from which the data can be downloaded, and
    its entries must match the functions of the SOCOM ensemble object. The
    object also carries functions tailor-made to homogenise the individual
    data sets so that they are easy to work with.

    ``self.members`` holds the default ensemble members that are present in
    the catalog; the climatology and the sea mask are not part of it. The
    name of the climatology is kept in ``self.climatology``.

    The data is reached through ``self.data``, which loads the dataset from
    memory if it is loaded, from a file if it exists, processes each
    individual product, or downloads the products. A full pipeline!

    The catalog is not stored on GitHub as it contains passwords. Contact
    Luke for access to this catalog.
    """
    from fetch_data import read_catalog

    self.catalog_fname = catalog_fname
    self.cat = read_catalog(catalog_fname)

    default_members = [
        "jena_mls",
        "mpi_somffn",
        "cmems_ffnn",
        "csir_ml6",
        "not there",
        "nies_fnn",
        "jma_mlr",
    ]
    # keep only the defaults that actually have an entry in the catalog
    self.members = [name for name in default_members if name in self.cat]
    self.climatology = "mpi_ulb_somffn"

    self._data = None
    self.verbose = verbose
    self.aux_catalog_name = "../data/aux_data.yml"

    member_list = ", ".join(self.members)
    print("[SeaFlux] Default ensemble members in catalog:", member_list)
def solubility(aux_catalog_fname, dest="../data/output/"):
    """Compute the SeaFlux CO2 solubility from SST, salinity and pressure.

    The three auxiliary fields are fetched via the catalog read from
    ``aux_catalog_fname``, merged, reduced to the time steps where all of
    them have data, and passed to ``solubility_weiss1974``. The result is
    saved with ``save_seaflux`` under the name ``"sol"``.

    Returns whatever ``save_seaflux`` returns.
    """
    import xarray as xr
    from fetch_data import read_catalog

    from ..solubility import solubility_weiss1974
    from .utils import save_seaflux

    catalog = read_catalog(aux_catalog_fname)

    print("[SeaFlux] fetching SST, Salinity, and sea-level pressure")

    sst_file = download_sst_ice(catalog["oisst_v2"])
    temp = xr.open_dataset(sst_file).sst.rename("temp") + 273.15

    salt_file = download_salinity(catalog["en4_g10"])
    salt = xr.open_dataset(salt_file).salinity.rename("salt")

    pres_file = download_era5_slp(download_dest=catalog["era5_mslp"]["dest"])
    mslp = xr.open_dataset(pres_file).sp.rename("mslp") / 101325

    fields = xr.merge([temp, salt, mslp])

    # keep a time step only if some grid cell has all three variables
    all_present = fields.to_array("tmp").notnull().all("tmp")
    fields = fields.where(all_present.any(["lat", "lon"]), drop=True)

    # unit analysis
    # mol / L / atm --> mol / m3 / uatm
    # mol . L-1 . atm-1 * (1e3L . m-3) * (1e-6 atm . uatm-1) = * 1e-3
    print("[SeaFlux] calculating solubility using Weiss (1974)")
    values = solubility_weiss1974(fields.salt, fields.temp, press_atm=fields.mslp)
    values = values * 1e-3

    attributes = {
        "description": "CO2 solubility in seawater using the formulation of Weiss 1974",
        "units": "mol/m3/uatm",
        "long_name": "CO2 solubility in seawater",
    }
    sol = xr.DataArray(
        data=values,
        coords=fields.temp.coords,
        dims=fields.temp.dims,
        attrs=attributes,
    )

    return save_seaflux(sol, dest, "sol")
def area(aux_catalog_fname, dest="../data/output/"):
    """Compute and save the area of the SeaFlux grid cells.

    The grid is taken from the ``sst`` variable of the ``oisst_v2`` entry in
    the auxiliary catalog; the area comes from ``get_area_from_dataset`` and
    is saved with ``save_seaflux`` under the name ``"area"``.

    Returns whatever ``save_seaflux`` returns.
    """
    import xarray as xr
    from fetch_data import read_catalog

    from ..area import get_area_from_dataset
    from .utils import save_seaflux

    catalog = read_catalog(aux_catalog_fname)

    sst_file = download_sst_ice(catalog["oisst_v2"])
    grid = xr.open_mfdataset(sst_file).sst.rename("temp")

    cell_area = get_area_from_dataset(grid)
    return save_seaflux(cell_area, dest, "area")
def sea_ice_cover(aux_catalog_fname, dest="../data/output/"):
    """Compute and save the SeaFlux sea-ice cover as a fraction.

    The ``icec`` variable of the ``oisst_v2`` entry is divided by 100,
    renamed to ``"ice"``, limited to 1982 onwards and saved with
    ``save_seaflux``.

    Returns whatever ``save_seaflux`` returns.
    """
    import xarray as xr
    from fetch_data import read_catalog

    from .utils import save_seaflux

    variable = "ice"
    catalog = read_catalog(aux_catalog_fname)
    source_file = download_sst_ice(catalog["oisst_v2"])

    ice_percent = xr.open_mfdataset(source_file)["icec"].rename(variable)
    ice_fraction = (ice_percent / 100).sel(time=slice("1982", None))

    return save_seaflux(ice_fraction, dest, variable)
def get_zenodo_catalog():
    """Read the default catalog and return it as a dictionary.

    In IPython/Jupyter the dictionary is displayed as a YAML file.
    """
    from fetch_data import read_catalog

    catalog = read_catalog(catalog_name)
    return catalog
def main(
    noaa_mbl_url,
    download_dest="../data/raw/",
    aux_catalog_name="../data/aux_data.yml",
    processed_dest="../data/processed/",
    output_dest="../data/output/",
):
    """Create and save the atmospheric pCO2 (``pco2atm``) product.

    Salinity, SST and surface pressure are fetched via the auxiliary catalog
    and merged; the NOAA marine boundary layer xCO2 is downloaded onto the
    same lat/lon grid and averaged to monthly values. Both are cut to their
    common period, converted with ``atm_xCO2_to_pCO2``, passed through
    ``interpolate_year`` and written with ``save_seaflux``.

    Returns ``output_dest`` unchanged when it already is an existing file,
    otherwise whatever ``save_seaflux`` returns.
    """
    import xarray as xr
    from fetch_data import read_catalog
    from pandas import Timestamp

    from .aux_vars import download_era5_slp, download_salinity, download_sst_ice
    from .utils import center_time_on_15th, preprocess, save_seaflux

    # nothing to do if the output is already there
    if path(output_dest).is_file():
        return output_dest

    variable = "pco2atm"
    catalog = read_catalog(aux_catalog_name)

    salt_file = download_salinity(
        catalog["en4_g10"], f"{processed_dest}/en4_salt_temp.nc"
    )
    temp_file = download_sst_ice(
        catalog["oisst_v2"], f"{processed_dest}/noaa_oisst_sst_icec.nc"
    )
    pres_file = download_era5_slp(
        download_dest=catalog["era5_mslp"]["dest"],
        process_dest=f"{processed_dest}/era5_mslp_monthly.nc",
    )

    aux = xr.merge(
        [
            xr.open_dataset(salt_file)["salinity"].rename("saltPSU"),
            xr.open_dataset(temp_file)["sst"].rename("tempC"),
            xr.open_dataset(pres_file)["sp"].rename("presPa"),
        ]
    )

    xco2_raw = download_noaa_mbl(
        noaa_mbl_url,
        download_dest=f"{download_dest}/co2_GHGreference_surface.txt",
        target_lat=aux.lat.values,
        target_lon=aux.lon.values,
    )
    xco2 = xco2_raw.resample(time="1MS").mean()

    # first cut xCO2 to the auxiliary period, then the auxiliary data to
    # whatever xCO2 period remains, so both cover the same time span
    aux_first, aux_last = aux.time.values[[0, -1]]
    xco2 = center_time_on_15th(xco2).sel(time=slice(aux_first, aux_last))

    xco2_first, xco2_last = xco2.time.values[[0, -1]]
    aux = aux.sel(time=slice(xco2_first, xco2_last))

    # pressure is only kept where a temperature value exists
    has_temp = aux.tempC.notnull()
    pressure = aux.presPa.where(has_temp) / 100
    pco2_values = atm_xCO2_to_pCO2(xco2, pressure, aux.tempC, aux.saltPSU)

    description = (
        "Atmospheric pCO2 for the marine boundary layer is calculated "
        "from the NOAAs marine boundary layer pCO2 with: xCO2 * (Patm "
        "- pH2O). Where pH2O is calculated using vapour pressure from "
        "Dickson et al. (2007)"
    )
    previous_history = getattr(xco2, "history", "").strip(";")
    history = (
        previous_history
        + ";\n"
        + f"[SeaFlux @ {Timestamp.today():%Y-%m-%d}] "
        + "pCO2 calculated from xCO2 * (Patm - pH2O), where "
        + "pH2O is calculated with Dickson et al. (2007)"
    )
    citation = (
        "Ed Dlugokencky and Pieter Tans, NOAA/ESRL "
        "(www.esrl.noaa.gov/gmd/ccgg/trends/)"
    )
    attributes = {
        "long_name": "partial_pressure_of_carbon_dioxide_in_the_marine_boundary_layer",
        "short_name": "pco2atm",
        "units": "uatm",
        "description": description,
        "history": history,
        "citation": citation,
    }

    labelled = xr.DataArray(
        data=pco2_values,
        dims=aux.tempC.dims,
        coords=aux.tempC.coords,
        name=variable,
        attrs=attributes,
    )
    atm_pco2 = preprocess()(labelled)

    pco2atm = interpolate_year(atm_pco2).to_dataset(name=variable)
    return save_seaflux(pco2atm, output_dest, variable)