def get_seaflux_data(catalog_name=catalog_name, dest=_dest, n_jobs=1, verbose=False):
    """Downloads SeaFlux data from Zenodo using the default yaml file
    containing the paths to the latest SeaFlux data. The data is downloaded
    and then combined. You can create your own yaml file to customise the
    files you want to access.
    """
    from datetime import datetime as dt

    import fetch_data as fd
    import xarray as xr

    from . import config
    from .utils import preprocess

    cat = fd.read_catalog(catalog_name)
    key = list(cat.keys())[0]
    entry = cat[key]
    entry["dest"] = dest

    flist = fd.download(**entry, n_jobs=n_jobs, verbose=verbose)

    xds = xr.open_mfdataset(flist, preprocess=preprocess())
    xds = xds.assign_attrs(
        product_name="SeaFlux",
        product_version=config.version,
        date_accessed=dt.now().strftime("%Y-%m-%d"),
        contact=config.contact,
        **entry["meta"],
    )

    return xds
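
# Usage sketch (hedged): the import path and keyword values below are
# illustrative assumptions, not a documented API.
#
#     from seaflux.data import get_seaflux_data
#     ds = get_seaflux_data(n_jobs=4, verbose=True)
#     print(ds.attrs["product_version"], ds.attrs["date_accessed"])
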
def get_jena_mls(entry):
    """Download and process the Jena-MLS pCO2 product: resample from daily
    to monthly and regrid to 1 degree with nearest-neighbour interpolation."""
    flist = download(**entry)

    xds = xr.open_mfdataset(flist)
    xda = xds.pCO2.resample(mtime="1MS").mean("mtime")
    xda = xda.rename("jena_mls")

    xda = (
        xda.interp(
            lat=np.arange(-89.5, 90),
            lon=np.arange(-179.5, 180),
            method="nearest",
        )
        # roll the dateline to the centre so interpolate_na can fill across it
        .roll(lon=180, roll_coords=False)
        .interpolate_na("lon", limit=20)
        .roll(lon=-180, roll_coords=False)
        .rename(mtime="time")
        .assign_attrs(
            units="uatm",
            source=entry["url"],
            **entry["meta"],
            history=(
                "[SeaFlux] resampled from daily to monthly and "
                "interpolated to 1 degree using nearest neighbour interpolation"
            ),
        )
    )

    xda = preprocess()(xda)

    return xda
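
# Why the roll/interpolate_na/roll dance above: interpolate_na only fills
# gaps within the array, so missing pixels at the dateline edges
# (lon = -179.5 and 179.5) would never be filled. Rolling by half the
# longitude axis moves the dateline to the middle, the fill runs across
# it, and rolling back restores the grid. A minimal self-contained sketch
# of the same trick (the data and names here are illustrative only):
def _dateline_fill_example():
    import numpy as np
    import xarray as xr

    lon = np.arange(-179.5, 180)
    da = xr.DataArray(np.sin(np.radians(lon)), coords=[("lon", lon)])
    da = da.where(abs(da.lon) < 178)  # punch a hole across the dateline
    filled = (
        da.roll(lon=180, roll_coords=False)
        .interpolate_na("lon", limit=20)
        .roll(lon=-180, roll_coords=False)
    )
    return filled
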
def get_jma_mlr(entry):
    """Download and process the JMA-MLR surface pCO2 product."""

    def decode_time(xds):
        """Manually decode the time axis: the files store time as months
        since a given year, so build a monthly date range for that year."""
        import pandas as pd

        from seaflux.data.utils import add_history

        time = xds.time
        unit = time.attrs.get("units")
        year = pd.to_datetime(unit.split()[-1]).year
        y0, y1 = str(year), str(year + 1)
        time = pd.date_range(y0, y1, freq="1MS", closed="left")

        xds = xds.assign_coords(time=time)
        xds = add_history(xds, "decode times manually")

        return xds

    flist = download(**entry, n_jobs=8)

    xda = xr.open_mfdataset(
        flist, decode_times=False, preprocess=preprocess(decode_time)
    ).pCO2s
    xda = xda.assign_attrs(units="uatm")

    return xda
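
# Note on decode_time above: date_range("1990", "1991", freq="1MS",
# closed="left") yields exactly 12 month-start timestamps for the year,
# since the right endpoint is excluded. In pandas >= 1.4 the `closed`
# keyword is deprecated in favour of `inclusive="left"`.
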
def calc_seafrac(process_dest="../data/processed/etopo1_seafrac.nc"):
    """Calculate the fraction of each 1-degree pixel that is covered by
    ocean, based on the 1-arc-minute ETOPO1 relief data."""
    from fetch_data import download
    from numpy import arange
    from xarray import open_mfdataset

    fname = download(
        url=(
            "https://www.ngdc.noaa.gov/mgg/global/relief/ETOPO1/data/"
            "ice_surface/cell_registered/netcdf/ETOPO1_Ice_c_gmt4.grd.gz"
        ),
        dest="../data/raw/",
        verbose=True,
    )

    ds = open_mfdataset(fname).rename(x="lon", y="lat", z="topography")
    sea = ds.topography < 0
    # ETOPO1 is a 1-arc-minute grid: 60 x 60 cells per 1-degree pixel
    seafrac = sea.coarsen(lat=60, lon=60).sum().compute() / 60**2
    seafrac = seafrac.assign_coords(
        lat=arange(-89.5, 90), lon=arange(-179.5, 180)
    ).rename("seafrac")

    seafrac.attrs = dict(
        description=(
            "Fraction of pixel that is covered by ocean. "
            "Calculated from ETOPO1."
        ),
        unit="frac",
    )

    seafrac.to_netcdf(process_dest)

    return process_dest
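
# Worked example of the coarsen step above: each 1-degree pixel spans
# 60 x 60 = 3600 ETOPO1 cells, so summing the boolean sea mask over each
# block and dividing by 60**2 gives the ocean fraction. A hedged sketch
# with synthetic data (names and values are illustrative only):
def _seafrac_example():
    import numpy as np
    import xarray as xr

    # a fake 2-degree x 2-degree tile at 1-arc-minute resolution
    sea = xr.DataArray(
        np.random.rand(120, 120) > 0.5,  # boolean "is ocean" mask
        dims=("lat", "lon"),
    )
    frac = sea.coarsen(lat=60, lon=60).sum() / 60**2
    return frac  # 2x2 array of fractions in [0, 1]
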
def get_nies_fnn(entry):
    """Download and process the NIES-FNN fCO2 product: decode times manually
    and convert fCO2 to pCO2 using OISST v2.1 SST and ERA5 sea-level pressure."""
    from warnings import filterwarnings

    from fetch_data import read_catalog

    from ..fco2_pco2_conversion import fCO2_to_pCO2
    from .aux_vars import download_era5_slp, download_sst_ice
    from .utils import add_history

    filterwarnings("ignore", category=RuntimeWarning)

    def decode_time(xds):
        """Build the monthly time axis from the year encoded in the file name."""
        import pandas as pd

        from datetime_matcher import DatetimeMatcher

        re_date = DatetimeMatcher()
        fname = xds.encoding["source"]

        datetime = re_date.extract_datetime("flux.%Y.ver", fname)
        year = pd.Timestamp(datetime).year
        y0, y1 = str(year), str(year + 1)
        time = pd.date_range(y0, y1, freq="1MS", closed="left")

        xds = xds.rename(month="time").assign_coords(time=time)
        xds = add_history(xds, "decode times manually")

        return xds

    flist = download(**entry)

    xda = xr.open_mfdataset(flist, preprocess=preprocess(decode_time)).fco2

    aux_cat = read_catalog("../data/aux_data.yml")
    t0, t1 = [str(s) for s in xda.time.values[[0, -1]]]
    sst = xr.open_dataset(download_sst_ice(aux_cat["oisst_v2"]))["sst"].sel(
        time=slice(t0, t1)
    )
    # convert ERA5 surface pressure from Pa to hPa for the conversion
    msl = xr.open_dataset(download_era5_slp())["sp"].sel(time=slice(t0, t1)) / 100

    pco2 = xr.DataArray(
        fCO2_to_pCO2(xda, sst, msl),
        coords=xda.coords,
        dims=xda.dims,
        attrs=dict(units="uatm", source=entry["url"], **entry["meta"]),
    )

    pco2 = add_history(
        pco2, "re-shaped data from [year month lat lon] to [time lat lon]."
    )
    pco2 = add_history(pco2, "converted fCO2 to pCO2 using OISST v2.1, and ERA5 MSLP")

    return pco2
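
# Background on the fCO2 -> pCO2 step above: fugacity accounts for the
# non-ideal behaviour of CO2, so the conversion needs temperature and
# pressure. A hedged sketch of the standard correction (Weiss, 1974),
# approximating (1 - xCO2)^2 as 1; the actual implementation used above
# lives in seaflux.fco2_pco2_conversion and may differ in detail:
def _fco2_to_pco2_sketch(fCO2_uatm, sst_degC, press_hPa):
    import numpy as np

    T = sst_degC + 273.15  # kelvin
    P = press_hPa / 1013.25  # atm
    R = 82.057  # gas constant in cm3 atm / (mol K)
    # virial coefficients for CO2 in cm3 / mol (Weiss, 1974)
    B = -1636.75 + 12.0408 * T - 3.27957e-2 * T**2 + 3.16528e-5 * T**3
    d = 57.7 - 0.118 * T
    # fCO2 = pCO2 * exp((B + 2*delta) * P / (R * T)), hence the division
    return fCO2_uatm / np.exp((B + 2 * d) * P / (R * T))
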
def get_seamask(entry):
    """Download the SeaFlux sea mask and attach the catalog metadata."""
    flist = download(**entry)
    xds = xr.open_mfdataset(flist, preprocess=preprocess())
    xda = xds.seamask.assign_attrs(**entry["meta"])
    return xda
def get_csir_ml6(entry):
    """Download and process the CSIR-ML6 surface pCO2 product."""
    flist = download(**entry)
    xds = xr.open_mfdataset(flist, preprocess=preprocess())
    xds = xds["spco2"].assign_attrs(units="uatm", source=entry["url"], **entry["meta"])
    return xds
def get_mpi_somffn(entry):
    """Download and process the MPI-SOMFFN surface pCO2 product."""
    flist = download(**entry)
    xda = xr.open_mfdataset(flist, drop_variables="date").spco2_raw
    xda = xda.rename("mpi_somffn").assign_attrs(
        units="uatm", source=entry["url"], **entry["meta"]
    )
    xda = preprocess()(xda)
    return xda
def get_mpi_ulb_somffn(entry):
    """Download and process the MPI-ULB-SOMFFN product: mask non-positive
    pCO2 values and coarsen the grid by a factor of 4 in lat and lon."""
    flist = download(**entry)
    xds = xr.open_mfdataset(flist)

    xda = xds.pco2.where(xds.pco2 > 0).coarsen(lat=4, lon=4).mean()
    xda = xda.rename("mpiulb_somffn").rename(time="month")

    pp = preprocess(rename_coordinates=False, center_months=False)
    xda = pp(xda)

    return xda
def get_cmems_ffnn(entry):
    """Download and process the CMEMS-FFNN surface pCO2 product: scale to
    uatm, recentre longitudes onto [-180, 180), and resample to monthly."""
    flist = download(**entry, n_jobs=8)
    xds = xr.open_mfdataset(flist, combine="nested", concat_dim="time")

    xda = (
        (xds.spco2 * 9.867)  # factor is consistent with a Pa -> uatm conversion
        .assign_coords(longitude=(xds.longitude - 180) % 360 - 180)
        .rename(latitude="lat", longitude="lon")
        .resample(time="1MS")
        .mean()
        .sortby("lon")
        .assign_attrs(units="uatm", source=entry["url"], **entry["meta"])
    )

    xda = preprocess()(xda)

    return xda
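
# Note on the longitude recentring above: (lon - 180) % 360 - 180 maps
# 0..360 coordinates onto [-180, 180). Worked example: lon = 350 gives
# (350 - 180) % 360 - 180 = 170 - 180 = -10, i.e. 350E becomes 10W.
# sortby("lon") then restores monotonic ordering after the wrap.
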
def process_item(item):
    if not item.startswith("http"):
        return item + "\n"
    # the server will give EVERYTHING if empty
    if item.endswith("="):
        return "[{0}Format{2}] {0}{1}{2}\n".format(
            colorama.Fore.RED, item, colorama.Style.RESET_ALL
        )
    try:
        data = fetch_data.download(item)
    except Exception:
        return "[{0}Retrieve{2}] {0}{1}{2}\n".format(
            colorama.Fore.RED, item, colorama.Style.RESET_ALL
        )
    result = parse_data(data)
    if result == "[404]":
        result = "[{0}Not found{2}] {0}{1}{2}\n".format(
            colorama.Fore.RED, item, colorama.Style.RESET_ALL
        )
    return result
def download_salinity(
    catalog_entry,
    verbose=True,
    process_dest="../data/processed/en4_salt_temp.nc",
):
    """Downloads salinity from the Met Office (EN4) for 1982 until today"""
    import xarray as xr

    from fetch_data import download

    from .utils import preprocess

    if path(process_dest).is_file():
        return process_dest

    flist = download(**catalog_entry, verbose=verbose)

    # keep only surface salinity (nearest level to 0 m depth)
    ds = preprocess()(
        xr.open_mfdataset(paths=flist)[["salinity"]]
        .sel(depth=0, method="nearest")
        .drop("depth")
    )

    encode = {k: dict(zlib=True, complevel=4) for k in ds}
    ds.load().to_netcdf(process_dest, encoding=encode)

    return process_dest
def download_sst_ice(
    catalog_entry,
    process_dest="../data/processed/noaa_oisst_sst_icec.nc",
):
    """Downloads OISSTv2 data from NOAA"""
    import xarray as xr

    from fetch_data import download

    from .utils import preprocess

    if path(process_dest).is_file():
        return process_dest

    flist = download(**catalog_entry)

    ds = (
        xr.open_mfdataset(paths=flist, preprocess=preprocess())
        .where(lambda a: a.icec.notnull())
        .drop("time_bnds")
    )

    ds.to_netcdf(
        process_dest, encoding={k: dict(zlib=True, complevel=4) for k in ds}
    )

    return process_dest
def get_jra55_wind_speed(
    url="leave empty - replaced in function",  # for transparency
    download_dest="../data/raw/jra_55/{file_format}/{year}",
    process_dest="../data/processed/jra55_wind_speed_moments.nc",
    years=range(1982, 2021),
    verbose=False,
    n_jobs=8,
):
    """
    Download JRA-55 6-hourly winds, convert the grib files to netCDF, and
    calculate monthly means of the first three moments of wind speed.

    TODO: add readme to netCDF folder.
    TODO: add readme to
    """
    from pathlib import Path as path

    from dask.diagnostics import ProgressBar
    from fetch_data import download
    from fetch_data.core import create_download_readme
    from fetch_data.utils import commong_substring
    from pandas import Timestamp
    from xarray import concat

    years = list(years)
    process_dest = p = path(process_dest)
    process_dest = p.parent / f"{p.stem}_{years[0]}-{years[-1]}{p.suffix}"
    if path(process_dest).is_file():
        return process_dest
    else:
        print(f"File does not exist: {process_dest}")

    cookies = RDAMScookies().get_cookies()

    grib_names = []
    for y in years:
        t0 = Timestamp(f"{y}")
        t1 = Timestamp(f"{y+1}")
        grib_names += download(
            # JRA URLs switch from annual to monthly in 2014
            url=make_jra_6hrly_urls(t0=t0, t1=t1),
            # store the data per year
            dest=download_dest.format(year=y, file_format="grib"),
            login=dict(cookies=cookies),
            verbose=verbose,
            n_jobs=n_jobs,
            log_name="../downloading.log",
            readme_fname="../README.txt",
            meta=jra_meta,
        )

    # replace the path '/grib/' with netcdf for the conversion
    netcdf_names = [f.replace("/grib/", "/netcdf/") + ".nc" for f in grib_names]
    # the function grib_to_netcdf has been made to run in parallel with the decorator
    flist = grib_to_netcdf(grib_names, netcdf_names, n_jobs=n_jobs)

    jra_meta["processing"] = (
        "Data has been converted from grib file format to netCDF4 using the cfgrib "
        "package. Variables without dimensions have been dropped."
    )
    jra_meta["variables"] = "u10, v10"
    jra_meta["grib_source"] = download_dest.format(year="YYYY", file_format="grib")
    create_download_readme(
        "README.md",
        url=commong_substring(grib_names) + "...",
        dest=str(path(download_dest.format(year="YYYY", file_format="netcdf")).parent),
        meta=jra_meta,
    )

    # we get the folders for each year
    folders = sorted(set(path(f).parent for f in flist))

    xds = []
    for folder in folders:
        # list nc files - assumes u10 and v10 in the folder
        ylist = list(folder.glob("*.nc"))
        xds += (calculate_wind_speed(ylist),)

    with ProgressBar():
        wind_speed = concat(xds, "time").load()

    process_dest = path(process_dest)
    process_dest.parent.mkdir(exist_ok=True, parents=True)

    jra_meta["processing"] = (
        "Data has been converted from grib file format to netCDF4. "
        "u10 and v10 data was loaded and the wind_speed was calculated with "
        "(u10^2 + v10^2)^0.5. The first, second, and third moments "
        "(wind_speed^n) were calculated from wind_speed. Note that these "
        "variables were calculated at the model resolution and then scaled "
        "up to monthly by 1 degree to preserve the variability that would "
        "otherwise be lost in the squared function."
    )
    jra_meta["variables"] = "wind_speed, wind_speed^2, wind_speed^3"
    jra_meta["netcdf_source"] = download_dest.format(year="YYYY", file_format="netcdf")

    wind_speed.attrs = jra_meta
    wind_speed.to_netcdf(
        str(process_dest),
        encoding={k: {"complevel": 4, "zlib": True} for k in wind_speed},
    )

    return str(process_dest)
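
# `calculate_wind_speed` is used above but not defined in this module. A
# minimal sketch of what it plausibly does, assuming each yearly folder
# holds u10/v10 netCDF files; the monthly resampling shown here is an
# assumption (the metadata above also mentions 1-degree spatial scaling,
# which this sketch omits):
def _calculate_wind_speed_sketch(flist):
    from xarray import open_mfdataset

    xds = open_mfdataset(flist)
    wspd = (xds.u10**2 + xds.v10**2) ** 0.5  # scalar wind speed at 10 m
    out = wspd.to_dataset(name="wind_speed")
    out["wind_speed_2"] = wspd**2  # second moment
    out["wind_speed_3"] = wspd**3  # third moment
    # moments are computed at the native resolution first, then averaged,
    # so variability is not lost to averaging before squaring/cubing
    return out.resample(time="1MS").mean()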