Example 1
def get_seaflux_data(catalog_name=catalog_name,
                     dest=_dest,
                     n_jobs=1,
                     verbose=False):
    """Downloads SeaFlux data from Zenodo using the default yaml file containing
    the paths to the latest SeaFlux data. The data is downloaded and then
    combined. You can create your own yaml file to customise the files you want
    to access."""

    from datetime import datetime as dt

    import fetch_data as fd
    import xarray as xr

    from . import config
    from .utils import preprocess

    cat = fd.read_catalog(catalog_name)
    key = list(cat.keys())[0]
    entry = cat[key]
    entry["dest"] = dest

    flist = fd.download(**entry, n_jobs=n_jobs, verbose=verbose)

    xds = xr.open_mfdataset(flist, preprocess=preprocess())
    xds = xds.assign_attrs(product_name="SeaFlux",
                           product_version=config.version,
                           date_accessed=dt.now().strftime("%Y-%m-%d"),
                           contact=config.contact,
                           **entry["meta"])

    return xds
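
A minimal call sketch for the function above, assuming the SeaFlux package with its bundled catalog is installed and Zenodo is reachable (the keyword values are illustrative):

# Hypothetical usage -- catalog_name and dest fall back to the module defaults.
xds = get_seaflux_data(n_jobs=4, verbose=True)
print(xds.attrs["product_name"], xds.attrs["date_accessed"])
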
Example 2
    def get_jena_mls(entry):
        """processes data"""

        flist = download(**entry)

        xds = xr.open_mfdataset(flist)
        xda = xds.pCO2.resample(mtime="1MS").mean("mtime")

        xda = xda.rename("jena_mls")
        xda = (
            xda.interp(
                lat=np.arange(-89.5, 90),
                lon=np.arange(-179.5, 180),
                method="nearest",
            )
            .roll(lon=180, roll_coords=False)   # rotate the dateline gap inward
            .interpolate_na("lon", limit=20)    # fill across the seam
            .roll(lon=-180, roll_coords=False)  # rotate back
            .rename(mtime="time")
            .assign_attrs(
                units="uatm",
                source=entry["url"],
                **entry["meta"],
                history=(
                    "[SeaFlux] resampled from daily to monthly and "
                    "interpolated to 1 degree using nearest neighbour "
                    "interpolation"
                ),
            )
        )

        xda = preprocess()(xda)

        return xda
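
The roll/interpolate_na/roll pattern above fills gaps that straddle the dateline, where interpolation along lon would otherwise stop at the array edge. A self-contained sketch with synthetic data (all values illustrative):

import numpy as np
import xarray as xr

lon = np.arange(-179.5, 180)
da = xr.DataArray(np.cos(np.deg2rad(lon)), coords={"lon": lon}, dims="lon")
gappy = da.where(abs(da.lon) < 170)  # 20-point NaN gap across the dateline

filled = (
    gappy
    .roll(lon=180, roll_coords=False)   # rotate the gap to the array centre
    .interpolate_na("lon", limit=20)    # now an interior gap, so it fills
    .roll(lon=-180, roll_coords=False)  # rotate back to the original order
)
print(int(gappy.notnull().sum()), "->", int(filled.notnull().sum()))  # 340 -> 360
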
Example 3
    def get_jma_mlr(entry):
        """processes data"""
        def decode_time(xds):
            """processes data"""
            import pandas as pd

            from seaflux.data.utils import add_history

            time = xds.time
            unit = time.attrs.get("units")
            year = pd.to_datetime(unit.split()[-1]).year

            y0, y1 = str(year), str(year + 1)
            time = pd.date_range(y0, y1, freq="1MS", inclusive="left")
            xds = xds.assign_coords(time=time)

            xds = add_history(xds, "decode times manually")

            return xds

        flist = download(**entry, n_jobs=8)

        xda = xr.open_mfdataset(flist,
                                decode_times=False,
                                preprocess=preprocess(decode_time)).pCO2s
        xda = xda.assign_attrs(units="uatm")

        return xda
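
The manual decoding above builds a monthly time axis from the year at the end of the raw units attribute. The same idea in isolation (the units string is a hypothetical example):

import numpy as np
import pandas as pd
import xarray as xr

unit = "months since 1990"  # assumed format of the raw time units
year = pd.to_datetime(unit.split()[-1]).year

time = pd.date_range(str(year), str(year + 1), freq="1MS", inclusive="left")

xds = xr.Dataset({"pCO2s": ("time", np.zeros(12))}, coords={"time": range(12)})
xds = xds.assign_coords(time=time)
print(xds.time.values[[0, -1]])  # 1990-01-01 ... 1990-12-01
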
Example 4
def calc_seafrac(process_dest="../data/processed/etopo1_seafrac.nc"):
    """Calculate the fraction of each 1 degree pixel covered by ocean from
    ETOPO1 bathymetry."""
    from fetch_data import download
    from numpy import arange
    from xarray import open_mfdataset

    fname = download(
        url=("https://www.ngdc.noaa.gov/mgg/global/relief/ETOPO1/data/"
             "ice_surface/cell_registered/netcdf/ETOPO1_Ice_c_gmt4.grd.gz"),
        dest="../data/raw/",
        verbose=True,
    )

    ds = open_mfdataset(fname).rename(x="lon", y="lat", z="topography")
    sea = ds.topography < 0

    seafrac = sea.coarsen(lat=60, lon=60).sum().compute() / 60**2
    seafrac = seafrac.assign_coords(lat=arange(-89.5, 90),
                                    lon=arange(-179.5, 180)).rename("seafrac")
    seafrac.attrs = dict(
        description=("Fraction of pixel that is covered by ocean. "
                     "Calculated from ETOPO1."),
        unit="frac",
    )

    seafrac.to_netcdf(process_dest)

    return process_dest
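
The coarsen/sum trick above turns a boolean high-resolution mask into a per-pixel fraction: count the True cells in each 60x60 block (ETOPO1 has 60 cells per degree) and divide by the block size. A toy version on a synthetic mask:

import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
mask = xr.DataArray(rng.random((180, 360)) < 0.7, dims=("lat", "lon"))

frac = mask.coarsen(lat=60, lon=60).sum() / 60**2  # fraction per coarse block
print(frac.shape)  # (3, 6) -- one value per 60x60 block
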
Example 5
    def get_nies_fnn(entry):
        """processes data"""
        from warnings import filterwarnings

        from fetch_data import read_catalog

        from ..fco2_pco2_conversion import fCO2_to_pCO2
        from .aux_vars import download_era5_slp, download_sst_ice
        from .utils import add_history

        filterwarnings("ignore", category=RuntimeWarning)

        def decode_time(xds):
            """processes data"""
            import pandas as pd

            from datetime_matcher import DatetimeMatcher

            re_date = DatetimeMatcher()

            fname = xds.encoding["source"]
            datetime = re_date.extract_datetime("flux.%Y.ver", fname)
            year = pd.Timestamp(datetime).year

            y0, y1 = str(year), str(year + 1)
            time = pd.date_range(y0, y1, freq="1MS", inclusive="left")

            xds = xds.rename(month="time").assign_coords(time=time)

            xds = add_history(xds, "decode times manually")

            return xds

        flist = download(**entry)
        xda = xr.open_mfdataset(flist, preprocess=preprocess(decode_time)).fco2

        aux_cat = read_catalog("../data/aux_data.yml")

        t0, t1 = [str(s) for s in xda.time.values[[0, -1]]]
        sst = xr.open_dataset(download_sst_ice(
            aux_cat["oisst_v2"]))["sst"].sel(time=slice(t0, t1))
        msl = xr.open_dataset(
            download_era5_slp())["sp"].sel(time=slice(t0, t1)) / 100  # Pa to hPa

        pco2 = xr.DataArray(
            fCO2_to_pCO2(xda, sst, msl),
            coords=xda.coords,
            dims=xda.dims,
            attrs=dict(units="uatm", source=entry["url"], **entry["meta"]),
        )

        pco2 = add_history(
            pco2,
            "re-shaped data from [year month lat lon] to [time lat lon].")
        pco2 = add_history(
            pco2, "converted fCO2 to pCO2 using OISST v2.1, and ERA5 MSLP")

        return pco2
Example 6
    def get_seamask(entry):
        """processes data"""

        flist = download(**entry)
        xds = xr.open_mfdataset(flist, preprocess=preprocess())

        xda = xds.seamask.assign_attrs(**entry["meta"])

        return xda
Example 7
    def get_csir_ml6(entry):
        """processes data"""

        flist = download(**entry)
        xds = xr.open_mfdataset(flist, preprocess=preprocess())

        xds = xds["spco2"].assign_attrs(units="uatm",
                                        source=entry["url"],
                                        **entry["meta"])

        return xds
Example 8
    def get_mpi_somffn(entry):
        """processes data"""

        flist = download(**entry)

        xda = xr.open_mfdataset(flist, drop_variables="date").spco2_raw
        xda = xda.rename("mpi_somffn").assign_attrs(units="uatm",
                                                    source=entry["url"],
                                                    **entry["meta"])

        xda = preprocess()(xda)

        return xda
Example 9
    def get_mpi_ulb_somffn(entry):
        """processes data"""

        flist = download(**entry)

        xds = xr.open_mfdataset(flist)
        xda = xds.pco2.where(xds.pco2 > 0).coarsen(lat=4, lon=4).mean()
        xda = xda.rename("mpiulb_somffn").rename(time="month")

        pp = preprocess(rename_coordinates=False, center_months=False)

        xda = pp(xda)
        return xda
Example 10
    def get_cmems_ffnn(entry):
        """processes data"""

        flist = download(**entry, n_jobs=8)

        xds = xr.open_mfdataset(flist, combine="nested", concat_dim="time")
        xda = (
            (xds.spco2 * 9.867)  # assumed Pa -> uatm conversion
            .assign_coords(longitude=(xds.longitude - 180) % 360 - 180)
            .rename(latitude="lat", longitude="lon")
            .resample(time="1MS").mean()
            .sortby("lon")
            .assign_attrs(units="uatm", source=entry["url"], **entry["meta"])
        )

        xda = preprocess()(xda)

        return xda
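
The assign_coords line above relies on the modulo identity (lon - 180) % 360 - 180 to map longitudes from [0, 360) into [-180, 180) before sorting. Spelled out on a few sample values:

import numpy as np

lon_0360 = np.array([0.5, 90.0, 180.0, 270.0, 359.5])
print((lon_0360 - 180) % 360 - 180)  # [   0.5   90.  -180.   -90.    -0.5]
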
Example 11
def process_item(item):
    if not item.startswith('http'):
        return item + '\n'

    # the server will give EVERYTHING if empty
    if item.endswith('='):
        return '[{0}Format{2}] {0}{1}{2}\n'.format(colorama.Fore.RED, item, colorama.Style.RESET_ALL)

    try:
        data = fetch_data.download(item)
    except Exception:
        return '[{0}Retrieve{2}] {0}{1}{2}\n'.format(colorama.Fore.RED, item, colorama.Style.RESET_ALL)

    result = parse_data(data)
    if result == '[404]':
        result = '[{0}Not found{2}] {0}{1}{2}\n'.format(colorama.Fore.RED, item, colorama.Style.RESET_ALL)
    return result
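
The three error branches above repeat the same colorama format string. A small helper (hypothetical, not part of the original code) shows the pattern on its own:

import colorama

def red_tag(label, text):
    """Return '[label] text' with both parts wrapped in ANSI red."""
    return "[{0}{1}{3}] {0}{2}{3}\n".format(
        colorama.Fore.RED, label, text, colorama.Style.RESET_ALL)

print(red_tag("Not found", "http://example.com/missing"), end="")
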
Example 12
def download_salinity(
    catalog_entry,
    verbose=True,
    process_dest="../data/processed/en4_salt_temp.nc",
):
    """Downloads salinity from MetOffice for 1982 until today"""
    from pathlib import Path as path

    import xarray as xr

    from fetch_data import download

    from .utils import preprocess

    if path(process_dest).is_file():
        return process_dest

    flist = download(**catalog_entry, verbose=verbose)
    ds = preprocess()(
        xr.open_mfdataset(paths=flist)[["salinity"]]
        .sel(depth=0, method="nearest")
        .drop("depth"))

    encode = {k: dict(zlib=True, complevel=4) for k in ds}
    ds.load().to_netcdf(process_dest, encoding=encode)

    return process_dest
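
The encoding dict above enables zlib compression at level 4 for every data variable when writing. The same pattern in isolation, on a throwaway dataset (the output path is illustrative and the netCDF4 engine is assumed):

import numpy as np
import xarray as xr

ds = xr.Dataset({"salinity": ("time", np.random.rand(10))})
encode = {k: dict(zlib=True, complevel=4) for k in ds}  # one entry per variable
ds.to_netcdf("compressed_example.nc", encoding=encode)
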
Example 13
def download_sst_ice(
    catalog_entry,
    process_dest="../data/processed/noaa_oisst_sst_icec.nc",
):
    """Downloads OISSTv2 data from NOAA"""
    from pathlib import Path as path

    import xarray as xr

    from fetch_data import download

    from .utils import preprocess

    if path(process_dest).is_file():
        return process_dest

    flist = download(**catalog_entry)

    ds = (xr.open_mfdataset(paths=flist, preprocess=preprocess())
          .where(lambda a: a.icec.notnull())
          .drop("time_bnds"))

    ds.to_netcdf(process_dest,
                 encoding={k: dict(zlib=True, complevel=4) for k in ds})

    return process_dest
Example 14
def get_jra55_wind_speed(
        url="leave empty - replaced in function",  # for transparency
        download_dest="../data/raw/jra_55/{file_format}/{year}",
        process_dest="../data/processed/jra55_wind_speed_moments.nc",
        years=range(1982, 2021),
        verbose=False,
        n_jobs=8,
):
    """
    TODO: add readme to netCDF folder.
    TODO: add readme to
    """
    from pathlib import Path as path

    from dask.diagnostics import ProgressBar
    from fetch_data import download
    from fetch_data.core import create_download_readme
    from fetch_data.utils import commong_substring
    from pandas import Timestamp
    from xarray import concat

    years = list(years)
    process_dest = p = path(process_dest)
    process_dest = p.parent / f"{p.stem}_{years[0]}-{years[-1]}{p.suffix}"

    if path(process_dest).is_file():
        return process_dest
    else:
        print(f"File does not exist: {process_dest}")

    cookies = RDAMScookies().get_cookies()
    grib_names = []
    for y in years:
        t0 = Timestamp(f"{y}")
        t1 = Timestamp(f"{y+1}")
        grib_names += download(
            # JRA URLs switch from annual to monthly in 2014
            url=make_jra_6hrly_urls(t0=t0, t1=t1),
            dest=download_dest.format(
                year=y, file_format="grib"),  # store the data per year
            login=dict(cookies=cookies),
            verbose=verbose,
            n_jobs=n_jobs,
            log_name="../downloading.log",
            readme_fname="../README.txt",
            meta=jra_meta,
        )

    # replace '/grib/' in the path with '/netcdf/' for the conversion
    netcdf_names = [
        f.replace("/grib/", "/netcdf/") + ".nc" for f in grib_names
    ]
    # grib_to_netcdf has been decorated to run in parallel across files
    flist = grib_to_netcdf(grib_names, netcdf_names, n_jobs=n_jobs)

    jra_meta["processing"] = (
        "Data has been converted from grib file format to netCDF4 using the cfgrib "
        "package. Variables without dimensions have been dropped. ")
    jra_meta["variables"] = "u10, v10"
    jra_meta["grib_source"] = download_dest.format(year="YYYY",
                                                   file_format="grib")
    create_download_readme(
        "README.md",
        url=commong_substring(grib_names) + "...",
        dest=str(
            path(download_dest.format(year="YYYY",
                                      file_format="netcdf")).parent),
        meta=jra_meta,
    )

    # collect the per-year folders
    folders = sorted(set(path(f).parent for f in flist))
    xds = []
    for folder in folders:
        # list nc files - assumes u10 and v10 are in the folder
        ylist = list(folder.glob("*.nc"))
        xds.append(calculate_wind_speed(ylist))

    with ProgressBar():
        wind_speed = concat(xds, "time").load()

    process_dest = path(process_dest)
    process_dest.parent.mkdir(exist_ok=True, parents=True)

    jra_meta["processing"] = (
        "Data has been converted from grib file format to netCDF4)."
        "u10 and v10 data was loaded and the wind_speed was calculated with "
        "(u10^2 + v10^2)^0.5. The first, second, and third moments "
        "(wind_speed^n) were calculated from wind_speed. Note that these "
        "variables were calculated at the model resolution and then scaled "
        "up to monthly by 1 degree to preserve the variability that would "
        "otherwise be lost in the squared function. ")
    jra_meta["variables"] = "wind_speed, wind_speed^2, wind_speed^3"
    jra_meta["netcdf_source"] = download_dest.format(year="YYYY",
                                                     file_format="netcdf")

    wind_speed.attrs = jra_meta
    wind_speed.to_netcdf(
        str(process_dest),
        encoding={k: dict(zlib=True, complevel=4) for k in wind_speed},
    )

    return str(process_dest)
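
A condensed sketch of the moment calculation described in the metadata above: wind speed and its powers are computed at the native resolution first, so that averaging afterwards preserves the mean of wind_speed^n rather than collapsing it to (mean wind_speed)^n. Shapes and values here are synthetic:

import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
dims = ("time", "lat", "lon")
u10 = xr.DataArray(rng.standard_normal((8, 4, 4)), dims=dims)
v10 = xr.DataArray(rng.standard_normal((8, 4, 4)), dims=dims)

wind_speed = (u10**2 + v10**2) ** 0.5
moments = xr.Dataset({
    "wind_speed": wind_speed,       # first moment
    "wind_speed_2": wind_speed**2,  # second moment
    "wind_speed_3": wind_speed**3,  # third moment
})
coarse = moments.coarsen(time=4).mean()  # average only after taking the powers
print(coarse.wind_speed_2.shape)  # (2, 4, 4)
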