Example #1
0
def test_ar07w(datafiles, out):
    """Check the result type of a projection onto the AR07W stations.

    Reads the station table and the gridded file from FIXTURE_DIR, builds a
    projection from the station 'lon'/'lat' columns onto the grid, runs it on
    the 't_an' variable and asserts on the result according to ``out``.

    Args:
        datafiles: not used in the body (presumably a pytest fixture that
            stages the test files -- confirm against the test module).
        out: forwarded as ``outtype`` to the projection run; only
            'ndarray' and 'dataframe' trigger assertions.
    """
    stations = pd.read_csv(FIXTURE_DIR + 'AR07W_stations.txt',
                           skipinitialspace=True)
    grid = xr.open_dataset(FIXTURE_DIR + 'woa_labrador.nc',
                           decode_times=False)

    station_lon = stations['lon'].values
    station_lat = stations['lat'].values
    section = lib_easy_coloc.projection(station_lon, station_lat, grid=grid)
    result = section.run(grid['t_an'][:], outtype=out)

    if out == 'dataframe':
        assert isinstance(result, pd.DataFrame)
    elif out == 'ndarray':
        # the array result is additionally checked against a fixed shape
        assert isinstance(result, np.ndarray)
        assert result.shape == (1, 102, 30)
    return None
    # shift dates to middle of the month
    ds['time'] = pd.date_range(start=f'{ds.time.dt.year[0].values}-{ds.time.dt.month[0].values:02}',
                            end=f'{ds.time.dt.year[-1].values}-{ds.time.dt.month[-1].values:02}',
                            freq='MS')

    # ==========================================
    # Here we start making the ovar dataset
    # ==========================================
    # Trim the dates to sample_dates
    ovar = ds[ovar_name].sel(time=sample_dates)
    ovar['lat'] = ds.latitude
    ovar['lon'] = ds.longitude

    # create source grid and target section objects
    # this requires lon,lat from stations and the source grid dataset containing lon,lat
    proj = lib_easy_coloc.projection(df['longitude'].values,df['latitude'].values,grid=ovar,
                                     from_global=True)

    # 4-D max for easy_coloc. Not entirely sure what we are squeezing out?
    ovar = ovar.squeeze() 

    # run the projection on the WOA analyzed temperature (t_an)
    fld = np.zeros((len(sample_dates),len(ovar.lev),len(df)))

    # 
    for ind in range(5, 130, 5):
        dates = sample_dates[ind-5:ind]
        fld_tem = proj.run(ovar.sel(time=dates)[:])
        fld[ind-5:ind,:,:] = fld_tem

    # create datarray with sampling information
    sampled_var = xr.DataArray(fld,
def model_to_glodap(ovar_name=None,
                    model=None,
                    catalog_path='../catalogs/pangeo-cmip6.json',
                    qc_path='../qc'):
    '''
    model_to_glodap(ovar_name, model)

    Sample one CMIP6 historical model variable at the GLODAP station
    positions.

    Input
    ==========
    ovar_name : variable name (eg 'dissic')
    model : model name (eg CanESM5); must be a key of the institute table
    catalog_path : catalog file opened with intake.open_esm_datastore
    qc_path : directory containing GLODAPv2.2019_COORDS.csv

    Output
    ===========
    ds : dataset holding the sampled variable with dimensions
         (time, lev, all_stations)

    Raises
    ===========
    KeyError : if model is not one of the models in the institute table
    '''
    # institute that ran each supported model (needed to build the dataset key)
    institutes = {
        'CanESM5': 'CCCma',
        'CNRM-ESM2-1': 'CNRM-CERFACS',
        'IPSL-CM6A-LR': 'IPSL',
        'MIROC-ES2L': 'MIROC',
        'UKESM1-0-LL': 'MOHC',
        'GISS-E2-1-G-CC': 'NASA-GISS',
        'GISS-E2-1-G': 'NASA-GISS'
    }

    # Fail fast, before any catalog or data access, with the same exception
    # type the dictionary lookup would raise later on.
    if model not in institutes:
        raise KeyError(
            f'unknown model {model!r}; expected one of {sorted(institutes)}')

    # Get CMIP6 output from intake_esm; restricting the search to the
    # requested model avoids opening every other model's dataset.
    col = intake.open_esm_datastore(catalog_path)
    cat = col.search(experiment_id='historical',
                     table_id='Omon',
                     source_id=model,
                     variable_id=ovar_name,
                     grid_label='gn')

    # dictionary of subset data
    dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True},
                                    cdf_kwargs={'chunks': {}})

    # Put data into dataset
    ds = dset_dict[f'CMIP.{institutes[model]}.{model}.historical.Omon.gn']

    # Rename olevel to lev so the vertical coordinate has one name
    if 'olevel' in ds.dims:
        ds = ds.rename({'olevel': 'lev'})

    # load GLODAP station information from csv file
    # drop nans, reset index, and drop unneeded variable
    df = pd.read_csv(f'{qc_path}/GLODAPv2.2019_COORDS.csv')
    df = df.dropna()
    df = df.reset_index().drop('Unnamed: 0', axis=1)

    # Generate one date string per station. The day is written explicitly:
    # dateutil fills missing fields from the current date, so parsing a bare
    # 'YYYY-MM' would give a different day depending on when the code runs
    # and the exact-match selection below would fail.
    df['dates'] = [
        f'{int(year)}-{int(month):02d}-01'
        for year, month in zip(df.year, df.month)
    ]

    # Find unique dates, these are the sample dates
    sample_dates = df['dates'].sort_values().unique()

    # Keep the first 125 unique months (labelled the historical period by
    # the original author) and convert them to datetime objects
    sample_dates = sample_dates[0:125]
    sample_dates = [dateutil.parser.parse(date) for date in sample_dates]

    # homogenize the model time axis to the first day of each month
    # (freq='MS') so it lines up with the sample dates built above
    ds['time'] = pd.date_range(
        start=f'{ds.time.dt.year[0].values}-{ds.time.dt.month[0].values:02}',
        end=f'{ds.time.dt.year[-1].values}-{ds.time.dt.month[-1].values:02}',
        freq='MS')

    # ==========================================
    # Here we start making the ovar dataset
    # ==========================================
    # Trim the dates to sample_dates
    ovar = ds[ovar_name].sel(time=sample_dates)
    ovar['lat'] = ds.latitude
    ovar['lon'] = ds.longitude

    # create source grid and target section objects
    # this requires lon,lat from stations and the source grid dataset containing lon,lat
    proj = lib_easy_coloc.projection(df['longitude'].values,
                                     df['latitude'].values,
                                     grid=ovar,
                                     from_global=True)

    # 4-D max for easy_coloc, so drop every length-1 dimension
    # (presumably the single ensemble-member axis -- TODO confirm)
    ovar = ovar.squeeze()

    # buffer for the sampled model field: (time, lev, station)
    fld = np.zeros((len(sample_dates), len(ovar.lev), len(df)))

    # Run the projection a few time steps at a time (presumably to bound
    # memory use). Stepping over the real number of sample dates keeps this
    # correct when fewer than 125 dates are available.
    chunk = 5
    for start in range(0, len(sample_dates), chunk):
        stop = start + chunk
        dates = sample_dates[start:stop]
        fld[start:stop, :, :] = proj.run(ovar.sel(time=dates)[:])

    # create datarray with sampling information
    sampled_var = xr.DataArray(fld,
                               dims=['time', 'lev', 'all_stations'],
                               coords={
                                   'time': ovar['time'],
                                   'lev': ovar['lev'],
                                   'all_stations': df.index.values,
                                   'dx': ('all_stations', df.dx.values),
                                   'bearing':
                                   ('all_stations', df.bearing.values),
                                   'lat': ('all_stations', df.latitude.values),
                                   'lon':
                                   ('all_stations', df.longitude.values),
                               },
                               attrs={
                                   'units': ovar.units,
                                   'long_name': ovar.long_name
                               })

    # convert datarray to dataset, named after the sampled variable
    ds = sampled_var.to_dataset(name=ovar.name)

    return ds
Example #4
0
def model_to_glodap(ovar_name=None,
                model=None,
                catalog_path='../catalogs/pangeo-cmip6.json',
                qc_path='../qc',
                output_path='../../sections/'):

    """Interpolates model to GLODAP points.

    This function samples the model as GLODAP and writes the
    resampled data to disk. Runtime of about <5 minutes per
    model.

    Temporal sampling is done as though every cruise was conducted
    at the same time. Temporal sampling is adjusted to match
    cruises with model_to_section, among other things.

    This function must be run before model_to_section, but only
    needs to be run once.

    Args:
        ovar_name: ocean variable name
        model: name of CMIP6 model
        catalog_path: path to catalog used by intake-esm
        qc_path: directory containing GLODAPv2.2019_COORDS.csv
        output_path: directory the netCDF file is written to

    Returns:
        xarray Dataset holding the sampled variable with dimensions
        (time, lev, all_stations); the same dataset is written to
        ``{output_path}/{variable}_{model}_{realization}.nc``.

    """

    # Get CMIP6 output from intake-esm
    col = intake.open_esm_datastore(catalog_path)
    cat = col.search(experiment_id='historical',
                     table_id='Omon',
                     source_id=model,
                     variable_id=ovar_name,
                     grid_label='gn')

    # dictionary of xarray datasets
    dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True},
                                    cdf_kwargs={'chunks': {}})

    # we need to know the institute that ran the model to get the correct
    # xarray dataset; take it from the first catalog row for this model
    model_rows = cat.df[cat.df['source_id'] == model]
    institute = model_rows['institution_id'].iloc[0]

    # get the xarray dataset for the corresponding model
    ds = dset_dict[f'CMIP.{institute}.{model}.historical.Omon.gn']

    # CMIP6 files were submitted with inconsistent coordinate names
    # make coordinate names consistent by renaming
    coord_rename_map = make_rename_map(ds, model)
    ds = ds.rename(coord_rename_map[model])

    # load GLODAP station information from csv file
    # drop nans, reset index, and drop unneeded variable
    df = pd.read_csv(f'{qc_path}/GLODAPv2.2019_COORDS.csv')
    df = df.dropna()
    df = df.reset_index().drop('Unnamed: 0', axis=1)

    # Generate list of dates (first of the month) from the separate year and
    # month columns and put into dataframe
    df['dates'] = [
        f'{int(year)}-{int(month):02d}-01'
        for year, month in zip(df.year, df.month)
    ]

    # Find unique dates, these are the sample dates
    sample_dates = df['dates'].sort_values().unique()

    # Look only at the first 125 unique months (labelled the historical
    # period by the original author) and convert to datetime
    sample_dates = sample_dates[0:125]
    sample_dates = [dateutil.parser.parse(date) for date in sample_dates]

    # homogenize model dates to first of the month
    ds['time'] = pd.date_range(start=f'{ds.time.dt.year[0].values}-{ds.time.dt.month[0].values:02}',
                               end=f'{ds.time.dt.year[-1].values}-{ds.time.dt.month[-1].values:02}',
                               freq='MS')

    # ==========================================
    # Here we start making the ovar dataset
    # ==========================================

    # Trim model dates to sample_dates
    ovar = ds[ovar_name].sel(time=sample_dates)

    ovar['latitude'] = ds.latitude
    ovar['longitude'] = ds.longitude

    # create source grid and target section objects
    # this requires lon,lat from stations and the source grid dataset
    # containing 'longitude','latitude'
    proj = lib_easy_coloc.projection(df['longitude'].values,
                                     df['latitude'].values,
                                     grid=ovar,
                                     coord_names=['longitude', 'latitude'],
                                     from_global=True)

    # get the realizations (ex: r10i1p1f1) listed in the catalog for the model
    realizations = model_rows.member_id.values

    # buffer for the sampled model field: (time, lev, station)
    fld = np.zeros((len(sample_dates), len(ovar.lev), len(df)))

    # len(realizations) gives the number of ensemble members
    if len(realizations) < 2:
        # only one ensemble member in the database: just drop length-1 dims
        ovar = ovar.squeeze()
    else:
        # right now, if there are multiple ensemble members, we only sample
        # one: index 0 along the first dimension. This branch now also covers
        # exactly two members, which previously matched neither condition.
        # NOTE(review): presumably the first dimension is member_id and its
        # first entry is realizations[0], which names the file -- confirm.
        ovar = ovar[0,].squeeze()

    # Run the projection a few time steps at a time (presumably to bound
    # memory use). Stepping over the real number of sample dates keeps this
    # correct when fewer than 125 dates are available.
    chunk = 5
    for start in range(0, len(sample_dates), chunk):
        stop = start + chunk
        dates = sample_dates[start:stop]
        fld[start:stop, :, :] = proj.run(ovar.sel(time=dates)[:])

    # create datarray with sampling information
    sampled_var = xr.DataArray(fld,
                               dims=['time', 'lev', 'all_stations'],
                               coords={'time': ovar['time'],
                                       'lev': ovar['lev'],
                                       'all_stations': df.index.values,
                                       'dx': ('all_stations', df.dx.values),
                                       'bearing': ('all_stations', df.bearing.values),
                                       'lat': ('all_stations', df.latitude.values),
                                       'lon': ('all_stations', df.longitude.values),
                                       },
                               attrs={'units': ovar.units,
                                      'long_name': ovar.long_name,
                                      },
                               )

    # write the sampled section to disk and hand it back to the caller,
    # as the docstring promises
    ds_out = sampled_var.to_dataset(name=ovar.name)
    ds_out.to_netcdf(f'{output_path}/{ovar.name}_{model}_{realizations[0]}.nc')

    return ds_out
Example #5
0
from easy_coloc import lib_easy_coloc
import xarray as xr
import pandas as pd
import cartopy as cart
import matplotlib.pylab as plt
from matplotlib import cm

# read the AR07W station table; its 'lon' and 'lat' columns are used below
ar07w = pd.read_csv('../easy_coloc/test/test_files/AR07W_stations.txt',
                    skipinitialspace=True)
station_lon = ar07w['lon'].values
station_lat = ar07w['lat'].values

# open the gridded dataset without decoding its time axis
ds = xr.open_dataset('../easy_coloc/test/test_files/woa_labrador.nc',
                     decode_times=False)

# build the projection from the source grid onto the station positions
proj = lib_easy_coloc.projection(station_lon,
                                 station_lat,
                                 grid=ds,
                                 from_global=False)

# project the 't_an' variable onto the stations
fld = proj.run(ds['t_an'][:])

# figure 1: map of the stations coloured by fld[0, 0, :]
plt.figure(figsize=[6, 6])
m = plt.axes(projection=cart.crs.PlateCarree())
m.scatter(station_lon, station_lat, c=fld[0, 0, :])
m.coastlines()
m.add_feature(cart.feature.LAND, facecolor='0.75')
m.set_extent([-75, -35, 35, 65], crs=cart.crs.PlateCarree())
gl = m.gridlines(draw_labels=True)

# figure 2: filled contours of fld[0] against station latitude and -depth
plt.figure(figsize=[6, 6])
section = fld[0, :, :]
plt.contourf(station_lat, -ds['depth'], section, 30, cmap=cm.gist_ncar)
import xmitgcm
import numpy as np

# read the AR07W station table; its 'lon' and 'lat' columns are used below
ar07w = pd.read_csv('../data/AR07W_stations.txt', skipinitialspace=True)
station_lon = ar07w['lon'].values
station_lat = ar07w['lat'].values

# open the MITgcm output (variable prefix 'T') with the 'llc' geometry
ds = xmitgcm.open_mdsdataset('../data/global_oce_llc90/',
                             prefix=['T'],
                             geometry='llc')

# quick look at the input data, the face we need for AR07W is #10
#ds['T'].sel(face=10,k=0,time=8).plot(cmap=cm.gist_ncar); plt.show()

# build the projection from face 10 of the source grid onto the stations;
# the grid's lon/lat coordinates are named 'XC' and 'YC' in this dataset
face_grid = ds.sel(face=10)
proj = lib_easy_coloc.projection(station_lon,
                                 station_lat,
                                 grid=face_grid,
                                 coord_names=['XC','YC'],
                                 from_global=False)

# project the 'T' variable of face 10 onto the stations, passing 0 as the
# mask value
fld = proj.run(ds['T'].sel(face=10), mask_value=0)

# figure 1: map of the stations coloured by fld[0, 0, :]
plt.figure(figsize=[6, 6])
m = plt.axes(projection=cart.crs.PlateCarree())
m.scatter(station_lon, station_lat, c=fld[0, 0, :])
m.coastlines()
m.add_feature(cart.feature.LAND, facecolor='0.75')
m.set_extent([-75, -35, 35, 65], crs=cart.crs.PlateCarree())
gl = m.gridlines(draw_labels=True)

# figure 2: filled contours of fld[0] against station latitude and Z,
# with entries equal to 0 masked out
plt.figure(figsize=[6, 6])
masked_section = np.ma.masked_values(fld[0, :, :], 0)
plt.contourf(station_lat, ds['Z'], masked_section, 30, cmap=cm.gist_ncar)
plt.colorbar()