def test_ar07w(datafiles, out):
    """Colocate the WOA ``t_an`` field onto the AR07W stations and check the result.

    ``out`` is forwarded to ``proj.run`` as ``outtype``; the assertions below
    cover the ``'ndarray'`` and ``'dataframe'`` values. ``datafiles`` is not
    used in the body (presumably a fixture that stages the test files --
    the decorators are not visible here).
    """
    stations = pd.read_csv(FIXTURE_DIR + 'AR07W_stations.txt',
                           skipinitialspace=True)
    woa = xr.open_dataset(FIXTURE_DIR + 'woa_labrador.nc', decode_times=False)

    # build the station projection on the gridded dataset, then sample t_an
    station_lon = stations['lon'].values
    station_lat = stations['lat'].values
    section = lib_easy_coloc.projection(station_lon, station_lat, grid=woa)
    result = section.run(woa['t_an'][:], outtype=out)

    if out == 'dataframe':
        assert isinstance(result, pd.DataFrame)
    elif out == 'ndarray':
        assert isinstance(result, np.ndarray)
        assert result.shape == (1, 102, 30)
# shift dates to middle of the month ds['time'] = pd.date_range(start=f'{ds.time.dt.year[0].values}-{ds.time.dt.month[0].values:02}', end=f'{ds.time.dt.year[-1].values}-{ds.time.dt.month[-1].values:02}', freq='MS') # ========================================== # Here we start making the ovar dataset # ========================================== # Trim the dates to sample_dates ovar = ds[ovar_name].sel(time=sample_dates) ovar['lat'] = ds.latitude ovar['lon'] = ds.longitude # create source grid and target section objects # this requires lon,lat from stations and the source grid dataset containing lon,lat proj = lib_easy_coloc.projection(df['longitude'].values,df['latitude'].values,grid=ovar, from_global=True) # 4-D max for easy_coloc. Not entirely sure what we are squeezing out? ovar = ovar.squeeze() # run the projection on the WOA analyzed temperature (t_an) fld = np.zeros((len(sample_dates),len(ovar.lev),len(df))) # for ind in range(5, 130, 5): dates = sample_dates[ind-5:ind] fld_tem = proj.run(ovar.sel(time=dates)[:]) fld[ind-5:ind,:,:] = fld_tem # create datarray with sampling information sampled_var = xr.DataArray(fld,
def model_to_glodap(ovar_name=None,
                    model=None,
                    catalog_path='../catalogs/pangeo-cmip6.json',
                    qc_path='../qc'):
    """Sample a CMIP6 historical ocean variable at the GLODAP station locations.

    Args:
        ovar_name: variable name (eg 'dissic')
        model: model name (eg 'CanESM5'); must be a key of the institute
            table defined in the body
        catalog_path: path to the catalog opened with intake-esm
        qc_path: directory containing ``GLODAPv2.2019_COORDS.csv``

    Returns:
        xarray Dataset holding the sampled variable with dimensions
        (time, lev, all_stations).

    Raises:
        KeyError: if ``model`` is not in the institute table or the
            expected dataset key is missing from the catalog result.
    """
    # institution that ran each supported model; needed to build the dataset key
    institute = {
        'CanESM5': 'CCCma',
        'CNRM-ESM2-1': 'CNRM-CERFACS',
        'IPSL-CM6A-LR': 'IPSL',
        'MIROC-ES2L': 'MIROC',
        'UKESM1-0-LL': 'MOHC',
        'GISS-E2-1-G-CC': 'NASA-GISS',
        'GISS-E2-1-G': 'NASA-GISS'
    }

    # Get CMIP6 output from intake_esm.
    # Restricting the search to the requested model avoids opening every
    # model in the catalog just to pick one entry out of the dictionary.
    col = intake.open_esm_datastore(catalog_path)
    cat = col.search(experiment_id='historical',
                     table_id='Omon',
                     source_id=model,
                     variable_id=ovar_name,
                     grid_label='gn')

    # dictionary of subset data
    dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True},
                                    cdf_kwargs={'chunks': {}})

    # Put data into dataset
    ds = dset_dict[f'CMIP.{institute[model]}.{model}.historical.Omon.gn']

    # Rename olevel to lev so the vertical coordinate has a single name
    if 'olevel' in ds.dims:
        ds = ds.rename({'olevel': 'lev'})

    # load GLODAP station information from csv file
    # drop nans, reset index, and drop unneeded variable
    df = pd.read_csv(f'{qc_path}/GLODAPv2.2019_COORDS.csv')
    df = df.dropna()
    df = df.reset_index().drop('Unnamed: 0', axis=1)

    # Generate 'YYYY-MM' labels from the year and month columns
    df['dates'] = [
        f'{int(year)}-{int(month):02d}'
        for year, month in zip(df.year, df.month)
    ]

    # Find unique dates, these are the sample dates
    sample_dates = df['dates'].sort_values().unique()

    # Keep only the historical period (first 125 unique months)
    sample_dates = sample_dates[0:125]

    # Pin every sample date to the first of its month. Parsing a bare
    # 'YYYY-MM' string lets dateutil fill the missing day from today's date,
    # which made the result depend on the day the code was run and broke the
    # exact-match selection against the month-start model time axis below.
    sample_dates = [
        dateutil.parser.parse(f'{date}-01') for date in sample_dates
    ]

    # homogenize model dates to the first of each month
    ds['time'] = pd.date_range(
        start=f'{ds.time.dt.year[0].values}-{ds.time.dt.month[0].values:02}',
        end=f'{ds.time.dt.year[-1].values}-{ds.time.dt.month[-1].values:02}',
        freq='MS')

    # ==========================================
    # Here we start making the ovar dataset
    # ==========================================
    # Trim the dates to sample_dates
    ovar = ds[ovar_name].sel(time=sample_dates)
    ovar['lat'] = ds.latitude
    ovar['lon'] = ds.longitude

    # create source grid and target section objects
    # this requires lon,lat from stations and the source grid dataset containing lon,lat
    proj = lib_easy_coloc.projection(df['longitude'].values,
                                     df['latitude'].values,
                                     grid=ovar,
                                     from_global=True)

    # 4-D max for easy_coloc, so drop length-1 dimensions
    # NOTE(review): presumably this removes a singleton member dimension -- confirm
    ovar = ovar.squeeze()

    # run the projection a few dates at a time and fill the output array
    fld = np.zeros((len(sample_dates), len(ovar.lev), len(df)))
    chunk = 5
    for start in range(0, len(sample_dates), chunk):
        stop = start + chunk
        dates = sample_dates[start:stop]
        fld[start:stop, :, :] = proj.run(ovar.sel(time=dates)[:])

    # create dataarray with sampling information
    sampled_var = xr.DataArray(fld,
                               dims=['time', 'lev', 'all_stations'],
                               coords={
                                   'time': ovar['time'],
                                   'lev': ovar['lev'],
                                   'all_stations': df.index.values,
                                   'dx': ('all_stations', df.dx.values),
                                   'bearing': ('all_stations', df.bearing.values),
                                   'lat': ('all_stations', df.latitude.values),
                                   'lon': ('all_stations', df.longitude.values),
                               },
                               attrs={
                                   'units': ovar.units,
                                   'long_name': ovar.long_name
                               })

    # convert dataarray to dataset
    ds = sampled_var.to_dataset(name=ovar.name)

    return ds
def model_to_glodap(ovar_name=None,
                    model=None,
                    catalog_path='../catalogs/pangeo-cmip6.json',
                    qc_path='../qc',
                    output_path='../../sections/'):
    """Interpolates model to GLODAP points.

    This function samples the model as GLODAP and writes the resampled
    data to disk. Runtime of about <5 minutes per model.
    Temporal sampling is done as though every cruise was conducted
    at the same time. Temporal sampling is adjusted to match cruises
    with model_to_section, among other things.

    This function must be run before model_to_section,
    but only needs to be run once.

    Args:
        ovar_name: ocean variable name
        model: name of CMIP6 model
        catalog_path: path to catalog used by intake-esm
        qc_path: location of qc'd model sections
        output_path: where the output is written

    Returns:
        xarray Dataset of the sampled variable (the same data that is
        written to ``output_path``).
    """
    # Get CMIP6 output from intake-esm
    col = intake.open_esm_datastore(catalog_path)
    cat = col.search(experiment_id='historical',
                     table_id='Omon',
                     source_id=model,
                     variable_id=ovar_name,
                     grid_label='gn')

    # dictionary of xarray datasets
    dset_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True},
                                    cdf_kwargs={'chunks': {}})

    # we need to know the institute that ran the model to get the correct xarray dataset
    model_institute_df = cat.df.drop_duplicates(
        subset='source_id')[['source_id', 'institution_id']]
    institute = model_institute_df.institution_id[
        model_institute_df.source_id == model].iloc[0]

    # get the xarray dataset for the corresponding model
    ds = dset_dict[f'CMIP.{institute}.{model}.historical.Omon.gn']

    # CMIP6 files were submitted with inconsistent coordinate names
    # make coordinate names consistent by renaming
    coord_rename_map = make_rename_map(ds, model)
    ds = ds.rename(coord_rename_map[model])

    # load GLODAP station information from csv file
    # drop nans, reset index, and drop unneeded variable
    df = pd.read_csv(f'{qc_path}/GLODAPv2.2019_COORDS.csv')
    df = df.dropna()
    df = df.reset_index().drop('Unnamed: 0', axis=1)

    # Generate list of dates from the separate year and month columns
    # and put into dataframe
    dates = [
        f'{int(year)}-{int(month):02d}-01'
        for year, month in zip(df.year, df.month)
    ]
    df['dates'] = dates

    # Find unique dates, these are the sample dates
    sample_dates = df['dates'].sort_values().unique()

    # Look only at the historical period
    # convert to datetime
    sample_dates = sample_dates[0:125]
    sample_dates = [dateutil.parser.parse(date) for date in sample_dates]

    # homogenize model dates to first of the month
    ds['time'] = pd.date_range(
        start=f'{ds.time.dt.year[0].values}-{ds.time.dt.month[0].values:02}',
        end=f'{ds.time.dt.year[-1].values}-{ds.time.dt.month[-1].values:02}',
        freq='MS')

    # ==========================================
    # Here we start making the ovar dataset
    # ==========================================
    # Trim model dates to sample_dates
    ovar = ds[ovar_name].sel(time=sample_dates)
    ovar['latitude'] = ds.latitude
    ovar['longitude'] = ds.longitude

    # create source grid and target section objects
    # this requires lon,lat from stations and the source grid dataset
    # containing 'longitude','latitude'
    proj = lib_easy_coloc.projection(df['longitude'].values,
                                     df['latitude'].values,
                                     grid=ovar,
                                     coord_names=['longitude', 'latitude'],
                                     from_global=True)

    # get the realization (ex: r10i1p1f1)
    # len(realizations) gives the number of ensemble members
    realizations = cat.df[cat.df['source_id'] == model].member_id.values

    if len(realizations) < 2:
        # only one ensemble member in the database: just drop singleton dims
        ovar = ovar.squeeze()
    else:
        # right now, if there are multiple ensemble members, we only sample one.
        # The previous `> 2` test skipped models with exactly two members, so
        # nothing was sampled or written for them.
        # NOTE(review): assumes the ensemble-member dimension is the first axis
        # and that its first entry corresponds to realizations[0] -- confirm
        ovar = ovar[0].squeeze()

    # run the projection a few dates at a time and fill the output array
    fld = np.zeros((len(sample_dates), len(ovar.lev), len(df)))
    chunk = 5
    for start in range(0, len(sample_dates), chunk):
        stop = start + chunk
        dates = sample_dates[start:stop]
        fld[start:stop, :, :] = proj.run(ovar.sel(time=dates)[:])

    # create dataarray with sampling information
    sampled_var = xr.DataArray(fld,
                               dims=['time', 'lev', 'all_stations'],
                               coords={'time': ovar['time'],
                                       'lev': ovar['lev'],
                                       'all_stations': df.index.values,
                                       'dx': ('all_stations', df.dx.values),
                                       'bearing': ('all_stations', df.bearing.values),
                                       'lat': ('all_stations', df.latitude.values),
                                       'lon': ('all_stations', df.longitude.values),
                                       },
                               attrs={'units': ovar.units,
                                      'long_name': ovar.long_name
                                      }
                               )

    ds = sampled_var.to_dataset(name=ovar.name)
    ds.to_netcdf(f'{output_path}/{ovar.name}_{model}_{realizations[0]}.nc')

    return ds
from easy_coloc import lib_easy_coloc
import xarray as xr
import pandas as pd
import cartopy as cart
import matplotlib.pylab as plt
from matplotlib import cm

# ---------------------------------------------------------------------------
# Inputs: station list (csv) and the gridded dataset to sample
# ---------------------------------------------------------------------------
ar07w = pd.read_csv('../easy_coloc/test/test_files/AR07W_stations.txt',
                    skipinitialspace=True)
ds = xr.open_dataset('../easy_coloc/test/test_files/woa_labrador.nc',
                     decode_times=False)

station_lon = ar07w['lon'].values
station_lat = ar07w['lat'].values

# ---------------------------------------------------------------------------
# Projection: built from the station lon/lat and the source grid dataset,
# then applied to the WOA analyzed temperature (t_an)
# ---------------------------------------------------------------------------
proj = lib_easy_coloc.projection(station_lon, station_lat,
                                 grid=ds, from_global=False)
fld = proj.run(ds['t_an'][:])

# ---------------------------------------------------------------------------
# Figure 1: stations on a map, coloured by the first [0, 0, :] slice of fld
# ---------------------------------------------------------------------------
plate_carree = cart.crs.PlateCarree()

plt.figure(figsize=[6, 6])
m = plt.axes(projection=plate_carree)
m.scatter(station_lon, station_lat, c=fld[0, 0, :])
m.coastlines()
m.add_feature(cart.feature.LAND, facecolor='0.75')
m.set_extent([-75, -35, 35, 65], crs=plate_carree)
gl = m.gridlines(draw_labels=True)

# ---------------------------------------------------------------------------
# Figure 2: filled contours of fld[0] against station latitude and -depth
# ---------------------------------------------------------------------------
plt.figure(figsize=[6, 6])
plt.contourf(station_lat, -ds['depth'], fld[0, :, :], 30, cmap=cm.gist_ncar)
import xmitgcm
import numpy as np

# ---------------------------------------------------------------------------
# Inputs: station list (csv) and the LLC90 model output opened with xmitgcm
# ---------------------------------------------------------------------------
ar07w = pd.read_csv('../data/AR07W_stations.txt', skipinitialspace=True)
ds = xmitgcm.open_mdsdataset('../data/global_oce_llc90/',
                             prefix=['T'], geometry='llc')

# The face needed for AR07W is #10; for a quick look at the input data:
#   ds['T'].sel(face=10, k=0, time=8).plot(cmap=cm.gist_ncar); plt.show()
face_grid = ds.sel(face=10)
face_temp = ds['T'].sel(face=10)

station_lon = ar07w['lon'].values
station_lat = ar07w['lat'].values

# ---------------------------------------------------------------------------
# Projection: built from the station lon/lat and the face-10 subset of ds,
# passing the names of the lon/lat coordinates in ds ('XC', 'YC')
# ---------------------------------------------------------------------------
proj = lib_easy_coloc.projection(station_lon, station_lat,
                                 grid=face_grid,
                                 coord_names=['XC', 'YC'],
                                 from_global=False)

# sample the 'T' field on face 10, passing 0 as the mask value
fld = proj.run(face_temp, mask_value=0)

# ---------------------------------------------------------------------------
# Figure 1: stations on a map, coloured by the first [0, 0, :] slice of fld
# ---------------------------------------------------------------------------
plate_carree = cart.crs.PlateCarree()

plt.figure(figsize=[6, 6])
m = plt.axes(projection=plate_carree)
m.scatter(station_lon, station_lat, c=fld[0, 0, :])
m.coastlines()
m.add_feature(cart.feature.LAND, facecolor='0.75')
m.set_extent([-75, -35, 35, 65], crs=plate_carree)
gl = m.gridlines(draw_labels=True)

# ---------------------------------------------------------------------------
# Figure 2: filled contours of fld[0] against station latitude and Z,
# with values equal to 0 masked out
# ---------------------------------------------------------------------------
section_values = np.ma.masked_values(fld[0, :, :], 0)

plt.figure(figsize=[6, 6])
plt.contourf(station_lat, ds['Z'], section_values, 30, cmap=cm.gist_ncar)
plt.colorbar()