def load_dailyrel(datafiles, yearnm='year', onset_varnm='D_ONSET',
                  retreat_varnm='D_RETREAT'):
    """Load a data field and its onset / retreat variables from files.

    The files are read with atm.load_concat() using `yearnm` as the
    concatenation dimension.  The onset and retreat variables are split
    off, and the first remaining data variable is returned as the data
    field.

    Parameters
    ----------
    datafiles : sequence of str
        Files to load.  The first entry is re-opened to copy the
        attributes of the data field.
    yearnm : str, optional
        Name of the dimension passed to atm.load_concat() as concat_dim.
    onset_varnm, retreat_varnm : str or None, optional
        Names of the onset and retreat variables.  If None, the
        corresponding output is ``np.nan * ds[yearnm]`` instead of a
        variable read from the files.

    Returns
    -------
    data, onset, retreat
        The data field (with attributes taken from the first file), and
        the onset and retreat variables (or NaN placeholders).
    """
    ds = atm.load_concat(datafiles, concat_dim=yearnm)
    if isinstance(ds, xray.DataArray):
        ds = ds.to_dataset()

    # Materialise the names as a list: on Python 3, keys() returns a view
    # object that has no remove() method.
    varnms = list(ds.data_vars.keys())

    if onset_varnm is not None:
        onset = ds[onset_varnm]
        varnms.remove(onset_varnm)
    else:
        onset = np.nan * ds[yearnm]

    if retreat_varnm is not None:
        retreat = ds[retreat_varnm]
        varnms.remove(retreat_varnm)
    else:
        retreat = np.nan * ds[yearnm]

    # Remaining data variable is the data field
    varnm = varnms[0]
    data = ds[varnm]

    # Copy attributes from the first file in the list
    with xray.open_dataset(datafiles[0]) as ds0:
        data.attrs = ds0[varnm].attrs

    return data, onset, retreat
def read_daily_eta(var_id, level, year, month, days=None, concat_dim='TIME',
                   xsub='[330:2:450]', ysub='[60:2:301]', verbose=True):
    """Return MERRA daily eta-level data for a single variable.

    Reads a single eta level of daily MERRA data from OpenDAP urls and
    concatenates into a DataArray for the selected days of the month.

    Parameters
    ----------
    var_id : str
        Variable ID.  Can be generic ID from the list below, in which
        case get_varname() is called to get the specific ID for MERRA.
        Or var_id can be the exact name as it appears in MERRA data
        files.
        Generic IDs:
        {'u', 'v', 'omega', 'hgt', 'T', 'q', 'ps', 'evap', 'precip'}
    level : int
        Eta level to extract (0-71).  Level 71 is near-surface and
        level 0 is the top of atmosphere.
    year, month : int
        Numeric year and month (1-12).
    days : list of ints, optional
        Subset of days to read.  If None, all days are included.
    concat_dim : str, optional
        Name of dimension for concatenation.
    xsub, ysub : str, optional
        Indices of longitude and latitude subsets to extract.
    verbose : bool, optional
        If True, print updates while processing files.

    Returns
    -------
    data : xray.DataArray or xray.Dataset
        Daily data (3-hourly or hourly) for the month or a selected
        subset of days.
    """
    varnm = get_varname(var_id)
    tsub = '[0:1:3]'
    zsub = '[%d:1:%d]' % (level, level)

    def datafile(year, mon, day, varnm, xsub, ysub, zsub, tsub):
        # Build the OpenDAP url for one day, with the index subsets
        # appended as a constraint expression.
        basedir = ('http://goldsmr3.sci.gsfc.nasa.gov:80/opendap/MERRA/'
                   'MAI6NVANA.5.2.0/')
        url = ('%s%d/%02d/MERRA100.prod.assim.inst6_3d_ana_Nv.%d%02d%02d.hdf'
               '?%s%s%s%s%s,XDim%s,YDim%s,Height%s,TIME%s') % (
                   basedir, year, mon, year, mon, day, varnm, tsub, zsub,
                   ysub, xsub, xsub, ysub, zsub, tsub)
        return url

    if days is None:
        # Materialise as a real list: on Python 3, range() is a lazy
        # sequence rather than a list, and the list helper below is only
        # documented here as taking lists or scalars.
        days = list(range(1, atm.days_this_month(year, month) + 1))

    urls = [datafile(year, month, day, varnm, xsub, ysub, zsub, tsub)
            for day in atm.makelist(days)]

    var = atm.load_concat(urls, varnm, concat_dim, verbose=verbose)
    return var
#months = [4, 5, 6, 7, 8, 9]
#monthstr='apr-sep_'

def datafile(datadir, year, mon):
    """Return the path of the monthly VIMT input file for (year, mon)."""
    return datadir + 'merra_vimt_%d%02d.nc' % (year, mon)


def savefile(datadir, varnm, year, monthstr, pmin):
    """Return the output path for variable `varnm` in `year`.

    The file name embeds pmin/100 (formatted without decimals and
    followed by 'mb'), the month string and the year.
    """
    # The format is applied to the full path (datadir included), as in
    # the original two-step construction.
    template = datadir + 'merra_%s_ps-%.0fmb_%s%d.nc'
    return template % (varnm, pmin/100, monthstr, year)


# Read daily data from each year and month and concatenate together
for y, year in enumerate(years):
    files = []
    for mon in months:
        files.append(datafile(datadir, year, mon))
    ds = atm.load_concat(files, concat_dim='day')

    pmin = ds['uq_int'].attrs['pmin']
    filn = savefile(datadir, 'vimt', year, monthstr, pmin)
    print('Saving VIMT to ' + filn)
    ds.to_netcdf(filn)

    # Compute moisture flux convergence and save to files
    print('Calculating MFC')
    mfc = atm.moisture_flux_conv(ds['uq_int'], ds['vq_int'],
                                 already_int=True)

    # Keep the computed name as long_name, then rename the variable.
    mfc.attrs['long_name'] = mfc.name
    mfc.name = 'MFC'

    # Carry over the attributes of uq_int.
    # NOTE(review): if uq_int itself has a 'long_name' attribute it
    # replaces the one set just above -- confirm that is intended.
    mfc.attrs.update(ds['uq_int'].attrs)

    filn = savefile(datadir, 'MFC', year, monthstr, pmin)
    print('Saving MFC to ' + filn)
    atm.save_nc(filn, mfc)
# Keyword arguments forwarded to read_groups() for every day
calc_kw = {
    'latlon': latlon,
    'plevs': plevs,
    'dp_vars': dp_vars,
    'sector_lons': sector_lons,
}

# NetCDF options, selected by data version
nc_kw = {
    'merra2': {'format': 'NETCDF4_classic', 'engine': 'netcdf4'},
    'merra': {'format': None, 'engine': None},
}[version]

# Read data and concatenate
for year in years:
    # Maps each name returned by read_groups() to its list of daily files
    dailyfiles = collections.defaultdict(list)

    for month in months:
        url_dict = get_url_dict(year, month, version, vargroups)
        days = range(1, atm.days_this_month(year, month) + 1)
        jdays = atm.season_days(atm.month_str(month), atm.isleap(year))
        for day, jday in zip(days, jdays):
            files = read_groups(url_dict, vargroups, datadir, year, month,
                                day, jday, calc_kw, nc_kw)
            for nm in files:
                dailyfiles[nm].append(files[nm])

    # Consolidate daily files into yearly files and delete daily files
    for nm in dailyfiles:
        data = atm.load_concat(dailyfiles[nm], concat_dim='day')
        for varnm in data.data_vars:
            var = data[varnm]
            # The output file name is obtained before the variable is
            # renamed below; keep this order.
            filenm = get_filename(var, version, datadir, year)
            var.name = var.attrs.get('varnm', varnm)
            print('Saving to ' + filenm)
            atm.save_nc(filenm, var)

        print('Deleting daily files')
        for filenm in dailyfiles[nm]:
            print(filenm)
            os.remove(filenm)
def calc_fluxes(year, month,
                var_ids=['u', 'q', 'T', 'theta', 'theta_e', 'hgt'],
                concat_dim='TIME', scratchdir=None, keepscratch=False,
                verbose=True):
    """Return the monthly mean of MERRA daily fluxes.

    Reads MERRA daily data from OpenDAP urls, computes fluxes, and
    returns the monthly mean of the daily variable and its zonal and
    meridional fluxes.

    Parameters
    ----------
    year, month : int
        Numeric year and month (1-12).
    var_ids : str or list of str, optional
        IDs of variables to include.  A single ID may be given as a
        plain string.
    concat_dim : str, optional
        Name of dimension for concatenation.
    scratchdir : str, optional
        Directory path to store temporary files while processing data.
        If omitted, the current working directory is used.
    keepscratch : bool, optional
        If True, scratch files are kept in scratchdir.  Otherwise they
        are deleted.
    verbose : bool, optional
        If True, print updates while processing files.

    Returns
    -------
    data : xray.Dataset
        Mean of daily data and the mean of the daily zonal fluxes
        (u * var) and meridional fluxes (v * var), for each variable
        in var_ids.
    """
    # Normalise once so that the name list below and the flux loop agree;
    # iterating a bare string ID would otherwise loop over its characters.
    # (The list default is never mutated in this function.)
    var_ids = atm.makelist(var_ids)

    nms = [get_varname(nm) for nm in var_ids]
    u_nm, v_nm = get_varname('u'), get_varname('v')
    nms.extend([u_nm, v_nm])
    # theta and theta_e are derived, so their inputs must be read as well
    if 'theta' in nms:
        nms.append(get_varname('T'))
    if 'theta_e' in nms:
        nms.extend([get_varname('T'), get_varname('q')])
    nms = set(nms)

    days = range(1, atm.days_this_month(year, month) + 1)

    def scratchfile(nm, k, year, month, day):
        # Name of the temporary file for one level and day
        filestr = '%s_level%d_%d%02d%02d.nc' % (nm, k, year, month, day)
        if scratchdir is not None:
            filestr = os.path.join(scratchdir, filestr)
        return filestr

    # Read metadata from one file to get pressure-level array
    dataset = 'p_daily'
    url = url_list(dataset, return_dict=False)[0]
    with xray.open_dataset(url) as meta_ds:
        pname = atm.get_coord(meta_ds, 'plev', 'name')
        plev = atm.get_coord(meta_ds, 'plev')
        # Pressure levels in Pa for theta/theta_e calcs
        p_units = atm.pres_units(meta_ds[pname].units)
        pres = atm.pres_convert(plev, p_units, 'Pa')

    # Get daily data (raw and calculate extended variables)
    def get_data(nms, pres, year, month, day, concat_dim, subset_dict,
                 verbose):
        # Lists of raw and extended variables
        ids = list(nms)
        ext = []
        for var in ['theta', 'theta_e']:
            if var in ids:
                ext.append(var)
                ids.remove(var)

        # Read raw data and calculate extended variables
        data = read_daily(ids, year, month, day, concat_dim=concat_dim,
                          subset_dict=subset_dict, verbose=verbose)
        if 'theta' in ext:
            print_if('Computing potential temperature', verbose)
            T = data[get_varname('T')]
            data['theta'] = atm.potential_temp(T, pres)
        if 'theta_e' in ext:
            print_if('Computing equivalent potential temperature', verbose)
            T = data[get_varname('T')]
            q = data[get_varname('q')]
            data['theta_e'] = atm.equiv_potential_temp(T, pres, q)
        return data

    # Iterate over vertical levels
    for k, p in enumerate(plev):
        subset_dict = {pname: (p, p)}
        print_if('Pressure-level %.1f' % p, verbose)

        files = []
        for day in days:
            # Read data for this level and day
            ds = get_data(nms, pres[k], year, month, day, concat_dim,
                          subset_dict, verbose)

            # Compute fluxes
            print_if('Computing fluxes', verbose)
            u = ds[get_varname('u')]
            v = ds[get_varname('v')]
            for nm in var_ids:
                var = ds[get_varname(nm)]
                u_var = u * var
                v_var = v * var

                u_var.name = get_varname(u_nm) + '*' + var.name
                units = var.attrs['units'] + ' * ' + u.attrs['units']
                u_var.attrs['units'] = units
                v_var.name = get_varname(v_nm) + '*' + var.name
                # Same units string as the zonal flux (built from u's units)
                v_var.attrs['units'] = units

                ds[u_var.name] = u_var
                ds[v_var.name] = v_var

            # Save to temporary scratch file
            filenm = scratchfile('fluxes', k, year, month, day)
            files.append(filenm)
            print_if('Saving to scratch file ' + filenm, verbose)
            ds.to_netcdf(filenm)

        # Concatenate daily scratch files
        ds = atm.load_concat(files)

        if not keepscratch:
            for f in files:
                os.remove(f)

        # Compute monthly means
        print_if('Computing monthly means', verbose)
        if k == 0:
            data = ds.mean(dim=concat_dim)
        else:
            data = xray.concat([data, ds.mean(dim=concat_dim)], dim=pname)

    # Restore the variable attributes from the last level's dataset
    for var in data.data_vars:
        data[var].attrs = ds[var].attrs

    return data
def load_daily_season(pathstr, year, season='ann', var_ids=None,
                      lat1=-90, lat2=90, lon1=0, lon2=360,
                      verbose=True, concat_dim=None):
    """Return daily data for a selected year, season and lat-lon subset.

    Loads daily data from locally saved files and concatenates it into
    a single DataArray or Dataset for that year and season.

    Parameters
    ----------
    pathstr : str
        Beginning of path for each data file, where each file name is in
        the format *yyyymm.nc.
        e.g. pathstr = '~/datastore/merra/daily/u200_'
    year : int
        Year to load.
    season : str, optional
        Season to load.  Valid values are as listed in
        atm.season_months() e.g. 'jul', 'jja', 'ann'
        Default is entire year ('ann')
    var_ids : str or list of str, optional
        Variable(s) to extract.  If omitted, all variables in the data
        are included and the output is a Dataset.
    lat1, lat2, lon1, lon2 : floats, optional
        Lat-lon subset to extract.
    concat_dim : str, optional
        Name of time dimension for concatenation.  If None, then
        atm.get_coord() is called to get the name from the data file.
    verbose : bool, optional
        If True, print updates while processing files.

    Returns
    -------
    data : xray.DataArray or xray.Dataset
    """
    months = atm.season_months(season)
    paths = [pathstr + '%d%02d' % (year, m) + '.nc' for m in months]

    # Make sure longitude range is consistent with data
    with xray.open_dataset(paths[0]) as ds:
        lonmax = atm.lon_convention(atm.get_coord(ds, 'lon'))
        if concat_dim is None:
            concat_dim = atm.get_coord(ds, 'time', 'name')

    # A full 360-degree request is shifted by 180 degrees when its upper
    # bound does not match the value reported for the data.
    if lon2 - lon1 == 360:
        if lonmax < lon2:
            offset = -180
        elif lonmax > lon2:
            offset = 180
        else:
            offset = 0
        lon1, lon2 = lon1 + offset, lon2 + offset

    # Diagnostic output; only shown when progress messages are requested
    if verbose:
        print(lon1, lon2, lonmax)

    # Load daily data
    if var_ids is None:
        var_nms = None
    else:
        var_nms = [get_varname(var_id) for var_id in atm.makelist(var_ids)]
    subset_dict = {'lat': (lat1, lat2), 'lon': (lon1, lon2)}
    data = atm.load_concat(paths, var_nms, concat_dim, subset_dict, verbose)
    return data
def read_daily(var_ids, year, month, days=None, concat_dim='TIME',
               subset_dict=None, verbose=True):
    """Return MERRA daily pressure-level data for selected variable(s).

    Looks up the OpenDAP urls for the requested dates and concatenates
    the data into a single DataArray or Dataset.

    Parameters
    ----------
    var_ids : str or list of str
        Variable ID(s).  Each can be a generic ID from the list below,
        which get_varname() translates to the specific MERRA ID, or the
        exact name as it appears in MERRA data files.
        Generic IDs:
        {'u', 'v', 'omega', 'hgt', 'T', 'q', 'ps', 'evap', 'precip'}
    year, month : int
        Numeric year and month (1-12).
    days : int or list of ints, optional
        Day or subset of days to read.  If None, all days of the month
        are included.
    concat_dim : str, optional
        Name of dimension for concatenation.
    subset_dict : dict of 2-tuples, optional
        Dimensions and subsets to extract.  Each entry has the form
        {dim_name : (lower_or_list, upper)}, where:
        - dim_name : string
            Name of dimension to extract from.  Either the actual
            dimension name (e.g. 'XDim') or a generic name (e.g. 'lon'),
            in which case get_coord() is called to find the specific
            name.
        - lower_or_list : scalar or list of int or float
            If scalar, the lower bound of the subset range.  If list,
            the subset matching the list is extracted.
        - upper : int, float, or None
            Upper bound of the subset range.  Ignored (and should be
            None) when lower_or_list is a list.
    verbose : bool, optional
        If True, print updates while processing files.

    Returns
    -------
    data : xray.DataArray or xray.Dataset
        Daily data (3-hourly or hourly) for the month or a selected
        subset of days.
    """
    id_list = atm.makelist(var_ids)
    var_nms = [get_varname(one_id) for one_id in id_list]

    # The dataset is chosen from the first variable ID only
    urls = url_list(get_dataset(id_list[0], 'daily'))

    # Date strings to search for in the url keys
    if days is None:
        # All days in the month
        dates = ['%d%02d' % (year, month)]
    else:
        # A single day is treated as a one-element subset
        day_list = [days] if isinstance(days, int) else days
        dates = ['%d%02d%02d' % (year, month, d) for d in day_list]

    # Collect every url whose key contains one of the date strings
    paths = []
    for date in dates:
        for key in urls.keys():
            if date in key:
                paths.append(urls[key])

    return atm.load_concat(paths, var_nms, concat_dim, subset_dict, verbose)