def read_data_matched(fname='matched_data_live.nc', mask=True, **kwargs):
    '''
    Read in a file produced by write_matched_data()

    fname   : str,  path of the netCDF file to read
    mask    : bool, when True the channels flagged by
                    AIRS_ancillary().l2_ignore are masked in both spectra
    kwargs  : accepted for call compatibility, not used here

    returns : Matched_data with one record per entry of the 'time'
              dimension, each holding
              (cloud, lbldis, airs, lbldis_tb, airs_tb,
               latitude, longitude, time)
              where the *_tb entries are planck_inv() of the two spectra

    The dataset is closed before returning, also when a read fails.
    '''
    from netCDF4 import Dataset
    from libclass import var
    from libgeo import planck_inv
    from lblrtm_utils import AIRS_ancillary
    from numpy.ma import masked_where

    cdf = Dataset(fname, 'r')
    try:
        nt = len(cdf.dimensions['time'])
        data = Matched_data(size=nt)

        wvn = cdf.variables['wavenumber'][:]
        cld = cdf.variables['cloud']
        dis = cdf.variables['lbldis']
        air = cdf.variables['airs']
        lat = cdf.variables['latitude']
        lon = cdf.variables['longitude']
        tim = cdf.variables['time']

        #_every spectrum carries the wavenumber axis as its attribute
        arg = {'attrn': ['wavenumber'], 'attrv': [wvn]}
        mask_l2 = AIRS_ancillary().l2_ignore

        for n in range(nt):
            lbldis = dis[n, :]
            airs = air[n, :]

            #_blank out bad channels
            if mask:
                lbldis = masked_where(mask_l2, lbldis)
                airs = masked_where(mask_l2, airs)

            #_blank out -9999 entries
            #_NOTE(review): applied regardless of `mask`; confirm this
            # matches the intended nesting under the check above
            lbldis = masked_where(lbldis == -9999, lbldis)
            airs = masked_where(airs == -9999, airs)

            lbldis = var(lbldis, **arg)
            airs = var(airs, **arg)
            lbldis_tb = var(planck_inv(lbldis, wvn, domain='wavenumber'),
                            **arg)
            airs_tb = var(planck_inv(airs, wvn, domain='wavenumber'),
                          **arg)

            data[n] = (cld[n], lbldis, airs, lbldis_tb, airs_tb,
                       lat[n], lon[n], tim[n])
    finally:
        #_variables are read lazily above, so close only once all is read
        cdf.close()

    return data
def obsnew_field( recs, nx=360, ny=180, **kwargs ):
    '''
    Put obsnew records onto a regular 2d lat/lon field for plotting

    recs    : cl.sat_object, records providing .dtg, .lat, .lon and .tau
    nx, ny  : int, number of longitude / latitude points of the field

    returns : cl.var built from a (ny, nx) array with attrv=(lat, lon);
              cells that received no observation hold 0.

    Will crash if more than 1 dtg is passed
    '''
    #_result is not used; presumably this call is what enforces the
    # single-dtg requirement - confirm against lt.unique
    lt.unique( recs.dtg, unique=True )

    lat_axis = np.linspace( -89.5, 89.5, ny )
    lon_axis = np.linspace( -179.5, 179.5, nx )

    #_start from a field filled with the missing value
    field = np.empty((ny, nx))
    field.fill(-9999.)

    #_convert lats and lons to indices and drop the observations in
    col, row = lt.ll2ij( recs.lat, recs.lon, lat_axis, lon_axis )
    field[row, col] = recs.tau

    #_anything still (or again) at the missing value becomes zero
    field[field == -9999.] = 0.

    return cl.var( field, attrv=(lat_axis, lon_axis) )
def read_forecasts( path_fcst=os.environ['PRODUCTS']+'/icap/', label='2012FL',
    sites=None, **kwargs ):
    '''
    Reads merged forecast files produced by ICAP.merge_data.py
    These are collocated with AERONET sites

    path_fcst : str,  root directory of the icap products
    label     : str,  label used in the directory and file names
    sites     : iterable of str or None, site names to read;
                      None reads every file matching the label
    kwargs    : passed to lt.setup_groups() and, with 'pipe' added,
                to ln.read_period_points()

    returns   : np.recarray of all records merged together
    '''
    from glob import glob
    import libtools as lt
    import libnva as ln
    import libclass as cl
    import numpy as np
    from multiprocessing import Queue, Process

    path_fcst = '/'.join((path_fcst, 'seasonal_statistics-'+label, 'points'))

    #_identity test: '== None' is elementwise when sites is an array
    if sites is None:
        files = glob( path_fcst + '/*-*-'+label+'.nc' )
    else:
        files = []
        for site in sites:
            files.extend(glob(path_fcst+'/*-'+site+'-'+label+'.nc'))

    print( path_fcst )

    dtype = [ ('values', cl.var), ('model', 'a16'), ('epoch', 'i8'),
            ('fhr', 'i4'), ('code', 'a10'), ('lat', 'f4'), ('lon', 'f4'),
            ('variable', 'a20'), ('ensemble', 'i4'), ('dimname', tuple),
            ('dimsize', tuple) ]
    records = np.recarray( (0,), dtype=dtype )

    #_files are read group by group, one child process per file
    for group in lt.setup_groups( files, **kwargs ):
        workers = []
        for fname in group:
            queue = Queue()

            #_each child gets its own kwargs instead of sharing one dict
            # that is rewritten for every file
            child_kwargs = dict(kwargs, pipe=queue)
            proc = Process( target=ln.read_period_points, args=(fname,),
                            kwargs=child_kwargs )
            proc.start()
            workers.append((proc, queue))

        for proc, queue in workers:
            #_take the result off the queue BEFORE joining; joining a
            # child that still has queued data can deadlock
            tmp, attrv = queue.get()
            proc.join()

            for rec in tmp:
                rec.values = cl.var( rec.values, attrn=['member'],
                                    attrv=attrv )
            records = lt.merge([records,tmp])

    ln.check_member_order( records )
    return records
def read_masingarfcst(dtg, require=[
        'sulfate_aod', 'dust_aod', 'smoke_aod', 'seasalt_aod', 'total_aod'
        ], path=dir_prod + '/MASINGAR'):
    '''
    Read a MASINGAR netcdf forecast file

    aod = read_masingarfcst(dtg)

    dtg     : string, start dtg; selects
                      <path>/<yyyymm>/<dtg>_aod_masingar.nc
    require : list,   species that must have at least one record
    path    : string, directory of MASINGAR files

    returns : cl.model_object with one record per species and time step,
              stopping at the first step later than dtg + 120 hours

    raises  : IOError       the file does not exist
              RuntimeError  a species in require has no records
    '''
    from netCDF4 import Dataset, Variable

    model = 'MASINGAR'
    fhr_max = 120
    fname = path + '/' + dtg[:6] + '/' + dtg + '_aod_masingar.nc'
    dbg(dtg)

    aod = cl.model_object()
    meta = lm.aod_vars()
    specs = [name.lower() for name in mod_dict[model]['specs']]

    if not os.path.isfile(fname):
        raise IOError('WARNING: ' + fname + ' is missing.')

    dbg(fname, l=2)
    handle = Dataset(fname, mode='r', format='NETCDF3_CLASSIC')

    #_the sizes are not used; unpacking only requires 'total' to be 3d
    ntime, nlat, nlon = handle.variables['total'].shape

    lons = handle.variables['lon'][:]
    lats = handle.variables['lat'][:]
    times = handle.variables['time'][:]

    #_Brings dtg to 00z instead of 22
    dtg_init = lt.epoch2dtg(times[0])
    dtg_last = lt.newdtg(dtg, fhr_max)

    #_Loop over each species, one record per time step
    for spec in specs:
        if spec in ('lat', 'lon', 'time'):
            continue

        long_name = meta[spec]['long_name']
        ncvar = handle.variables[spec.replace('_aod', '')]

        for t, epoch in enumerate(times):
            dtg_loop = lt.epoch2dtg(epoch)
            if dtg_loop > dtg_last:
                break

            vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600
            data = cl.var(ncvar[t, :, :], attrv=(lats, lons))
            dimname = meta[spec]['dims']

            aod.resize(aod.size + 1)
            aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                       spec, 0, dimname, data.shape, '', long_name)

    handle.close()

    #_Check for required species
    for name in require:
        if ln.subset(aod, variable=name).size == 0:
            raise RuntimeError('Not all ' + model + ' species available')

    return aod
def read_nmmbfcst(dtg, path=dir_prod + '/NMMB',
                  require=['dust_aod', 'seasalt_aod']):
    '''
    Read an NMMB netcdf forecast file

    aod = read_nmmbfcst(dtg)

    dtg     : string, start dtg; selects
                      <path>/<yyyymm>/<dtg>-NMMB_BSC_CTM-ICAP.nc
    path    : string, directory of NMMB files
    require : list,   species that must have at least one record

    returns : cl.model_object with one record per species and time step

    raises  : RuntimeError  a species in require has no records
                            (this is also how a missing file shows up,
                            as that case is only logged)
    '''
    from netCDF4 import Dataset, Variable

    fname = '/'.join((path, dtg[:6], dtg + '-NMMB_BSC_CTM-ICAP.nc'))
    dbg(dtg)

    model = 'NMMB'
    aod = cl.model_object()
    meta = lm.aod_vars()

    #_netcdf variable name -> species name used in the records
    species = {'dust_aod550': 'dust_aod', 'salt_aod550': 'seasalt_aod'}

    #_BSC added seasalt on this date
    if dtg < '2012112000':
        species.pop('salt_aod550')

    if os.path.isfile(fname):
        dbg(fname, l=2)
        handle = Dataset(fname, mode='r', format='NETCDF3_CLASSIC')
        ntime, ny, nx = handle.variables['dust_aod550'].shape

        #_NOTE(review): lon is indexed along its first axis with nx // 2
        # and lat along its second with ny // 2, which looks transposed
        # relative to the (time, ny, nx) data shape - confirm against file
        lons = handle.variables['lon'][nx // 2, 1:]
        lats = handle.variables['lat'][:, ny // 2]
        times = handle.variables['time'][:]
        dtg_init = handle.dtg_init

        #_Loop over species dictionary to read in ncdf variables
        for nc_name, spec in species.items():
            long_name = meta[spec]['long_name']
            ncvar = handle.variables[nc_name]

            for t, step in enumerate(times):
                dtg_loop = lt.newdtg(dtg_init, step)
                vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600

                #_first longitude column is dropped, matching lons above
                data = cl.var(ncvar[t, :, 1:], attrv=(lats, lons))
                dimname = meta[spec]['dims']

                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           spec, 0, dimname, data.shape, '', long_name)

        handle.close()
    else:
        dbg(fname + ' is missing', l=2)

    #_Check for required species
    for name in require:
        if ln.subset(aod, variable=name).size == 0:
            raise RuntimeError('Not all ' + model + ' species available')

    return aod
def read_geos5fcst(dtg, require=[
        'dust_aod', 'smoke_aod', 'seasalt_aod', 'sulfate_aod', 'total_aod'
        ], path=dir_prod + '/GEOS5'):
    '''
    Read GEOS-5 netcdf file

    aod = read_geos5fcst(dtg)

    dtg     : string, start dtg; selects
                      <path>/<yyyymm>/<dtg>_aod_geos5.nc
    require : list,   species that must have at least one record
    path    : string, directory of GEOS-5 files

    returns : cl.model_object holding, per kept time step, the species
              read from the file plus two derived ones:
                smoke_aod = blackcarbon_aod + organiccarbon_aod
                total_aod = dust + seasalt + sulfate + smoke
              Only every 6th time step of the file is kept, and nothing
              later than 120 hours after the first time in the file.

    raises  : IOError       the file does not exist
              ValueError    a field read from the file exceeds 1e3
              RuntimeError  a species in require has no records
    '''
    #_These are the species FROM GMAO.
    # Not the list getting PLOTTED. That
    # is in the model dictionary
    from netCDF4 import Dataset

    dbg(dtg)
    aod = cl.model_object()
    meta = lm.aod_vars()
    model = 'GEOS5'
    finc = 6
    fhr = 120
    species = [
        'dust_aod', 'blackcarbon_aod', 'organiccarbon_aod', 'seasalt_aod',
        'sulfate_aod'
    ]

    fname = path + '/' + dtg[:6] + '/' + dtg + '_aod_geos5.nc'
    dbg('Reading ' + fname, l=2)
    if not os.path.isfile(fname):
        raise IOError(fname + ' is missing.')

    handle = Dataset(fname, mode='r', format='NETCDF3_CLASSIC')
    try:
        lons = handle.variables['lons'][:]
        lats = handle.variables['lats'][:]
        times = handle.variables['times'][:]

        #_Brings dtg to 00z, not 22
        dtg_init = lt.gsfc_day2dtg(times[0])
        dtg_fcst = lt.newdtg(dtg_init, fhr)

        #_keep every finc-th step; index back into the file with t*finc
        times = times[::finc]

        for spec in species:
            ncvar = handle.variables[spec.replace('_aod', '')]
            long_name = meta[spec]['long_name']

            for t, days in enumerate(times):
                dtg_loop = lt.gsfc_day2dtg(days)
                if dtg_loop > dtg_fcst:
                    break

                vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600
                field = ncvar[t * finc, :, :]

                #_range check on the values actually read; masked cells
                # are ignored.  (len(np.where(...)) counts index arrays,
                # not hits, so it cannot be used for this.)
                if np.ma.filled(field > 1e3, False).any():
                    raise ValueError('GEOS-5 data out of range for '
                                     + dtg_loop + ' ' + spec)

                data = cl.var(field, attrv=[lats, lons])
                dimname = meta[spec]['dims']

                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           spec, 0, dimname, data.shape, '', long_name)
    finally:
        handle.close()

    #_If we have bc and oc, sum them to generate smoke specie
    spec_smoke = ['blackcarbon_aod', 'organiccarbon_aod']
    spec_total = ['dust_aod', 'seasalt_aod', 'sulfate_aod', 'smoke_aod']

    have_smoke_parts = all(ln.subset(aod, variable=spec).size > 0
                           for spec in spec_smoke)
    if have_smoke_parts:
        bc = ln.subset(aod, variable='blackcarbon_aod')
        oc = ln.subset(aod, variable='organiccarbon_aod')

        #_Find the dtgs where we have both values
        dtg_smoke = lt.intersection([bc.dtg_vald, oc.dtg_vald])

        for dtg_loop in dtg_smoke:
            if dtg_loop > dtg_fcst:
                break                               #_KLUUUUUUUUDGE

            vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600
            bc_loop = ln.subset(bc, dtg_vald=dtg_loop).values[0]
            oc_loop = ln.subset(oc, dtg_vald=dtg_loop).values[0]

            data = cl.var(np.sum([bc_loop, oc_loop], axis=0),
                          attrv=[lats, lons])
            aod.resize(aod.size + 1)
            aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                       'smoke_aod', 0, meta['smoke_aod']['dims'],
                       data.shape, '', meta['smoke_aod']['long_name'])

            #_If we have all necessary species at this dtg, sum total
            missing = [spec for spec in spec_total
                       if ln.subset(aod, variable=spec,
                                    dtg_vald=dtg_loop).size == 0]
            if missing:
                dbg('Cannot calc total AOD ' + dtg_loop + ' ' + missing[0],
                    l=2)
                continue

            aod_loop = ln.subset(aod, dtg_vald=dtg_loop)
            parts = [ln.subset(aod_loop, variable=[spec]).values[0]
                     for spec in spec_total]

            data = cl.var(np.sum(parts, axis=0), attrv=[lats, lons])
            aod.resize(aod.size + 1)
            aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                       'total_aod', 0, meta['total_aod']['dims'],
                       data.shape, '', meta['total_aod']['long_name'])

    #_Check for required species
    for name in require:
        if ln.subset(aod, variable=name).size == 0:
            raise RuntimeError('Not all GEOS-5 species available')

    return aod
def read_maccfcst(dtg, path=dir_prod + '/MACC', require=[
        'biomassburning_aod', 'dust_aod', 'seasalt_aod', 'sulfate_aod',
        'total_aod'
        ]):
    """
    Read the MACC forecast, which comes as one netcdf file per species

    aod = read_maccfcst(dtg)

    dtg     : string, initial date; files are
                      <path>/<yyyymm>/<dtg>_<species>_550_macc.nc
    path    : string, directory of MACC files
    require : list,   species whose file must exist

    returns : cl.model_object with one record per species file found and
              time step.  Fields are flipped in latitude and rotated by
              half the longitude axis, with 360 subtracted from the
              longitudes moved to the front.  'biomassburning_aod' is
              stored under the name 'smoke_aod'.

    raises  : IOError  a required species file is missing, or no file
                       produced any record
    """
    from netCDF4 import Dataset

    dbg(dtg)
    model = 'MACC'
    aod = cl.model_object()
    meta = lm.aod_vars()

    #_species -> name of the netcdf variable, which differs by date
    if dtg < '2012102600':
        species = {
            'biomassburning_aod': '210.210',
            'blackcarbon_aod': '211.210',
            'dust_aod': '209.210',
            'organicmatter_aod': '210.210',
            'seasalt_aod': '208.210',
            'sulfate_aod': '212.210',
            'total_aod': '207.210'
        }
    else:
        species = {
            'biomassburning_aod': 'bbaod550',
            'blackcarbon_aod': 'bcaod550',
            'dust_aod': 'duaod550',
            'organicmatter_aod': 'omaod550',
            'seasalt_aod': 'ssaod550',
            'sulfate_aod': 'suaod550',
            'total_aod': 'aod550'
        }

    dtg_init = dtg
    prefix = path + '/' + dtg_init[:6] + '/' + dtg_init

    for spec in species:
        file_spec = prefix + '_' + spec + '_550_macc.nc'
        if not os.path.isfile(file_spec):
            error = file_spec + ' is missing'
            dbg(error, l=2)
            if spec in require:
                raise IOError(error)
            continue

        dbg(('Reading', file_spec), l=2)
        code = species[spec]
        key = 'smoke_aod' if spec == 'biomassburning_aod' else spec
        long_name = meta[key]['long_name']

        handle = Dataset(file_spec, mode='r', format='NETCDF3_CLASSIC')
        try:
            ncvar = handle.variables[code]
            ntime, ny, nx = ncvar.shape
            half = nx // 2

            #_Fix the orientation of dateline split
            lon_file = handle.variables['longitude']
            lons = np.append(lon_file[half:] - 360., lon_file[:half])
            lats = handle.variables['latitude'][::-1]
            times = handle.variables['time'][:]

            #_enumerate gives the slice index directly; looking the time
            # value up again would pick the wrong slice on repeats
            for t, time in enumerate(times):
                dtg_loop = lt.ecmwf_day2dtg(time)

                tmp = ncvar[t, :, :]
                values = np.append(tmp[::-1, half:], tmp[::-1, :half],
                                   axis=1)
                data = cl.var(values, attrv=[lats, lons])
                dimname = meta[key]['dims']
                vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600

                #_grow the array only once the record can be filled
                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg, dtg_loop, vhr, 'global', key,
                           0, dimname, data.shape, '', long_name)
        finally:
            handle.close()

    if aod.size == 0:
        raise IOError('All macc fields missing')

    return aod
def read_ngacfcst(dtg, path=dir_prod + '/NGAC', require=['dust_aod']):
    '''
    Read an NGAC netcdf forecast file

    dtg     : string, start dtg; selects
                      <path>/<yyyymm>/<dtg>_aod_550_ngac.nc
    path    : string, directory of NGAC files
    require : list,   species for which a missing file is an error

    returns : cl.model_object with one record per species and time step;
              the forecast hour of step t is recorded as t * 3.
              Fields valid from 2011071300 through 2012012000 are
              divided by 10.

    raises  : IOError  the file is missing for a required species, or
                       no record could be read at all
    '''
    dbg(dtg)
    from netCDF4 import Dataset

    model = 'NGAC'
    species = [s.lower() for s in mod_dict[model]['specs']]
    prefix = '/'.join((path, dtg[:6], dtg))
    aod = cl.model_object()
    meta = lm.aod_vars()
    finc = 3

    #_all species live in the same file
    file_spec = prefix + '_aod_550_ngac.nc'

    for spec in species:
        long_name = meta[spec]['long_name']

        if not os.path.isfile(file_spec):
            error = file_spec + ' is missing'
            dbg(error, l=2)
            if spec in require:
                raise IOError(error)
            continue

        dbg(file_spec, l=2)
        handle = Dataset(file_spec, mode='r', format='NETCDF3_CLASSIC')
        try:
            ncvar = handle.variables[spec]
            lons = handle.variables['lon'][:]
            lats = handle.variables['lat'][:]
            times = handle.variables['time'][:]
            dtg_init = lt.epoch2dtg(times[0])

            #_no look-ahead at times[1], so single-step files are fine
            for t, time in enumerate(times):
                vhr = t * finc
                dtg_loop = lt.epoch2dtg(time)
                tmp = ncvar[t, :, :]

                #_Certain time periods need to be scaled
                if '2011071300' <= dtg_loop <= '2012012000':
                    tmp = tmp / 10.

                data = cl.var(tmp, attrv=(lats, lons))
                dimname = meta[spec]['dims']

                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           spec, 0, dimname, data.shape, '', long_name)
        finally:
            handle.close()

    #_Check that anything was read at all
    if aod.size == 0:
        raise IOError('missing fields')

    return aod
def join_icap(aod, fhr=120, fstrt=0, nt=None, finc=6,
              members=lm.current_icap(), **kwargs):
    '''
    Put all icap forecasts on a common grid for usage with ensemble
    statistics

    aod     : cl.model_object, records of the individual models
    fhr, fstrt, nt, kwargs
            : accepted for call compatibility, not used here
    finc    : int, NOTE(review): currently overridden to 6 below, so a
                   value passed by the caller has no effect - confirm
                   whether that is intended
    members : list, model names to join; 'ICAP' itself is dropped.
                    The list passed in is left untouched.

    returns : cl.model_object with one 'ICAP' record per species and
              valid dtg, holding a masked (member, lat, lon) array in
              which missing members and values below -1e-5 are masked.
              Only records whose fhr is a multiple of 6 are returned.
              Returns -1 if a model has more than one record for the
              same species and valid dtg.
    '''
    #_filter into a new list: list.remove() on the argument would edit
    # the caller's list and the default, which is built once at def time
    members = [name for name in members if name != 'ICAP']
    dbg(aod.size)

    species = [s.lower() for s in mod_dict['ICAP']['specs']]
    nx = mod_dict['ICAP']['nx']
    ny = mod_dict['ICAP']['ny']
    lons = np.linspace(-179.5, 179.5, nx)
    lats = np.linspace(-89.5, 89.5, ny)
    finc = 6
    icap = cl.model_object()
    meta = lm.aod_vars()

    dtg_valds = set(aod.dtg_vald)               #_unique valid dtgs
    dtg_init = lt.unique(aod.dtg_init)[0]
    nens_max = len(members)

    #_Loop over species, join the models we have for each valid dtg
    for spec in species:
        dbg(spec, l=2)
        long_name = meta[spec]['long_name']

        for dtg_vald in dtg_valds:
            #_records for one dtg and spec, but multiple models
            aod_sub = ln.subset(aod, variable=spec, model=members,
                                dtg_vald=dtg_vald)

            #_every member starts out missing and stays that way
            # unless its regridded field overwrites it
            aod_rgd = np.empty((nens_max, ny, nx))
            aod_rgd[:] = -9999.

            for e, name in enumerate(members):
                tmp = ln.subset(aod_sub, model=name)
                if tmp.size == 1:
                    #_Regrid model data to icap x/y
                    d = tmp.values[0]
                    aod_rgd[e, :, :] = lt.regrid_field(
                        d.lon, d.lat, d, lons, lats).transpose()
                elif tmp.size > 1:
                    print('How did this happen?')
                    return -1

            #_Flag non-physical values as missing, then mask missing
            aod_rgd[aod_rgd < -1e-5] = -9999.
            aod_rgd = np.ma.masked_where(aod_rgd == -9999., aod_rgd)

            #_Count present models
            nens = ln.check_members(aod_rgd)

            data = cl.var(aod_rgd, attrv=(members, lats, lons))
            dimname = ('member', 'lat', 'lon')
            vhr = lt.find_runlength(dtg_init, dtg_vald) / 3600

            icap.resize(icap.size + 1)
            icap[-1] = (data, 'ICAP', dtg_init, dtg_vald, vhr, 'global',
                        spec, nens, dimname, data.shape, '', long_name)

    #_Limit to forecasts every finc hours
    idx_fhr = np.where(icap.fhr % finc == 0)[0]
    return icap[idx_fhr]
def filter(records, strict_icap=True,
           members=['NAAPS', 'GEOS5', 'MACC', 'MASINGAR', 'NGAC'],
           modes=False, **kwargs):
    '''
    Builds a model_object() containing only the ICAP records in which
    no requested member is masked for a variable that member is
    expected to provide (per ln.expected_vars()).  Kept records are
    reduced to the requested members, in the order given.

    records     : model_object(), np.recarray() of aod model data
    strict_icap : bool, when False records are returned unchanged
    members     : list, model names to require and keep
    modes, kwargs
                : accepted for call compatibility, not used here

    returns     : records unchanged when they are not purely ICAP or
                  strict_icap is off, otherwise a new model_object()

    Note that values and dimsize are assigned on each kept rec itself
    before it is merged into the result.
    '''
    dbg(records.size)

    models_present = lt.unique(records.model)
    if models_present != ['ICAP']:
        dbg(('filter for icap only', models_present), l=3)
        return records

    if not strict_icap:
        dbg('icap not set to strict, returning', l=3)
        return records

    #_expected species list for each model
    expected = ln.expected_vars()
    out = cl.model_object()

    for rec in records:
        #_one point per member is enough to see if a member is masked
        member_mask = rec.values[:, 0, 0].mask
        variable = rec.variable

        keep = []               #_member indices in requested order
        rejected = False
        for model in members:
            pos = rec.values.member.tolist().index(model)
            keep.append(pos)

            #_the mask may be a single bool covering every member
            if isinstance(member_mask, np.bool_):
                is_masked = member_mask
            else:
                is_masked = member_mask[pos]

            #_a masked member only matters if it should carry this var
            if is_masked and variable in expected[model]:
                dbg((rec.dtg_vald, variable, 'filtered'), l=1)
                dbg((model, 'was the cause'), l=1)
                rejected = True
                break

        if rejected:
            continue

        #_reduce the member attribute to the kept members
        attr_names, attr_vals = ln.get_attr(rec)
        m = attr_names.index('member')
        attr_vals[m] = attr_vals[m][keep]

        #_reduce the data the same way and put it back into the record
        trimmed = rec.values.copy()[keep, :, :]
        rec.dimsize = trimmed.shape
        rec.values = cl.var(trimmed, attrn=attr_names, attrv=attr_vals)

        out = lt.merge((out, rec))

    dbg(out.size)
    return out