def read_nmmbfcst(dtg, path=dir_prod + '/NMMB',
                  require=['dust_aod', 'seasalt_aod']):
    '''
    Read one NMMB (BSC) forecast netcdf file into a model_object recarray

    aod = read_nmmbfcst(dtg)

    dtg     = string, forecast dtg; selects the file
              <path>/<dtg[:6]>/<dtg>-NMMB_BSC_CTM-ICAP.nc
    path    = string, directory of NMMB files
    require = list, species that must have at least one record

    Returns the recarray created by cl.model_object(), with one record
    appended per species and time step found in the file.

    A missing file is only logged through dbg(); it then surfaces as the
    RuntimeError below as soon as any species is required.

    Raises RuntimeError when a species in require has no records.
    '''
    from netCDF4 import Dataset

    dbg(dtg)
    model = 'NMMB'
    fname = '/'.join((path, dtg[:6], dtg + '-NMMB_BSC_CTM-ICAP.nc'))
    aod = cl.model_object()
    meta = lm.aod_vars()

    #_Netcdf variable name -> species name used in the recarray
    species = {'dust_aod550': 'dust_aod', 'salt_aod550': 'seasalt_aod'}

    #_BSC added seasalt on this date
    if dtg < '2012112000':
        del species['salt_aod550']

    if not os.path.isfile(fname):
        dbg(fname + ' is missing', l=2)
    else:
        dbg(fname, l=2)
        handle = Dataset(fname, mode='r', format='NETCDF3_CLASSIC')
        try:
            _, ny, nx = handle.variables['dust_aod550'].shape

            #_Floor division keeps the indices integers on python 2 and 3
            # NOTE(review): nx // 2 indexes the first axis of lon while
            # ny // 2 indexes the second axis of lat - confirm the axis
            # order of the 2-d coordinate variables in the file
            lons = handle.variables['lon'][nx // 2, 1:]
            lats = handle.variables['lat'][:, ny // 2]
            times = handle.variables['time'][:]
            dtg_init = handle.dtg_init

            #_Loop over species dictionary to read in ncdf variables
            for spec, spec_loc in species.items():
                long_name = meta[spec_loc]['long_name']
                dimname = meta[spec_loc]['dims']
                field = handle.variables[spec]
                for t, offset in enumerate(times):
                    dtg_loop = lt.newdtg(dtg_init, offset)
                    vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600

                    #_First longitude column is skipped, matching lons
                    data = cl.var(field[t, :, 1:], attrv=(lats, lons))
                    aod.resize(aod.size + 1)
                    aod[-1] = (data, model, dtg_init, dtg_loop, vhr,
                               'global', spec_loc, 0, dimname, data.shape,
                               '', long_name)
        finally:
            #_Release the file even when a read above fails
            handle.close()

    #_Check for required species
    for s in require:
        if ln.subset(aod, variable=s).size == 0:
            raise RuntimeError('Not all ' + model + ' species available')
    return aod
def read_masingarfcst(dtg, require=['sulfate_aod', 'dust_aod', 'smoke_aod',
                                    'seasalt_aod', 'total_aod'],
                      path=dir_prod + '/MASINGAR'):
    '''
    Read one MASINGAR forecast netcdf file into a model_object recarray

    aod = read_masingarfcst(dtg)

    dtg     = string, forecast dtg; selects the file
              <path>/<dtg[:6]>/<dtg>_aod_masingar.nc
    require = list, species that must have at least one record
    path    = string, directory of MASINGAR files

    Species come from mod_dict['MASINGAR']['specs'] (lower-cased); the
    netcdf variable for each is the species name without '_aod'.
    Time steps later than dtg + 120 hours are not read.

    Returns the recarray created by cl.model_object(), with one record
    appended per species and time step.

    Raises IOError when the file is missing and RuntimeError when a
    species in require has no records.
    '''
    from netCDF4 import Dataset

    dbg(dtg)
    model = 'MASINGAR'
    fhr = 120
    fname = path + '/' + dtg[:6] + '/' + dtg + '_aod_masingar.nc'
    aod = cl.model_object()
    meta = lm.aod_vars()
    specs = [s.lower() for s in mod_dict[model]['specs']]

    if not os.path.isfile(fname):
        raise IOError('WARNING: ' + fname + ' is missing.')

    dbg(fname, l=2)
    handle = Dataset(fname, mode='r', format='NETCDF3_CLASSIC')
    try:
        #_Sizes are not used; the unpacking fails fast when 'total' is
        # absent or not 3-d
        _nt, _ny, _nx = handle.variables['total'].shape
        lons = handle.variables['lon'][:]
        lats = handle.variables['lat'][:]
        times = handle.variables['time'][:]
        dtg_init = lt.epoch2dtg(times[0])
        dtg_fhr = lt.newdtg(dtg, fhr)

        #_Loop over each variable, store in recarray
        for spec in specs:
            if spec in ('lat', 'lon', 'time'):
                continue
            long_name = meta[spec]['long_name']
            dimname = meta[spec]['dims']
            field = handle.variables[spec.replace('_aod', '')]
            for t, epoch in enumerate(times):
                dtg_loop = lt.epoch2dtg(epoch)
                if dtg_loop > dtg_fhr:
                    break
                vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600
                data = cl.var(field[t, :, :], attrv=(lats, lons))
                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           spec, 0, dimname, data.shape, '', long_name)
    finally:
        #_Release the file even when a read above fails
        handle.close()

    #_Check for required species
    for s in require:
        if ln.subset(aod, variable=s).size == 0:
            raise RuntimeError('Not all ' + model + ' species available')
    return aod
def read_geos5fcst(dtg, require=['dust_aod', 'smoke_aod', 'seasalt_aod',
                                 'sulfate_aod', 'total_aod'],
                   path=dir_prod + '/GEOS5'):
    '''
    Read one GEOS-5 forecast netcdf file into a model_object recarray

    aod = read_geos5fcst(dtg)

    dtg     = string, forecast dtg; selects the file
              <path>/<dtg[:6]>/<dtg>_aod_geos5.nc
    require = list, species that must have at least one record
    path    = string, directory of GEOS-5 files

    Every 6th time step of the file is read, up to 120 hours after the
    first time in the file.  The file provides dust, blackcarbon,
    organiccarbon, seasalt and sulfate; two more species are derived:
      smoke_aod = blackcarbon_aod + organiccarbon_aod
      total_aod = dust_aod + seasalt_aod + sulfate_aod + smoke_aod
    for each valid time at which all of the inputs are available.

    Returns the recarray created by cl.model_object().

    Raises IOError when the file is missing, ValueError from the range
    check and RuntimeError when a species in require has no records.
    '''
    from netCDF4 import Dataset

    dbg(dtg)
    aod = cl.model_object()
    meta = lm.aod_vars()
    model = 'GEOS5'
    finc = 6
    fhr = 120

    #_These are the species FROM GMAO.
    # Not the list getting PLOTTED. That
    # is in the model dictionary
    species = ['dust_aod', 'blackcarbon_aod', 'organiccarbon_aod',
               'seasalt_aod', 'sulfate_aod']

    fname = path + '/' + dtg[:6] + '/' + dtg + '_aod_geos5.nc'
    dbg('Reading ' + fname, l=2)
    if not os.path.isfile(fname):
        raise IOError(fname + ' is missing.')

    handle = Dataset(fname, mode='r', format='NETCDF3_CLASSIC')
    try:
        lons = handle.variables['lons'][:]
        lats = handle.variables['lats'][:]
        times = handle.variables['times'][:]
        dtg_init = lt.gsfc_day2dtg(times[0])
        dtg_fcst = lt.newdtg(dtg_init, fhr)

        for spec in species:
            field = handle.variables[spec.replace('_aod', '')]
            long_name = meta[spec]['long_name']
            dimname = meta[spec]['dims']

            #_Keep every finc-th time; t * finc is the index in the file
            for t, days in enumerate(times[::finc]):
                dtg_loop = lt.gsfc_day2dtg(days)
                if dtg_loop > dtg_fcst:
                    break
                vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600
                data = cl.var(field[t * finc, :, :], attrv=[lats, lons])
                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           spec, 0, dimname, data.shape, '', long_name)

                #_Replace this with masked_outside
                # NOTE(review): one-argument np.where returns a tuple
                # with one index array per dimension, so this len() does
                # not count out-of-range points, and the whole variable
                # (not the slab just read) is compared.  Left unchanged
                # on purpose - confirm the intended check before fixing.
                if len(np.where(field > 1e3)) > 0:
                    raise ValueError('GEOS-5 data out of range for ' +
                                     dtg_loop + ' ' + spec)
    finally:
        #_Release the file even when a read above fails
        handle.close()

    #_If we have bc and oc, sum them to generate smoke specie
    spec_smoke = ['blackcarbon_aod', 'organiccarbon_aod']
    spec_total = ['dust_aod', 'seasalt_aod', 'sulfate_aod', 'smoke_aod']
    have_smoke = all(ln.subset(aod, variable=name).size > 0
                     for name in spec_smoke)
    if have_smoke:
        bc = ln.subset(aod, variable='blackcarbon_aod')
        oc = ln.subset(aod, variable='organiccarbon_aod')

        #_Find the dtgs where we have both values
        dtg_smoke = lt.intersection([bc.dtg_vald, oc.dtg_vald])
        for dtg_loop in dtg_smoke:
            if dtg_loop > dtg_fcst:
                break  #_KLUUUUUUUUDGE
            vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600
            bc_loop = ln.subset(bc, dtg_vald=dtg_loop).values[0]
            oc_loop = ln.subset(oc, dtg_vald=dtg_loop).values[0]
            smoke = np.sum([bc_loop, oc_loop], axis=0)
            data = cl.var(smoke, attrv=[lats, lons])
            aod.resize(aod.size + 1)
            aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                       'smoke_aod', 0, meta['smoke_aod']['dims'],
                       data.shape, '', meta['smoke_aod']['long_name'])

            #_If we have all necessary species at this dtg, sum total
            for spec in spec_total:
                found = ln.subset(aod, variable=spec, dtg_vald=dtg_loop)
                if found.size == 0:
                    dbg('Cannot calc total AOD ' + dtg_loop + ' ' + spec,
                        l=2)
                    break
            else:
                aod_loop = ln.subset(aod, dtg_vald=dtg_loop)
                parts = [ln.subset(aod_loop, variable=[name]).values[0]
                         for name in spec_total]
                total = np.sum(parts, axis=0)
                data = cl.var(total, attrv=[lats, lons])
                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           'total_aod', 0, meta['total_aod']['dims'],
                           data.shape, '', meta['total_aod']['long_name'])

    #_Check for required species
    for s in require:
        if ln.subset(aod, variable=s).size == 0:
            raise RuntimeError('Not all GEOS-5 species available')
    return aod
def read_maccfcst(dtg, path=dir_prod + '/MACC',
                  require=['biomassburning_aod', 'dust_aod', 'seasalt_aod',
                           'sulfate_aod', 'total_aod']):
    """
    Read the per-species MACC forecast netcdf files for one dtg

    aod = read_maccfcst(dtg)

    dtg     = string, initial date; each species is read from
              <path>/<dtg[:6]>/<dtg>_<species>_550_macc.nc
    path    = string, directory of MACC files
    require = list, species whose file must exist

    The netcdf variable names depend on the date: numeric codes before
    2012102600, short names from then on.  Each field is flipped along
    its first spatial axis and its two longitude halves are swapped,
    with 360 subtracted from the second half of the longitudes.
    biomassburning_aod is stored under the name smoke_aod.

    Returns the recarray created by cl.model_object(), with one record
    appended per species and time step.

    Raises IOError when the file of a required species is missing, or
    when no field at all could be read.
    """
    from netCDF4 import Dataset

    dbg(dtg)
    model = 'MACC'
    aod = cl.model_object()
    meta = lm.aod_vars()

    if dtg < '2012102600':
        species = {
            'biomassburning_aod': '210.210',
            'blackcarbon_aod': '211.210',
            'dust_aod': '209.210',
            'organicmatter_aod': '210.210',
            'seasalt_aod': '208.210',
            'sulfate_aod': '212.210',
            'total_aod': '207.210',
        }
    else:
        species = {
            'biomassburning_aod': 'bbaod550',
            'blackcarbon_aod': 'bcaod550',
            'dust_aod': 'duaod550',
            'organicmatter_aod': 'omaod550',
            'seasalt_aod': 'ssaod550',
            'sulfate_aod': 'suaod550',
            'total_aod': 'aod550',
        }

    dtg_init = dtg
    prefix = path + '/' + dtg_init[:6] + '/' + dtg_init

    for spec, code in species.items():
        file_spec = prefix + '_' + spec + '_550_macc.nc'
        if not os.path.isfile(file_spec):
            error = file_spec + ' is missing'
            dbg(error, l=2)
            if spec in require:
                raise IOError(error)
            continue

        dbg(('Reading', file_spec), l=2)
        key = 'smoke_aod' if spec == 'biomassburning_aod' else spec
        long_name = meta[key]['long_name']
        dimname = meta[key]['dims']

        handle = Dataset(file_spec, mode='r', format='NETCDF3_CLASSIC')
        try:
            field = handle.variables[code]
            _, _, nx = field.shape

            #_Floor division keeps the slice bounds integers on
            # python 2 and 3
            half = nx // 2
            lon_in = handle.variables['longitude']
            lons = np.append(lon_in[half:] - 360., lon_in[:half])
            lats = handle.variables['latitude'][::-1]
            times = handle.variables['time'][:]

            #_enumerate gives the slab index directly; searching the
            # list for each value picks the wrong slab on repeated times
            for t, time in enumerate(times):
                dtg_loop = lt.ecmwf_day2dtg(time)

                #_Fix the orientation of dateline split
                slab = field[t, :, :]
                values = np.append(slab[::-1, half:], slab[::-1, :half],
                                   axis=1)
                data = cl.var(values, attrv=[lats, lons])
                vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600

                #_Grow the recarray only once the record can be filled
                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg, dtg_loop, vhr, 'global', key,
                           0, dimname, data.shape, '', long_name)
        finally:
            #_Release the file even when a read above fails
            handle.close()

    if aod.size == 0:
        raise IOError('All macc fields missing')
    return aod
def read_ngacfcst(dtg, path=dir_prod + '/NGAC', require=['dust_aod']):
    '''
    Read one NGAC forecast netcdf file into a model_object recarray

    aod = read_ngacfcst(dtg)

    dtg     = string, forecast dtg; selects the file
              <path>/<dtg[:6]>/<dtg>_aod_550_ngac.nc
    path    = string, directory of NGAC files
    require = list, species for which a missing file is an error

    Species come from mod_dict['NGAC']['specs'] (lower-cased) and are
    read from netcdf variables of the same name.  The forecast hour of a
    record is its time index times 3.  Fields valid from 2011071300
    through 2012012000 are divided by 10.

    Returns the recarray created by cl.model_object(), with one record
    appended per species and time step.

    Raises IOError when the file is missing while a required species is
    being read, or when nothing at all was read.
    '''
    from netCDF4 import Dataset

    dbg(dtg)
    model = 'NGAC'
    finc = 3
    species = [s.lower() for s in mod_dict[model]['specs']]
    file_spec = '/'.join((path, dtg[:6], dtg)) + '_aod_550_ngac.nc'
    aod = cl.model_object()
    meta = lm.aod_vars()

    for spec in species:
        long_name = meta[spec]['long_name']
        if not os.path.isfile(file_spec):
            error = file_spec + ' is missing'
            dbg(error, l=2)
            if spec in require:
                raise IOError(error)
            continue

        dbg(file_spec, l=2)
        handle = Dataset(file_spec, mode='r', format='NETCDF3_CLASSIC')
        try:
            field = handle.variables[spec]

            #_Sizes are not used; the unpacking fails fast when the
            # field is not 3-d
            _nt, _ny, _nx = field.shape
            lons = handle.variables['lon'][:]
            lats = handle.variables['lat'][:]
            times = handle.variables['time'][:]
            dtg_init = lt.epoch2dtg(times[0])
            dimname = meta[spec]['dims']

            #_No time-step length is computed from times[1] any more,
            # so files holding a single time can be read
            for t, time in enumerate(times):
                vhr = t * finc
                dtg_loop = lt.epoch2dtg(time)
                tmp = field[t, :, :]

                #_Certain time periods need to be scaled
                if '2011071300' <= dtg_loop <= '2012012000':
                    tmp = tmp / 10.

                data = cl.var(tmp, attrv=(lats, lons))
                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           spec, 0, dimname, data.shape, '', long_name)
        finally:
            #_Release the file even when a read above fails
            handle.close()

    #_Nothing read at all
    if aod.size == 0:
        raise IOError('missing fields')
    return aod
def join_icap(aod, fhr=120, fstrt=0, nt=None, finc=6,
              members=lm.current_icap(), **kwargs):
    '''
    Put all icap member forecasts on a common grid for usage with
    ensemble statistics

    icap = join_icap(aod)

    aod     = model_object recarray holding the member model records
    members = list of model names to combine; 'ICAP' itself is ignored
    fhr, fstrt, nt, finc and **kwargs are accepted but not used here;
    finc is always reset to 6 in the body.

    The target grid has mod_dict['ICAP'] nx/ny points, with longitudes
    spanning -179.5..179.5 and latitudes spanning -89.5..89.5.  For
    every ICAP species and valid time each member field is regridded
    into a (member, lat, lon) array; members without data, and values
    below -1e-5, are set to -9999. and masked.

    Returns a model_object recarray with one 'ICAP' record per species
    and valid time, limited to forecast hours that are multiples of 6.
    Returns -1 when a member has more than one record for the same
    species and valid time.
    '''
    #_Filter into a new list so that neither the caller's list nor the
    # shared default (built once, at definition time) is modified
    members = [m for m in members if m != 'ICAP']
    dbg(aod.size)

    species = [s.lower() for s in mod_dict['ICAP']['specs']]
    nx = mod_dict['ICAP']['nx']
    ny = mod_dict['ICAP']['ny']
    lons = np.linspace(-179.5, 179.5, nx)
    lats = np.linspace(-89.5, 89.5, ny)

    #_NOTE(review): this discards the finc argument - presumably
    # deliberate, confirm before honouring the caller's value
    finc = 6
    fill = -9999.

    icap = cl.model_object()
    meta = lm.aod_vars()

    dtg_valds = set(aod.dtg_vald)  #_unique valid dtgs
    dtg_init = lt.unique(aod.dtg_init)[0]
    nens_max = len(members)
    dimname = ('member', 'lat', 'lon')

    #_Loop over species, join the models we have for each valid dtg
    for spec in species:
        dbg(spec, l=2)
        long_name = meta[spec]['long_name']
        for dtg_vald in dtg_valds:
            #_recarray for one dtg and spec, but multiple models
            aod_sub = ln.subset(aod, variable=spec, model=members,
                                dtg_vald=dtg_vald)

            #_Start filled; members without data keep the fill value
            aod_rgd = np.empty((nens_max, ny, nx))
            aod_rgd[:] = fill
            for e, name in enumerate(members):
                tmp = ln.subset(aod_sub, model=name)
                if tmp.size == 0:
                    #_Model data missing
                    continue
                if tmp.size != 1:
                    #_Should have a single rec
                    print('How did this happen?')
                    return -1

                #_Regrid model data to icap x/y
                d = tmp.values[0]
                aod_rgd[e, :, :] = lt.regrid_field(
                    d.lon, d.lat, d, lons, lats).transpose()

            #_Flag non-physical values, then mask everything flagged
            aod_rgd[aod_rgd < -1e-5] = fill
            aod_rgd = np.ma.masked_where(aod_rgd == fill, aod_rgd)

            #_Count present models
            nens = ln.check_members(aod_rgd)

            data = cl.var(aod_rgd, attrv=(members, lats, lons))
            vhr = lt.find_runlength(dtg_init, dtg_vald) / 3600
            icap.resize(icap.size + 1)
            icap[-1] = (data, 'ICAP', dtg_init, dtg_vald, vhr, 'global',
                        spec, nens, dimname, data.shape, '', long_name)

    #_Limit to forecasts every finc hours
    idx_fhr = np.where(icap.fhr % finc == 0)[0]
    return icap[idx_fhr]