def read_data_matched(fname='matched_data_live.nc', mask=True, **kwargs):
    '''
    Read in a file produced by write_matched_data().

    fname   : str,  path of the netCDF file to read
    mask    : bool, when True, mask the channels flagged by
              AIRS_ancillary().l2_ignore and every value equal to -9999
    kwargs  : accepted for call compatibility, not used here

    Returns a Matched_data object holding one record per index along the
    'time' dimension:
        (cloud, lbldis, airs, lbldis_tb, airs_tb, latitude, longitude, time)
    lbldis_tb / airs_tb are the corresponding spectra passed through
    planck_inv(..., domain='wavenumber') (presumably brightness
    temperatures -- confirm against libgeo).
    '''
    from netCDF4 import Dataset
    from libclass import var
    from libgeo import planck_inv
    from lblrtm_utils import AIRS_ancillary
    from numpy.ma import masked_where

    cdf = Dataset(fname, 'r')
    try:
        nt = len(cdf.dimensions['time'])
        data = Matched_data(size=nt)

        wvn = cdf.variables['wavenumber'][:]
        cld = cdf.variables['cloud']
        dis = cdf.variables['lbldis']
        air = cdf.variables['airs']
        lat = cdf.variables['latitude']
        lon = cdf.variables['longitude']
        tim = cdf.variables['time']

        #_every spectrum carries the wavenumber axis as an attribute
        arg = {'attrn': ['wavenumber'], 'attrv': [wvn]}
        mask_l2 = AIRS_ancillary().l2_ignore

        for n in range(nt):
            lbldis = dis[n, :]
            airs = air[n, :]

            #_blank out bad channels, then the -9999 fill values
            if mask:
                lbldis = masked_where(mask_l2, lbldis)
                airs = masked_where(mask_l2, airs)

                lbldis = masked_where(lbldis == -9999, lbldis)
                airs = masked_where(airs == -9999, airs)

            lbldis = var(lbldis, **arg)
            airs = var(airs, **arg)
            lbldis_tb = var(planck_inv(lbldis, wvn, domain='wavenumber'),
                            **arg)
            airs_tb = var(planck_inv(airs, wvn, domain='wavenumber'), **arg)

            data[n] = (cld[n], lbldis, airs, lbldis_tb, airs_tb, lat[n],
                       lon[n], tim[n])
    finally:
        #_everything needed has been copied out of the file by now
        cdf.close()

    return data
Beispiel #2
0
def obsnew_field(recs, nx=360, ny=180, **kwargs):
    '''
    Put a recarray of obsnew data onto a 2d lat/lon field for plotting.

    recs    : cl.sat_object
    nx, ny  : int, number of longitude / latitude points in the field

    Will crash if more than 1 dtg is passed.
    Returns a cl.var of shape (ny, nx) carrying (lat, lon) as attrv; cells
    without an observation hold 0.
    '''
    #_result is not needed; the call is kept for its check on recs.dtg
    lt.unique(recs.dtg, unique=True)

    grid_lat = np.linspace(-89.5, 89.5, ny)
    grid_lon = np.linspace(-179.5, 179.5, nx)

    #_start from a field that is flagged missing everywhere
    field = np.empty((ny, nx))
    field.fill(-9999.)

    #_convert lats and lons to indices and drop the values in place
    col, row = lt.ll2ij(recs.lat, recs.lon, grid_lat, grid_lon)
    field[row, col] = recs.tau

    #_whatever is still flagged missing is reported as zero
    field[field == -9999.] = 0.

    return cl.var(field, attrv=(grid_lat, grid_lon))
Beispiel #3
0
def read_forecasts(path_fcst=os.environ['PRODUCTS'] + '/icap/', label='2012FL',
                   sites=None, **kwargs):
    '''
    Reads merged forecast files produced by ICAP.merge_data.py
    These are collocated with AERONET sites

    path_fcst : str,  root directory; files are looked up under
                      <path_fcst>/seasonal_statistics-<label>/points
    label     : str,  label used in the directory and file names
    sites     : iterable of str or None; None reads every site found,
                      otherwise only files matching *-<site>-<label>.nc
    kwargs    : passed to lt.setup_groups() and, with a 'pipe' entry
                      added, to ln.read_period_points()

    Each file of a group is read in its own Process; the results are
    collected through one Queue per file and merged into a single
    recarray, which is returned after ln.check_member_order().
    '''
    from glob import glob
    import libtools as lt
    import libnva   as ln
    import libclass as cl
    import numpy    as np
    from multiprocessing import Queue, Process

    path_fcst = '/'.join((path_fcst, 'seasonal_statistics-' + label, 'points'))
    if sites is None:
        files = glob(path_fcst + '/*-*-' + label + '.nc')
    else:
        files = []
        for site in sites:
            files.extend(glob(path_fcst + '/*-' + site + '-' + label + '.nc'))

    print(path_fcst)
    dtype = [('values', cl.var),
             ('model', 'a16'),
             ('epoch', 'i8'),
             ('fhr', 'i4'),
             ('code', 'a10'),
             ('lat', 'f4'),
             ('lon', 'f4'),
             ('variable', 'a20'),
             ('ensemble', 'i4'),
             ('dimname', tuple),
             ('dimsize', tuple)]
    records = np.recarray((0,), dtype=dtype)

    groups = lt.setup_groups(files, **kwargs)
    for group in groups:
        #_start one reader per file of the group ...
        queues = []
        workers = []
        for fname in group:
            queue = Queue()
            kwargs.update({'pipe': queue})
            worker = Process(target=ln.read_period_points,
                             args=(fname,), kwargs=kwargs)
            worker.start()
            queues.append(queue)
            workers.append(worker)

        #_... then collect the results in the order they were started
        for queue in queues:
            tmp, attrv = queue.get()
            for rec in tmp:
                rec.values = cl.var(rec.values,
                                    attrn=['member'], attrv=attrv)
            records = lt.merge([records, tmp])

    ln.check_member_order(records)

    return records
Beispiel #4
0
def read_masingarfcst(dtg,
                      require=[
                          'sulfate_aod', 'dust_aod', 'smoke_aod',
                          'seasalt_aod', 'total_aod'
                      ],
                      path=dir_prod + '/MASINGAR'):
    '''
    Read MASINGAR netcdf file
    aod = read_masingarfcst(dtg)
        dtg     = string,   start dtg; the file read is
                            <path>/<dtg[:6]>/<dtg>_aod_masingar.nc
        require = list,     species that must end up in the result
        path    = string,   directory of MASINGAR files

    The species read are the lower-cased mod_dict['MASINGAR']['specs'];
    the netCDF variable name is the species name without '_aod'.
    Reading of a species stops at the first timestep later than
    lt.newdtg(dtg, 120).

    Raises IOError if the file is missing and RuntimeError if any species
    in require has no record.
    '''
    from netCDF4 import Dataset
    dtg_fcst = dtg
    fname = path + '/' + dtg_fcst[:6] + '/' + dtg_fcst + '_aod_masingar.nc'
    dbg(dtg)

    model = 'MASINGAR'
    aod = cl.model_object()
    aod_vars = lm.aod_vars()

    fhr = 120
    specs = [s.lower() for s in mod_dict[model]['specs']]
    if not os.path.isfile(fname):
        raise IOError('WARNING: ' + fname + ' is missing.')

    dbg(fname, l=2)
    handle = Dataset(fname, mode='r', format='NETCDF3_CLASSIC')
    try:
        #_values unused; the unpacking fails early if 'total' is not 3-d
        _, ny, nx = handle.variables['total'].shape
        lons = handle.variables['lon'][:]
        lats = handle.variables['lat'][:]
        times = handle.variables['time'][:]
        dtg_init = lt.epoch2dtg(times[0])  #_Brings dtg to 00z instead of 22
        dtg_fhr = lt.newdtg(dtg_fcst, fhr)

        #_Loop over each species, one record per timestep
        for spec in specs:
            if spec in ('lat', 'lon', 'time'):
                continue
            long_name = aod_vars[spec]['long_name']
            dimname = aod_vars[spec]['dims']
            ncvar = handle.variables[spec.replace('_aod', '')]
            for t, epoch in enumerate(times):
                dtg_loop = lt.epoch2dtg(epoch)
                if dtg_loop > dtg_fhr:
                    break

                vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600
                data = cl.var(ncvar[t, :, :], attrv=(lats, lons))
                dimsize = data.shape

                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           spec, 0, dimname, dimsize, '', long_name)
    finally:
        #_close the file even when a variable is missing or malformed
        handle.close()

    #_Check for required species
    for s in require:
        if ln.subset(aod, variable=s).size == 0:
            raise RuntimeError('Not all ' + model + ' species available')

    return aod
Beispiel #5
0
def read_nmmbfcst(dtg,
                  path=dir_prod + '/NMMB',
                  require=['dust_aod', 'seasalt_aod']):
    '''
    Read NMMB netcdf file
    aod = read_nmmbfcst(dtg)
        dtg     = string,   start dtg; the file read is
                            <path>/<dtg[:6]>/<dtg>-NMMB_BSC_CTM-ICAP.nc
        path    = string,   directory of NMMB files
        require = list,     species that must end up in the result

    dust_aod550 is stored as 'dust_aod' and salt_aod550 as 'seasalt_aod';
    the latter is only read for dtg >= '2012112000'.

    A missing file is only logged; the function then fails in the
    required-species check.  Raises RuntimeError if any species in
    require has no record.
    '''
    from netCDF4 import Dataset
    dtg_fcst = dtg
    fname = '/'.join((path, dtg_fcst[:6], dtg_fcst + '-NMMB_BSC_CTM-ICAP.nc'))
    dbg(dtg)
    model = 'NMMB'
    aod = cl.model_object()
    aod_vars = lm.aod_vars()

    species = {'dust_aod550': 'dust_aod', 'salt_aod550': 'seasalt_aod'}

    #_BSC added seasalt on this date
    if dtg < '2012112000':
        del species['salt_aod550']

    if not os.path.isfile(fname):
        dbg(fname + ' is missing', l=2)
    else:
        dbg(fname, l=2)
        handle = Dataset(fname, mode='r', format='NETCDF3_CLASSIC')
        try:
            _, ny, nx = handle.variables['dust_aod550'].shape
            #_lat/lon are stored 2-d; take one row / column of each.
            # '//' keeps the index an integer on every Python version.
            # NOTE(review): the row is picked with nx and the column with
            # ny, as in the original -- confirm this matches the file layout
            lons = handle.variables['lon'][nx // 2, 1:]
            lats = handle.variables['lat'][:, ny // 2]
            times = handle.variables['time'][:]
            dtg_init = handle.dtg_init  #_global attribute of the file

            #_Loop over species dictionary to read in ncdf variables
            for spec, spec_loc in species.items():
                long_name = aod_vars[spec_loc]['long_name']
                dimname = aod_vars[spec_loc]['dims']
                ncvar = handle.variables[spec]
                for t, offset in enumerate(times):
                    dtg_loop = lt.newdtg(dtg_init, offset)
                    vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600

                    #_first longitude column is dropped, matching lons above
                    data = cl.var(ncvar[t, :, 1:], attrv=(lats, lons))
                    dimsize = data.shape
                    aod.resize(aod.size + 1)
                    aod[-1] = (data, model, dtg_init, dtg_loop, vhr,
                               'global', spec_loc, 0, dimname, dimsize, '',
                               long_name)
        finally:
            #_close the file even when a variable is missing or malformed
            handle.close()

    #_Check for required species
    for s in require:
        if ln.subset(aod, variable=s).size == 0:
            raise RuntimeError('Not all ' + model + ' species available')
    return aod
Beispiel #6
0
def read_geos5fcst(dtg,
                   require=[
                       'dust_aod', 'smoke_aod', 'seasalt_aod', 'sulfate_aod',
                       'total_aod'
                   ],
                   path=dir_prod + '/GEOS5'):
    '''
    Read GEOS-5 netcdf file
    aod = read_geos5fcst(dtg)
        dtg     = string,   start dtg; the file read is
                            <path>/<dtg[:6]>/<dtg>_aod_geos5.nc
        require = list,     species that must end up in the result
        path    = string,   directory of GEOS-5 files

    Every 6th timestep of dust, blackcarbon, organiccarbon, seasalt and
    sulfate is read, stopping at the first timestep later than
    lt.newdtg(dtg_init, 120).  Where both carbon species exist, their sum
    is added as 'smoke_aod'; where dust, seasalt, sulfate and smoke all
    exist for a valid time, their sum is added as 'total_aod'.

    Raises IOError if the file is missing, ValueError if a field that was
    read contains values above 1e3, and RuntimeError if any species in
    require has no record.
    '''
    #_These are the species FROM GMAO.
    # Not the list getting PLOTTED. That
    # is in the model dictionary
    from netCDF4 import Dataset
    dbg(dtg)
    aod = cl.model_object()
    aod_vars = lm.aod_vars()
    model = 'GEOS5'
    finc = 6
    fhr = 120
    species = [
        'dust_aod', 'blackcarbon_aod', 'organiccarbon_aod', 'seasalt_aod',
        'sulfate_aod'
    ]
    fname = path + '/' + dtg[:6] + '/' + dtg + '_aod_geos5.nc'
    dbg('Reading ' + fname, l=2)

    if not os.path.isfile(fname):
        raise IOError(fname + ' is missing.')

    handle = Dataset(fname, mode='r', format='NETCDF3_CLASSIC')
    try:
        lons = handle.variables['lons'][:]
        lats = handle.variables['lats'][:]
        times = handle.variables['times'][:]
        dtg_init = lt.gsfc_day2dtg(times[0])  #_Brings dtg to 00z, not 22
        dtg_fcst = lt.newdtg(dtg_init, fhr)
        times = times[::finc]

        for spec in species:
            ncvar = handle.variables[spec.replace('_aod', '')]
            long_name = aod_vars[spec]['long_name']
            dimname = aod_vars[spec]['dims']
            for t, days in enumerate(times):
                dtg_loop = lt.gsfc_day2dtg(days)
                if dtg_loop > dtg_fcst:
                    break

                #_times was thinned by finc, so scale the index back up
                field = ncvar[t * finc, :, :]

                #_Range check on the data itself.  The earlier form,
                # len(np.where(...)), measured the length of the tuple
                # np.where returns and so did not depend on the values.
                if np.any(field > 1e3):
                    raise ValueError('GEOS-5 data out of range for ' +
                                     dtg_loop + ' ' + spec)

                vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600
                data = cl.var(field, attrv=[lats, lons])
                dimsize = data.shape

                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           spec, 0, dimname, dimsize, '', long_name)
    finally:
        #_close the file on success and on any of the errors above
        handle.close()

    #_If we have bc and oc, sum them to generate the smoke specie
    smoke_parts = ['blackcarbon_aod', 'organiccarbon_aod']
    if all(ln.subset(aod, variable=part).size > 0 for part in smoke_parts):
        #_kept in its own name so the total_aod name below cannot leak
        # into smoke records of later timesteps
        smoke_name = aod_vars['smoke_aod']['long_name']
        bc = ln.subset(aod, variable='blackcarbon_aod')
        oc = ln.subset(aod, variable='organiccarbon_aod')

        #_Find the valid times for which we have both values
        dtg_smoke = lt.intersection([bc.dtg_vald, oc.dtg_vald])

        for dtg_loop in dtg_smoke:
            if dtg_loop > dtg_fcst:
                break  #_KLUUUUUUUUDGE

            vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600
            bc_loop = ln.subset(bc, dtg_vald=dtg_loop).values[0]
            oc_loop = ln.subset(oc, dtg_vald=dtg_loop).values[0]

            data = cl.var(np.sum([bc_loop, oc_loop], axis=0),
                          attrv=[lats, lons])
            dimname = aod_vars['smoke_aod']['dims']
            dimsize = data.shape
            aod.resize(aod.size + 1)
            aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                       'smoke_aod', 0, dimname, dimsize, '', smoke_name)

            #_If we have all necessary species, sum total
            spec_total = [
                'dust_aod', 'seasalt_aod', 'sulfate_aod', 'smoke_aod'
            ]
            for spec in spec_total:
                present = ln.subset(aod, variable=spec, dtg_vald=dtg_loop)
                if present.size == 0:
                    dbg('Cannot calc total AOD ' + dtg_loop + ' ' + spec,
                        l=2)
                    break
            else:  #_all four species present, generate 'total'
                total_name = aod_vars['total_aod']['long_name']
                aod_loop = ln.subset(aod, dtg_vald=dtg_loop)
                dust_loop = ln.subset(aod_loop,
                                      variable=['dust_aod']).values[0]
                salt_loop = ln.subset(aod_loop,
                                      variable=['seasalt_aod']).values[0]
                sulf_loop = ln.subset(aod_loop,
                                      variable=['sulfate_aod']).values[0]
                smoke_loop = ln.subset(aod_loop,
                                       variable=['smoke_aod']).values[0]
                total = np.sum(
                    [dust_loop, salt_loop, sulf_loop, smoke_loop], axis=0)
                data = cl.var(total, attrv=[lats, lons])

                dimname = aod_vars['total_aod']['dims']
                dimsize = data.shape

                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           'total_aod', 0, dimname, dimsize, '', total_name)

    #_Check for required species
    for s in require:
        if ln.subset(aod, variable=s).size == 0:
            raise RuntimeError('Not all GEOS-5 species available')
    return aod
Beispiel #7
0
def read_maccfcst(dtg,
                  path=dir_prod + '/MACC',
                  require=[
                      'biomassburning_aod', 'dust_aod', 'seasalt_aod',
                      'sulfate_aod', 'total_aod'
                  ]):
    """
    MACC files are read together - pass the dtg and a path
    to build everything before _aod in the filename
    aod = read_maccfcst(dtg)
        dtg     = string,   initial date; one file per species is read:
                            <path>/<dtg[:6]>/<dtg>_<species>_550_macc.nc
        path    = string,   directory of MACC files
        require = list,     species whose file must exist

    Fields are flipped in latitude and their two longitude halves are
    swapped (the second half shifted by -360 degrees) before storage.
    'biomassburning_aod' is stored under the name 'smoke_aod'.

    A missing file is logged; it raises IOError only when its species is
    in require.  IOError is also raised when nothing at all was read.
    """
    from netCDF4 import Dataset
    dbg(dtg)
    model = 'MACC'
    aod = cl.model_object()
    aod_vars = lm.aod_vars()

    #_netCDF variable names changed on this date
    if dtg < '2012102600':
        # NOTE(review): biomassburning and organicmatter share code
        # '210.210' here -- confirm against the file contents
        species = {
            'biomassburning_aod': '210.210',
            'blackcarbon_aod': '211.210',
            'dust_aod': '209.210',
            'organicmatter_aod': '210.210',
            'seasalt_aod': '208.210',
            'sulfate_aod': '212.210',
            'total_aod': '207.210'
        }
    else:
        species = {
            'biomassburning_aod': 'bbaod550',
            'blackcarbon_aod': 'bcaod550',
            'dust_aod': 'duaod550',
            'organicmatter_aod': 'omaod550',
            'seasalt_aod': 'ssaod550',
            'sulfate_aod': 'suaod550',
            'total_aod': 'aod550'
        }

    dtg_init = dtg
    prefix = path + '/' + dtg_init[:6] + '/' + dtg_init

    for spec in species:
        file_spec = prefix + '_' + spec + '_550_macc.nc'
        if not os.path.isfile(file_spec):
            error = file_spec + ' is missing'
            dbg(error, l=2)
            if spec in require:
                raise IOError(error)
            continue

        dbg(('Reading', file_spec), l=2)
        code = species[spec]
        key = 'smoke_aod' if spec == 'biomassburning_aod' else spec
        long_name = aod_vars[key]['long_name']
        dimname = aod_vars[key]['dims']
        handle = Dataset(file_spec, mode='r', format='NETCDF3_CLASSIC')
        try:
            _, ny, nx = handle.variables[code].shape
            half = nx // 2  #_integer index on every Python version
            lon_raw = handle.variables['longitude']
            lons = np.append(lon_raw[half:] - 360., lon_raw[:half])
            lats = handle.variables['latitude'][::-1]
            times = handle.variables['time'][:]

            #_enumerate gives the true index even if a time value repeats
            for t, stamp in enumerate(times):
                dtg_loop = lt.ecmwf_day2dtg(stamp)

                #_Fix the orientation of dateline split
                raw = handle.variables[code][t, :, :]
                values = np.append(raw[::-1, half:],
                                   raw[::-1, :half],
                                   axis=1)

                data = cl.var(values, attrv=[lats, lons])
                dimsize = data.shape
                vhr = lt.find_runlength(dtg_init, dtg_loop) / 3600
                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg, dtg_loop, vhr, 'global', key, 0,
                           dimname, dimsize, '', long_name)
        finally:
            #_close the file even when a variable is missing or malformed
            handle.close()

    if aod.size == 0:
        raise IOError('All macc fields missing')

    return aod
Beispiel #8
0
def read_ngacfcst(dtg, path=dir_prod + '/NGAC', require=['dust_aod']):
    '''
    Read NGAC netcdf file
    aod = read_ngacfcst(dtg)
        dtg     = string,   start dtg; the file read is
                            <path>/<dtg[:6]>/<dtg>_aod_550_ngac.nc
        path    = string,   directory of NGAC files
        require = list,     species for which a missing file is an error

    The species read are the lower-cased mod_dict['NGAC']['specs'].  The
    forecast hour of a record is its time index times 3.  Fields valid
    between 2011071300 and 2012012000 (inclusive) are divided by 10.

    A missing file is logged; it raises IOError only for a species in
    require.  IOError is also raised when nothing at all was read.
    '''
    dbg(dtg)
    from netCDF4 import Dataset
    model = 'NGAC'
    species = [s.lower() for s in mod_dict[model]['specs']]
    dtg_fcst = dtg
    prefix = '/'.join((path, dtg_fcst[:6], dtg_fcst))
    aod = cl.model_object()
    aod_vars = lm.aod_vars()

    finc = 3  #_hours per timestep assumed when computing vhr
    file_spec = prefix + '_aod_550_ngac.nc'
    for spec in species:
        long_name = aod_vars[spec]['long_name']
        if not os.path.isfile(file_spec):
            error = file_spec + ' is missing'
            dbg(error, l=2)
            if spec in require:
                raise IOError(error)
            continue

        dbg(file_spec, l=2)
        handle = Dataset(file_spec, mode='r', format='NETCDF3_CLASSIC')
        try:
            ncvar = handle.variables[spec]
            _, ny, nx = ncvar.shape
            lons = handle.variables['lon'][:]
            lats = handle.variables['lat'][:]
            times = handle.variables['time'][:]
            dtg_init = lt.epoch2dtg(times[0])
            dimname = aod_vars[spec]['dims']

            #_enumerate gives the true index even if a time value repeats,
            # and a file with a single timestep is handled as well
            for t, epoch in enumerate(times):
                vhr = t * finc
                dtg_loop = lt.epoch2dtg(epoch)

                field = ncvar[t, :, :]

                #_Certain time periods need to be scaled
                if '2011071300' <= dtg_loop <= '2012012000':
                    field = field / 10.

                data = cl.var(field, attrv=(lats, lons))
                dimsize = data.shape
                aod.resize(aod.size + 1)
                aod[-1] = (data, model, dtg_init, dtg_loop, vhr, 'global',
                           spec, 0, dimname, dimsize, '', long_name)
        finally:
            #_close the file even when a variable is missing or malformed
            handle.close()

    #_Fail if nothing could be read at all
    if aod.size == 0:
        raise IOError('missing fields')

    return aod
Beispiel #9
0
def join_icap(aod,
              fhr=120,
              fstrt=0,
              nt=None,
              finc=6,
              members=lm.current_icap(),
              **kwargs):
    '''
    Put all icap forecasts on common NAAPS grid for usage with ensemble
    statistics

        aod     = model_object, records of the individual models
        members = list,         model names to join; 'ICAP' is ignored
        fhr, fstrt, nt          accepted but not used in this function
        finc                    see the NOTE in the body

    For every species in mod_dict['ICAP']['specs'] and every valid time
    in aod, the member fields are regridded with lt.regrid_field() onto
    the ICAP grid and stacked into a (member, lat, lon) masked array.
    Missing members and values below -1e-5 are masked.  Only records whose
    forecast hour is a multiple of 6 are returned.

    Returns a model_object of 'ICAP' records, or -1 if more than one
    record is found for a model / species / valid time.
    '''
    #_Work on a private list: removing 'ICAP' in place would alter the
    # caller's list and the default, which is built once at def time
    members = [m for m in members if m != 'ICAP']
    dbg(aod.size)

    species = [s.lower() for s in mod_dict['ICAP']['specs']]
    nx = mod_dict['ICAP']['nx']
    ny = mod_dict['ICAP']['ny']
    lons = np.linspace(-179.5, 179.5, nx)
    lats = np.linspace(-89.5, 89.5, ny)
    #_NOTE(review): this overrides the finc argument, so the argument has
    # no effect.  Kept because callers may rely on it -- confirm intent.
    finc = 6

    icap = cl.model_object()
    aod_vars = lm.aod_vars()

    dtg_valds = set(aod.dtg_vald)  #_unique valid times
    dtg_init = lt.unique(aod.dtg_init)[0]

    nens_max = len(members)
    dimname = ('member', 'lat', 'lon')
    for spec in species:
        dbg(spec, l=2)
        long_name = aod_vars[spec]['long_name']
        for dtg_vald in dtg_valds:
            #_make recarray for one dtg, spec, but multiple models
            aod_sub = ln.subset(aod,
                                variable=spec,
                                model=members,
                                dtg_vald=dtg_vald)

            #_every member starts out flagged missing
            aod_rgd = np.empty((nens_max, ny, nx))
            aod_rgd[:] = -9999.
            for e, name in enumerate(members):
                #_pull gridded data for specific model
                tmp = ln.subset(aod_sub, model=name)
                if tmp.size == 1:  #_Should have single rec
                    d = tmp.values[0]

                    #_Regrid model data to icap x/y
                    aod_rgd[e, :, :] = lt.regrid_field(
                        d.lon, d.lat, d, lons, lats).transpose()
                elif tmp.size > 1:
                    print('How did this happen?')
                    return -1
                #_tmp.size == 0: model data missing, slab stays at -9999.

            #_Flag non-physical values, then mask everything flagged
            aod_rgd[aod_rgd < -1e-5] = -9999.
            aod_rgd = np.ma.masked_where(aod_rgd == -9999., aod_rgd)

            #_count present models
            nens = ln.check_members(aod_rgd)

            data = cl.var(aod_rgd, attrv=(members, lats, lons))
            dimsize = data.shape
            vhr = lt.find_runlength(dtg_init, dtg_vald) / 3600

            icap.resize(icap.size + 1)
            icap[-1] = (data, 'ICAP', dtg_init, dtg_vald, vhr, 'global', spec,
                        nens, dimname, dimsize, '', long_name)

    #_Limit to forecasts every finc hours
    idx_fhr = np.where(icap.fhr % finc == 0)[0]
    icap = icap[idx_fhr]

    return icap
Beispiel #10
0
def filter(records,
           strict_icap=True,
           members=['NAAPS', 'GEOS5', 'MACC', 'MASINGAR', 'NGAC'],
           modes=False,
           **kwargs):
    '''
    Builds recarray model_object() containing only ICAP records that
        1. Contain all models in members
        2. Contain all species of that member as defined by
           ln.expected_vars()

    records     : model_object(),   np.recarray() of aod model data
    strict_icap : bool,             when False, records is returned as is
    members     : list,             list of names to require to return
    modes       : not used in this function

    records is also returned unchanged when it holds anything other than
    ICAP data.  Records that are kept are reduced to the members asked
    for; their values and dimsize are updated on the record itself.
    '''
    dbg(records.size)
    models_present = lt.unique(records.model)
    if models_present != ['ICAP']:
        dbg(('filter for icap only', models_present), l=3)
        return records
    if not strict_icap:
        dbg(('icap not set to strict, returning'), l=3)
        return records

    #_species each model is expected to deliver
    expected = ln.expected_vars()

    #_return object
    kept = cl.model_object()

    for rec in records:
        #_one corner point per member tells whether that member is masked
        member_mask = rec.values[:, 0, 0].mask
        variable = rec.variable

        #_positions of the wanted members along the member axis
        keep_idx = []
        complete = True
        for model in members:
            pos = rec.values.member.tolist().index(model)
            keep_idx.append(pos)

            #_an unmasked slice carries one scalar flag, not an array
            if isinstance(member_mask, np.bool_):
                is_masked = member_mask
            else:
                is_masked = member_mask[pos]

            #_a masked member only matters if it should have this species
            if is_masked and variable in expected[model]:
                dbg((rec.dtg_vald, variable, 'filtered'), l=1)
                dbg((model, 'was the cause'), l=1)
                complete = False
                break

        if not complete:
            continue

        #_reduce the member attribute to the members being kept
        names, values = ln.get_attr(rec)
        member_pos = names.index('member')
        values[member_pos] = values[member_pos][keep_idx]

        #_reduce the data the same way and update dimsize
        reduced = rec.values.copy()[keep_idx, :, :]
        rec.dimsize = reduced.shape

        #_put back into the original record and collect it
        rec.values = cl.var(reduced, attrn=names, attrv=values)
        kept = lt.merge((kept, rec))

    dbg(kept.size)
    return kept