Example #1
def opendappaths(inpaths, opts, verbose):
    omfs = []
    dapdims = opts.get('opendapdims', None)
    for inpath in inpaths:
        if verbose > 1:
            print('Opening', inpath, flush=True)
        tmpf = pnc.pncopen(inpath, format='netcdf')
        omfi = pnc.PseudoNetCDFFile()
        for varkey in opts['datakeys'] + opts['geokeys']:
            if verbose > 2:
                print('Processing', varkey, flush=True)
            tmpv = tmpf.variables[varkey]
            for dim, dimlen in zip(tmpv.dimensions, tmpv.shape):
                if dim not in omfi.dimensions:
                    omfi.createDimension(dim, dimlen)
            dtype = tmpv.dtype
            # Aura OMI data is occasionally stored as an int16
            # and scaled to a float32
            for propkey in ['scale_factor', 'add_offset']:
                if hasattr(tmpv, propkey):
                    stype = getattr(tmpv, propkey).dtype
                    if (dtype.char in ('i', 'h')
                            and stype.char not in ('i', 'h')):
                        dtype = stype

            omfi.copyVariable(tmpv, key=varkey, dtype=dtype)

        if dapdims is not None:
            omfi.renameDimensions(**dapdims, inplace=True)

        omfs.append(omfi)

    return omfs
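A minimal sketch of driving opendappaths with an opts mapping. Only the key names ('datakeys', 'geokeys', 'opendapdims') come from the function above; the variable names, dimension names, and path are hypothetical placeholders.

# Hypothetical opts; only the dict keys are read by opendappaths above.
opts = {
    'datakeys': ['ColumnAmountNO2Trop'],           # data variables to copy
    'geokeys': ['Latitude', 'Longitude', 'Time'],  # geolocation variables
    'opendapdims': {'phony_dim_0': 'nTimes'},      # dimension renames, or None
}
omfs = opendappaths(['https://example.opendap/OMI-granule.nc'], opts, verbose=2)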
Example #2
    def boundingbox(self, path, keys=['time']):
        tmpf = pnc.pncopen(path, format='netcdf')
        out = {}
        if 'time' in keys:
            rtf = pnc.PseudoNetCDFFile()
            rtf.createDimension('time', 1)
            rtf.copyVariable(tmpf['PRODUCT/time'], key='time')
            refdate = rtf.getTimes()[0]
            tunit = refdate.strftime('milliseconds since %F %H:%M:%S+0000')

            tf = pnc.PseudoNetCDFFile()
            tf.createDimension('time', 1)
            tf.copyDimension(tmpf['PRODUCT'].dimensions['scanline'])
            tf.copyVariable(tmpf['PRODUCT/delta_time'], key='time')
            tf.variables['time'].units = tunit
            tf = tf.removeSingleton()
            times = tf.getTimes()
            out['time'] = times.min(), times.max()

        if 'longitude' in keys:
            longitude = tmpf['PRODUCT/longitude'][:]
            out['longitude'] = longitude.min(), longitude.max()

        if 'latitude' in keys:
            latitude = tmpf['PRODUCT/latitude'][:]
            out['latitude'] = latitude.min(), latitude.max()

        return out
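A short usage sketch; the class that defines boundingbox is not shown above, so the instance name and file path here are hypothetical, and the file is assumed to follow the TROPOMI PRODUCT group layout used by the method.

# Hypothetical usage of boundingbox on a reader instance.
bbox = reader.boundingbox('S5P_OFFL_L2__NO2.nc',
                          keys=['time', 'longitude', 'latitude'])
tmin, tmax = bbox['time']           # datetime bounds derived from delta_time
lonmin, lonmax = bbox['longitude']  # spatial extent of the granule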
Example #3
 def __init__(self, path):
     tmpf = pnc.pncopen(path, format='netcdf')
     geogrpk = 'PRODUCT/SUPPORT_DATA/GEOLOCATIONS/'
     outkeys = dict(
         time='PRODUCT/delta_time',
         qa_value='PRODUCT/qa_value',
         latitude='PRODUCT/latitude',
         longitude='PRODUCT/longitude',
         level='PRODUCT/layer',
         hyai='PRODUCT/tm5_constant_a',
         hybi='PRODUCT/tm5_constant_b',
         tropopause_level_index='PRODUCT/tm5_tropopause_layer_index',
         averaging_kernel='PRODUCT/averaging_kernel',
         nitrogendioxide_tropospheric_column=
         'PRODUCT/nitrogendioxide_tropospheric_column',
         air_mass_factor_troposphere='PRODUCT/air_mass_factor_troposphere',
         air_mass_factor_total='PRODUCT/air_mass_factor_total',
         surface_pressure='PRODUCT/SUPPORT_DATA/INPUT_DATA/surface_pressure',
         longitude_bounds=geogrpk + 'longitude_bounds',
         latitude_bounds=geogrpk + 'latitude_bounds',
         viewing_zenith_angle=geogrpk + 'viewing_zenith_angle',
         solar_zenith_angle=geogrpk + 'solar_zenith_angle')
     f = pnc.PseudoNetCDFFile()
     for ok, ik in outkeys.items():
         iv = tmpf[ik]
         for dk, dl in zip(iv.dimensions, iv.shape):
             if dk not in f.dimensions:
                 f.createDimension(dk, dl)
         f.copyVariable(iv, key=ok)
     tf = pnc.PseudoNetCDFFile()
     tf.createDimension('time', 1)
     tf.copyVariable(tmpf['PRODUCT/time'], key='time')
     refdate = tf.getTimes()[0]
     x = np.arange(len(f.dimensions['scanline']))
     y = np.arange(len(f.dimensions['ground_pixel']))
     X, Y = np.meshgrid(x, y)
     outf = f.removeSingleton().slice(scanline=X.ravel(),
                                      ground_pixel=Y.ravel(),
                                      newdims=('retrieval', )).slice(
                                          scanline=X.ravel(),
                                          newdims=('retrieval', )).slice(
                                              ground_pixel=Y.ravel(),
                                              newdims=('retrieval', ))
     outf.renameDimensions(scanline='retrieval', inplace=True)
     outf.renameDimensions(ground_pixel='retrieval', inplace=True)
     tunit = refdate.strftime('milliseconds since %F %H:%M:%S+0000')
     outf.variables['time'].units = tunit
     self.variables = outf.variables
     self.dimensions = outf.dimensions
     self.setncatts(outf.getncatts())
     del tmpf
Example #4
def bpch_to_netCDF_via_PNC(format='bpch2', filename='ctm.nc',
                           output_file=None, bpch_file=None, folder=None):
    """ Convert bpch to NetCDF using PNC as backend """
    import PseudoNetCDF as pnc
    # Load the file into memory
    infile = pnc.pncopen(bpch_file, format=format)
    # Kludge - reduce DXYP_DXYP dims online
    dxyp = infile.variables['DXYP_DXYP']
    # Surface area shouldn't have a time dim; if it does, remove it.
    if len(dxyp.shape) == 4:
        dxyp.dimensions = dxyp.dimensions[1:]
        infile.variables['DXYP_DXYP'] = dxyp
    # Now write file to disc
#    pnc.pncwrite(infile, folder+filename)
    pnc.pncwrite(infile, output_file)
Example #5
def get_sf_data(osse_path, flux_var):
    """
    Gets last gctm.sf.NN file from given path

    Parameters:
        osse_path (str) : directory path where sf files are located
        flux_var  (str) : name of flux variable to consider

    Returns:
        tuple of the following numpy arrays
            - latitude
            - longitude
            - scale factor array with indices [month, lat, lon]

    """
    # find the last scale factor iteration file
    sf_fp = sorted(glob(osse_path + '/gctm.sf*'),
                   key=lambda x: int(x[-2:]))[-1]

    # acquire scale factor pseudo netcdf file
    sf = pnc.pncopen(sf_fp)

    # get latitude and longitude
    lat = sf.variables['latitude'].array()
    lon = sf.variables['longitude'].array()

    # get the scale factors
    sf_arr = sf.variables[flux_var].array()[0, :, :, :]

    return lat, lon, sf_arr
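A minimal call sketch for get_sf_data; the directory and flux variable name are hypothetical, and tracerinfo.dat/diaginfo.dat are assumed to sit next to the gctm.sf.NN files so pncopen can parse them.

# Hypothetical path and variable name.
lat, lon, sf_arr = get_sf_data('/data/osse_run', 'CO2_SF')
print(sf_arr.shape)  # indexed [month, lat, lon] per the docstring above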
Example #6
def writeconfig(sectors, outpath):
    hcpaths = []
    for sector in sectors:
        hemco2dpath = hemcotmpl(sector=sector, month=month)
        hemco3dpath = hemco2dpath.replace('0pt1degree', '0pt1degree_3D')
        if os.path.exists(hemco3dpath):
            hcpaths.append(hemco3dpath)
        elif os.path.exists(hemco2dpath):
            hcpaths.append(hemco2dpath)
        else:
            raise KeyError('Could not find regridded: ' + hemco2dpath)

    defaults = set()
    ignores = set()
    with open(outpath, 'w') as hcf:
        hcf.write('(((EPA2016_MONMEAN\n')
        for hcpath in hcpaths:
            hcpatt = changepathtopattern(hcpath)
            sector = getsector(hcpath)
            print(sector, hcpatt, end='', flush=True)
            hcfile = pnc.pncopen(hcpath, format='netcdf')
            for cqkey, v in hcfile.variables.items():
                if cqkey in hcfile.dimensions or cqkey in ('hyai', 'hybi'):
                    continue
                elif cqkey in ('TOLU', ):
                    warn('TOLU mass is duplicated by TOL')
                if cqkey in cq2gc:
                    gctrans = cq2gc.get(cqkey)
                    if len(gctrans) == 0:
                        ignores.add(cqkey)
                else:
                    defaults.add(cqkey)
                    gctrans = [[cqkey, '1007']]
                for gckey, scale in gctrans:
                    if gckey in [
                            'ACET', 'MEK', 'ALD2', 'PRPE', 'PRPA', 'BENZ',
                            'TOLU', 'XYLE', 'EOH', 'ALK4', 'ISOP'
                    ]:
                        units = 'kgC/m2/s'
                    else:
                        units = v.units.strip()
                    opts = dict(unit=units,
                                gckey=gckey,
                                cqkey=cqkey,
                                sector=sector,
                                path=hcpatt,
                                scale=scale,
                                cat='1/2',
                                hier=50)
                    hcf.write(
                        '0 EPA16_{gckey}__{sector}{cqkey} {path}  {cqkey}       2016-2016/1-12/1/0 C xyz  {unit}  {gckey}   {scale}     {cat} {hier}\n'
                        .format(**opts))
                    # If I use - to repeat the file, the mass is from the previous cqkey too.
                    # hcpatt = '-'
            print()
        hcf.write(')))EPA2016_MONMEAN\n')
    print('Ignored', sorted(ignores))
    print('Defaults', sorted(defaults))
Example #7
def create_sf_dict(dir_path, variable, output_path=None, year=2010):
    """
    Creates a dictionary of data from a collection of gctm.sf.** files. The
    output file is saved as a pickle.

    Parameters:
        dir_path    (str) : directory where code can find the scaling
        variable    (str) : CO2 scaling variable to use in the output
        output_path (str) : save location of output pickle file (if not none)
        year        (int) : starting year for inversion

    Returns:
        dictionary with the following key values
            - time      : numpy array
            - latitude  : numpy array
            - longitude : numpy array
            - sf_array  : numpy array

    NOTE:
    - PseudoNetcdf assumes that the tracerinfo.dat and diaginfo.dat files are
      included in the directory path given.

    TODO:
    - the time dimension in the sf files appears to always point to the same
      date.
    """
    # create the list of files
    sf_filepaths = sorted(glob.glob(dir_path + 'gctm.sf*'))

    # read in the above
    sf_files = [pnc.pncopen(path) for path in sf_filepaths]

    # get latitude/longitude/time information
    sample_file = sf_files[0]

    latitude = sample_file.variables['latitude'].array()
    longitude = sample_file.variables['longitude'].array()

    time_vals_raw = sample_file.variables['layer9'].array()
    time = [datetime(year, month, day=1) for month in time_vals_raw]

    # concatenate the scaling factors over time
    sf_concat = np.concatenate(
            [i.variables[variable].array() for i in sf_files]
    )

    output_dict = {
        'time': time,
        'latitude': latitude,
        'longitude': longitude,
        'sf_array': sf_concat
    }

    if output_path:
        with open(output_path, 'wb') as f:  # pickle requires a binary mode
            pickle.dump(output_dict, f)

    return output_dict
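A round-trip sketch for create_sf_dict; the directory, variable, and pickle path are hypothetical. Note that dir_path needs a trailing slash because the function concatenates dir_path + 'gctm.sf*' directly.

import pickle

# Hypothetical names; tracerinfo.dat/diaginfo.dat assumed alongside the files.
d = create_sf_dict('/data/inversion/', 'CO2_SF', output_path='sf.pkl')
with open('sf.pkl', 'rb') as f:  # binary mode, matching pickle.dump above
    d2 = pickle.load(f)
print(d2['sf_array'].shape)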
Example #8
def openfile(path):
    """
    Clean out duplicated days
    """
    mo = int(path[-2:])
    f = pnc.pncopen(path, format='netcdf').subsetVariables(['O3'])
    times = f.getTimes()
    tidx = np.array([ti for ti, t in enumerate(times) if t.month == mo])
    return f.sliceDimensions(TSTEP=tidx)
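The month filter above keeps only timesteps whose month matches the two-digit suffix of the path. The same index construction in isolation, with synthetic datetimes:

from datetime import datetime
import numpy as np

# Keep only times falling in month 7, as if the path ended in '07'.
times = [datetime(2016, 6, 30, 23), datetime(2016, 7, 1), datetime(2016, 7, 2)]
tidx = np.array([ti for ti, t in enumerate(times) if t.month == 7])
print(tidx)  # [1 2]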
Example #9
    def get_timezonefile(self):
        if self.timezonefile is not None:
            return self.timezonefile
        elif os.path.exists(self.timezonepath):
            self.timezonefile = pnc.pncopen(self.timezonepath, format='ioapi')
            return self.get_timezonefile()
        print(
            f'{self.timezonepath} not available;'
            ' calculating UTCOFFSET in hours from longitude...', end=''
        )
        gf = pnc.pncopen(
            self.griddescpath, format='griddesc', GDNAM=self.gdnam
        )
        del gf.variables['TFLAG']
        gf.SDATE = 1970001
        I, J = np.meshgrid(np.arange(gf.NCOLS), np.arange(gf.NROWS))
        lon, lat = gf.ij2ll(I, J)
        utcoffset = (lon / 15)
        tzf = gf.subset([])
        tzvar = tzf.createVariable(
            'UTCOFFSET', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
            long_name='UTCOFFSET', var_desc='UTCOFFSET', units='hours'
        )
        tzvar[:] = utcoffset
        mthdvar = tzf.createVariable(
            'METHOD', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
            long_name='METHOD', units='none',
            var_desc='METHOD: 0=tz_world.geojson; 1=lon/15'
        )
        mthdvar[:] = 1
        tzf.updatetflag(overwrite=True)
        tzf.updatemeta()
        tzf.FILEDESC = """Calculated TZ from longitude"""
        tzf.HISTORY = """Calculated TZ from longitude"""
        tzf.save(
            self.timezonepath, format='NETCDF4_CLASSIC',
            complevel=1, verbose=0
        ).close()
        print('done')

        return self.get_timezonefile()
Example #10
def process(args):
    outpath = args.outpath
    if os.path.exists(outpath):
        print('Using cached', outpath, flush=True)
        return

    if args.verbose > 1:
        print(f'Opening {args.GRIDDESC} GDNAM={args.GDNAM}', flush=True)
    gf = pnc.pncopen(args.GRIDDESC, format='griddesc', GDNAM=args.GDNAM)

    opts = eval(open(args.optpath, 'r').read())
    omf = subset(args, gf, opts)
    outf = grid(args, gf, opts, omf)
    outf.save(outpath, verbose=1, complevel=1)
Example #11
def read_sf_objs(base_df_dir):
    """
    Reads in all objects present in the ./scale_factors directory

    Parameters:
        base_df_dir (str) : base directory where all scale factors can be found

    Returns:
        list of sf objects

    NOTE:
    - tracerinfo and diaginfo files must be present in the given directory
    """
    # obtain the scale factor file names (NOTE: file order doesn't matter)
    file_names = glob(base_df_dir + '/data/scale_factors/sf*')

    return [pnc.pncopen(fn, format='bpch') for fn in file_names]
Example #12
def getsites(path):
    """
    path : path to ioapi file
    returns i, j locations
    """
    keepvars = [
        'PM25_TOT', 'PM25_SO4', 'PM25_NO3', 'PM25_OC', 'PM25_OM', 'PM25_CL',
        'PMC_CL', 'PM25_EC', 'PM25_SOIL', 'PMC_TOT'
    ]
    print(path, flush=True)
    inf = pnc.pncopen(path, format='ioapi')
    varf = inf.subsetVariables(keepvars)
    sitef = varf.slice(ROW=jc, COL=ic, newdims=('site', ))
    ntimes = len(sitef.dimensions['TSTEP'])
    dims = sitef.variables[keepvars[0]].dimensions
    mymask = mask[None, None, :].repeat(ntimes, 0)

    outf = sitef.mask(mymask, dims=dims)
    return outf
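getsites relies on module-level globals ic, jc, and mask. A hedged sketch of how they might be constructed; the site locations and mask values are invented for illustration.

import numpy as np

# Hypothetical site indices and mask consumed by getsites above.
ic = np.array([10, 42, 130])            # COL index of each site
jc = np.array([55, 23, 88])             # ROW index of each site
mask = np.array([False, False, True])   # True masks a site out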
Example #13
def reorderVarDims(var, dims, key=None):
    """
    Arguments
    ---------
    var: PseudoNetCDFVariable
    dims : iterable
        iterable of dimension names
    key : str or None
        name for the output variable; if None, inferred from the long_name or
        standard_name attribute ('unknown' as a fallback)

    Returns
    -------
    outvar : PseudoNetCDFVariable
        var if dims matches or a new var with matching dims

    Notes
    -----
    """
    vardims = list(var.dimensions)
    newdims = ([dk for dk in dims if dk in vardims] +
               [dk for dk in vardims if dk not in dims])
    if newdims == vardims:
        return var

    destax = list(range(len(newdims)))
    sourceax = [vardims.index(dk) for dk in newdims]
    outdata = np.moveaxis(var[:], sourceax, destax)
    props = var.getncatts()
    if key is None:
        for k in ['Long_name', 'long_name', 'standard_name']:
            if k in props:
                key = getattr(var, k)
                break
        else:
            key = 'unknown'

    outvar = pnc.PseudoNetCDFVariable(None,
                                      key,
                                      outdata.dtype.char,
                                      newdims,
                                      values=outdata)
    outvar.setncatts(var.getncatts())
    return outvar
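A self-contained check of reorderVarDims with a synthetic variable, mirroring the PseudoNetCDFVariable constructor used inside the function; it moves the requested dimensions to the front and keeps the rest in order.

import numpy as np
import PseudoNetCDF as pnc

v = pnc.PseudoNetCDFVariable(None, 'demo', 'f', ('time', 'lay', 'row'),
                             values=np.zeros((2, 3, 4), dtype='f'))
out = reorderVarDims(v, ('row', 'time'), key='demo')
print(out.dimensions)  # ('row', 'time', 'lay')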
Example #14
def combine(inpath, outpath, exprpath, clobber=False):
    """
    Arguments
    ---------
    inpath : path to netcdf input file
    outpath : path to output file
    exprpath : path to text file with expressions
    clobber : if False (default), reuse an existing outpath instead of remaking

    Returns
    -------
    None
    """
    if os.path.exists(outpath) and not clobber:
        print('Using cached:', outpath)
        return
    spcexpr = open(exprpath, 'r').read()
    infile = pnc.pncopen(inpath, format='ioapi')
    if len(infile.dimensions['TSTEP']) > 1:
        infile = infile.sliceDimensions(TSTEP=slice(None, -1))
    spcfile = infile.copy().eval(spcexpr, inplace=False)
    spcfile.save(outpath, format='NETCDF4_CLASSIC')
Example #15
def collapse(inpath, outpath, clobber=False):
    """
    Arguments
    ---------
    inpath : path to netcdf input file
    outpath : path to output file
    clobber : if False (default), reuse an existing outpath instead of remaking

    Returns
    -------
    None
    """
    if os.path.exists(outpath) and not clobber:
        print('Using cached:', outpath)
        return
    infile = pnc.pncopen(inpath, format='ioapi')
    outfile = infile.interpSigma(
        vglvls=outvglvls,
        vgtop=infile.VGTOP,
        interptype='conserve'
    )
    outfile.save(outpath, format='NETCDF4_CLASSIC')
Example #16
def read_sf_objs(base_df_dir, sf_prefix):
    """
    Reads in all files in directory with provided scale factor prefix.

    E.g. ./scale_factors/sf_*

    where base_df_dir == 'scale_factors' and sf_prefix == 'sf_'

    Parameters:
        base_df_dir (str) : base directory where all scale factors can be found
        sf_prefix   (str) : prefix for each scale factor file

    Returns:
        list of sf objects

    NOTE:
    - tracerinfo and diaginfo files must be present in the given directory
    - all scale factor files are assumed to have the same prefix form
    """
    # obtain the scale factor file names (NOTE: file order doesn't matter)
    file_names = glob(base_df_dir + '/' + sf_prefix + '*')

    return [pnc.pncopen(fn, format='bpch') for fn in file_names]
Example #17
    def __init__(
        self, gridpath, nominaldate='1970-01-01', format='griddesc', **kwds
    ):
        """
        Arguments
        ---------
        gridpath : str
            path to a GRIDDESC file
        nominaldate : str
            Date for spatial and regional files (default: '1970-01-01')
        format : str
            griddesc, by default, but can be any ioapi_base class
        kwds : mappable
            Keywords for opening GRIDDESC. For example, GDNAM if there are
            multiple domains.

        Returns
        -------
        """
        nominaldate = pd.to_datetime(nominaldate)
        gf = pnc.pncopen(gridpath, format=format, **kwds)
        gf.SDATE = int(nominaldate.strftime('%Y%j'))
        gf.STIME = int(nominaldate.strftime('%H%M%S'))
        gf.TSTEP = 10000
        self.spatialfile = gf.subset([])
        uv = self.spatialfile.createVariable(
            'UNIFORM', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
            long_name='UNIFORM', var_desc='UNIFORM', units='none'
        )
        uv[:] = 1 / uv.size
        self.regionfile = gf.subset([])
        dw = self.regionfile.createVariable(
            'DOMAINWIDE', 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
            long_name='DOMAINWIDE', var_desc='DOMAINWIDE', units='fraction'
        )
        dw[:] = 1.
        self.regions = ['DOMAINWIDE']
Example #18
import PseudoNetCDF as pnc
from datetime import datetime
import sys
import os
import gc

exists = os.path.exists

# dummy assignment
f = pandoras
panpath = sys.argv[1]
concpat = sys.argv[2]
metpat = sys.argv[3]
outpat = sys.argv[4]

allf = pnc.pncopen(panpath, format='pandoraslb3')
allptimes = allf.getTimes()
moddates = sorted([
    datetime.strptime(p, metpat)
    for p in list(set([t.strftime(metpat) for t in allptimes]))
])
for moddate in moddates:
    metpath = moddate.strftime(metpat)
    concpath = moddate.strftime(concpat)
    outpath = moddate.strftime(outpat)
    if not exists(metpath):
        print('Skipping missing met date', metpath, flush=True)
        continue
    elif not exists(concpath):
        print('Skipping missing model date', concpath, flush=True)
        continue
Example #19
os.system(
    "wget --continue -q ftp://newftp.epa.gov/aqmg/global/gadm/gadm36_12US1.IOAPI.nc"
)

# # Opening Files For Reading And Plotting
#

smokepaths = {
    sector: smoketmpl(sector=sector, month=month)
    for sector in include_sectors + natural_sectors
}
smokefiles = {
    sector: pnc.pncopen(path, format='ioapi', mode='r')
    for sector, path in smokepaths.items()
}

reffile = smokefiles[include_sectors[0]]

# # Store Grid Parameters for Later Use
#
# * Regridding requires knowing about the grid structure
# * We are pulling all the metadata, so that we can use what we need.
#

gridproperties = reffile.getncatts()
exec('nominalarea = XCELL * YCELL', None, gridproperties)
Example #20
    def allocate(self, infile, alloc_keys, outpath=None, format=None,
                 **save_kwds):
        """
        Arguments
        ---------
        infile : str or PseudoNetCDF File
            path to netcdf file (or file) to use as input (format keyword used
            as a modifier)
        alloc_keys : mappable  or str
            alloc_keys key/value pairs map region and spatial allocation
            variables (e.g., DOMAINWIDE and POP) to variables in infile to
            allocate spatially. Each key should be a tuple of region and
            spatial variable (e.g., ('DOMAINWIDE', 'POPULATION')). The region
            key must exist as a variable in self.regionfile and the spatial
            variable must exist in self.spatialfile. Each value should be a
            list of variables in infile to pair with the region/spatial pair.
            One allocation variable can be assigned None instead of a list,
            which results in all unassigned variables being used. If alloc_keys
            is a str, this is equivalent to `alloc_keys={alloc_keys: None}`
        outpath : str or None
            path for output to be saved. If None, outf will be returned and not
            saved
        format : str or None
            format of infile when infile is given as a path (see PseudoNetCDF
            pncopen)

        Returns
        -------
        outf : PseudoNetCDFFile
            file with spatial variation

        Notes
        -----

        """
        if isinstance(infile, str):
            infile = pnc.pncopen(infile, format=format)

        if isinstance(alloc_keys, str):
            alloc_keys = {alloc_keys: None}

        all_keys = []
        for k, v in infile.variables.items():
            if 'LAY' in v.dimensions:
                all_keys.append(k)

        assigned_keys = []

        isnone = []
        for (region, srgkey), varkeys in alloc_keys.items():
            if varkeys is None:
                isnone.append((region, srgkey))
            else:
                assigned_keys.extend(varkeys)

        unassigned_keys = list(set(all_keys).difference(assigned_keys))
        if len(isnone) > 1:
            raise ValueError(f'Can only have 1 None sector; got {isnone}')
        if len(isnone) == 1:
            alloc_keys[isnone[0]] = unassigned_keys

        outf = self.spatialfile.subset([])
        for (regionkey, allockey), varkeys in alloc_keys.items():
            regionvar = self.regionfile.variables[regionkey]
            allocvar = self.spatialfile.variables[allockey]
            factor = regionvar[:] * allocvar[:]
            factor /= factor.sum()
            for varkey in varkeys:
                invar = infile.variables[varkey]
                outvar = outf.createVariable(
                    varkey, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
                    long_name=varkey, var_desc=varkey,
                    units=getattr(invar, 'units', 'unknown')
                )
                outvar[:] = invar[:] * factor

        if outpath is None:
            return outf
        else:
            return outf.save(outpath, **save_kwds)
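A minimal usage sketch, assuming the __init__ from Example #17 and this allocate method belong to the same spatial allocator class (hypothetical name SpatialAllocator); the paths and GDNAM are placeholders.

# ('DOMAINWIDE', 'UNIFORM') are the region/surrogate variables created in
# Example #17; None assigns all remaining LAY-carrying variables to them.
sa = SpatialAllocator('GRIDDESC', GDNAM='12US1')
outf = sa.allocate(
    'national_totals.nc',
    alloc_keys={('DOMAINWIDE', 'UNIFORM'): None},
    outpath='gridded.nc', format='ioapi'
)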
Example #21
import sys
import os
import PseudoNetCDF as pnc

# inpath = 'GRIDCRO2D.12US2.35L.160101'
# outpath = 'GRIDCRO2D.12US2.35L.160101.CF.nc'
try:
    inpath, outpath = sys.argv[1:]
except ValueError:
    print('Usage: python {} <INPATH> <OUTPATH>'.format(sys.argv[0]))
    sys.exit(1)

if not os.path.exists(inpath):
    raise IOError(f'{inpath} does not exist.')

if os.path.exists(outpath):
    raise IOError(f'{outpath} exists. Will not overwrite.')

infile = pnc.pncopen(inpath, format='ioapi').copy()
pnc.conventions.ioapi.add_cf_from_ioapi(infile)
infile.save(outpath, verbose=0)
Example #22
def grid(args, gf, opts, omf):
    """
    Arguments
    ---------
    args: namespace
        must have inpaths, verbose, grndfilterexpr, datafilterexpr
        satpath and any requirements of openpaths
    gf : pnc.PseudoNetCDFFile
        griddesc file that implements IOAPI
    opts : mappable
        Product specific options
    omf : pnc.PseudoNetCDFFile
        subset of data with masks applied

    Returns
    -------
    outf : PseudoNetCDFFile
        dimensions nTimes, nXtrack, and nLevels with datakeys and geokeys
    """
    outpath = args.outpath
    datakeys = opts['datakeys']
    outkeys = opts.get('outkeys', datakeys)
    latkey = opts.get('Latitude', 'Latitude')
    lonkey = opts.get('Longitude', 'Longitude')
    timekey = opts.get('Time', 'Time')
    tdim = opts.get('time_dim', 'nTimes')
    xdim = opts.get('xtrack_dim', 'nXtrack')
    lcenterdim = opts.get('level_center_dim', 'nLevels')
    ledgedim = opts.get('level_edge_dim', 'nLevelEdges')
    if args.verbose > 1:
        print('Calculating time', flush=True)
    for tkey in [timekey, 'Time', 'time', 'TIME']:
        if tkey in omf.variables:
            tf = omf.subsetVariables([tkey]).renameVariable(tkey, 'time')
            tf.variables['time'].units = (
                "seconds since 1993-01-01 00:00:00+0000")
            break
    else:
        tf = pnc.PseudoNetCDFFile()
        tf.createDimension('time', 1)
        t = tf.createVariable('time', 'd', ('time', ))
        t.units = "seconds since 1993-01-01 00:00:00+0000"

    date = tf.getTimes()[0]
    gf.SDATE = int(date.strftime('%Y%j'))
    gf.STIME = 0
    gf.TSTEP = 240000
    LAT = omf.variables[latkey][:]
    LON = omf.variables[lonkey][:]
    i, j = gf.ll2ij(LON, LAT, clean='mask')

    mask2d = omf.variables['BADDATA'][:] == 1
    if mask2d.all():
        print('No data; skipping', outpath, flush=True)
        return
    else:
        print('Making', outpath, flush=True)

    if args.verbose > 0:
        utchour = np.array([t.hour for t in tf.getTimes()])
        localhour = np.ma.masked_where(
            mask2d, utchour[:, None] + omf.variables[lonkey][:] / 15)
        ptiles = [0, 10, 25, 75, 90, 100]
        localhourpct = np.percentile(localhour.compressed(), ptiles)
        localhourpctstr = ' '.join(['{:5.2f}'.format(h) for h in localhourpct])
        ptilestr = ' '.join(['{:5d}'.format(p) for p in ptiles])
        print('Percentiles:', ptilestr)
        print('Local Time :', localhourpctstr)

    outf = gf.copy().subsetVariables(['DUMMY'])

    if lcenterdim in omf.dimensions:
        nk = len(omf.dimensions[lcenterdim])
    else:
        nk = 1

    outf.createDimension('LAY', nk)
    twodkeys = []
    renamevars = opts.get('renamevars', {})
    for ki, varkey in enumerate(outkeys):
        outvarkey = renamevars.get(varkey, varkey)
        if args.verbose > 1:
            print(f'Masking and gridding {varkey} as {outvarkey}', flush=True)
        varv = omf.variables[varkey]
        if tdim not in varv.dimensions:
            continue

        varo = np.ma.masked_invalid(
            reorderVarDims(varv, (tdim, xdim), key=varkey)[:])

        varmask = varo.mask
        mask = broadcastVar(mask2d, varo)
        if mask2d.shape == varmask.shape[:mask2d.ndim]:
            mask = (mask2d.T | varmask.T).T
        elif mask2d.shape == varmask.shape[-mask2d.ndim:]:
            mask = (mask2d | varmask)
        else:
            raise ValueError(
                f'Masks not aligned {mask2d.shape} and {varmask.shape}')
        ol = np.ones(mask.shape)
        myi = np.ma.masked_where(mask, (i.T * ol.T).T).compressed() + 0.5
        myj = np.ma.masked_where(mask, (j.T * ol.T).T).compressed() + 0.5
        if varo.ndim <= 2:
            myk = myj * 0 + .5
            twodkeys.append(outvarkey)
        else:
            myk = np.ma.masked_where(mask,
                                     np.indices(
                                         mask.shape)[-1]).compressed() + 0.5

        if varo.ndim <= 3:
            loc = [myk, myj, myi]
            outdims = ('TSTEP', 'LAY', 'ROW', 'COL')
            bins = (np.arange(nk + 1), np.arange(gf.NROWS + 1),
                    np.arange(gf.NCOLS + 1))
        else:
            myk1, myk2 = np.indices(mask.shape)[-2:]
            myk1 = np.ma.masked_where(mask, myk1).compressed() + 0.5
            myk2 = np.ma.masked_where(mask, myk2).compressed() + 0.5
            loc = [myk1, myk2, myj, myi]
            outdims = ('TSTEP', 'LAY', 'LAY', 'ROW', 'COL')
            bins = (np.arange(nk + 1), np.arange(nk + 1),
                    np.arange(gf.NROWS + 1), np.arange(gf.NCOLS + 1))

        myvcd = np.ma.masked_where(mask, varo[:]).compressed()
        r = binned_statistic_dd(loc, myvcd, 'mean', bins=bins)
        c = binned_statistic_dd(loc, myvcd, 'count', bins=bins)
        var = outf.createVariable(outvarkey,
                                  'f',
                                  outdims,
                                  missing_value=-9.000E36)
        var.var_desc = varkey.ljust(80)
        var.long_name = outvarkey.ljust(16)
        var.units = getunit(varv)
        var[:] = np.ma.masked_invalid(r[0])
        nvar = outf.createVariable('N' + outvarkey,
                                   'f',
                                   outdims,
                                   missing_value=-9.000E36)
        nvar.var_desc = ('Count ' + varkey).ljust(80)
        nvar.long_name = ('N' + outvarkey).ljust(16)
        nvar.units = 'none'
        nvar[:] = c[0]

    delattr(outf, 'VAR-LIST')

    # {dk: slice(None, None, -1) for dk in invertdims}
    if args.verbose > 1:
        print('Calculating pressure for sigma approximation', flush=True)

    if opts['pressurekey'] is None:
        dims = [lcenterdim]
        p = np.array([50000], dtype='f')
        pedges1d = np.array([101325, 0], dtype='f')
    else:
        pkey = opts['pressurekey']
        pvf = omf.subset([pkey])
        pv = pvf.variables[pkey]
        pu = getunit(pv).lower()
        dims = list(pv.dimensions)
        afuncs = {}
        for dk in dims:
            if dk in (lcenterdim, ledgedim):
                ldim = dk
            else:
                afuncs[dk] = 'mean'
        pvmf = pvf.apply(**afuncs)
        pvdf = pvmf.apply(**{ldim: np.diff})
        # If delta P is positive (pressure increases with layer index,
        # i.e., the data are ordered top-down), invert a bunch of stuff
        if pvdf.variables[pkey].mean() > 0:
            pvmf = pvmf.slice(ldim=slice(None, None, -1))
            pvdf = pvmf.apply(**{ldim: np.diff})
            # 2-D variables have data in layer 0
            # after inverting, it is in layerN
            # it must be inverted again
            outf = outf.slice(LAY=slice(None, None, -1))
            for varkey in twodkeys:
                tmpv = outf.variables[varkey]
                tmpv[:] = tmpv[:, ::-1]

        if pu in ("hpa", "mb"):
            pfactor = 100.
        elif pu == ("pa", "pascal"):
            pfactor = 1.
        else:
            warn('Unknown unit {}; scale factor = 1'.format(pu))
            pfactor = 1.

        # all other dimensions have been averaged
        # so, they have a unity dimension (ROW=1, COL=1)
        p = pvmf.variables[pkey][:].squeeze()
        dp = pvdf.variables[pkey][:].squeeze()
        if ledgedim in dims:
            pedges1d = p
        else:
            hdp = dp / 2
            pedges1d = np.append(np.append(p[:-1] - hdp, p[-1] - hdp[-1]),
                                 p[-1] + hdp[-1])

        # Ensure pedges is never negative
        # heuristic top identification could cause that problem.
        pedges1d = np.maximum(0, pedges1d) * pfactor

    ptop = outf.VGTOP = pedges1d[-1]
    psrf = pedges1d[0]

    if len(dims) == 1:
        # OMI ScatteringWtPressure is on a pressure grid that is not changing
        # in space or time, so there is only one dimension
        outf.VGTYP = 4
        outf.VGLVLS = pedges1d.astype('f')
    else:
        # Other products will be converted to an approximate sigma coordinate
        # This is not strictly true. The OMPROFOZ readme[1] describes the
        # vertical coordinate as follows.
        #
        #    The 25-level vertical pressure grid is set initially at
        #    Pi = 2-i/2 atm for i = 0, 23 and P24 = 0. This pressure grid is
        #    then modified: The daily NCEP thermal tropopause pressure is
        #    used to replace the level closest to it, and layers between
        #    surface and tropopause are distributed equally in logarithmic
        #    pressure. It is on a hybrid sigma/eta coordinate, so a sigma
        #    approximation is being used here.
        #
        # [1] https://avdc.gsfc.nasa.gov/pub/data/satellite/Aura/OMI/V03/L2/
        #     OMPROFOZ/OMPROFOZ_readme-v3.pdf
        sigma = (pedges1d[:] - ptop) / (psrf - ptop)
        outf.VGTYP = 7
        outf.VGLVLS = sigma[:].astype('f')

    del outf.variables['DUMMY']
    for k in list(outf.variables):
        klen = len(k)
        if klen > 15:
            print(k, 'too long', klen)

    if hasattr(outf, 'VAR-LIST'):
        delattr(outf, 'VAR-LIST')

    outf.updatemeta()
    outf.FILEDESC = "cmaqsatproc output"
    outf.HISTORY = sys.argv[0] + ': ' + str(args)

    gc.collect()
    return outf
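The sigma conversion at the end of grid maps pressure edges onto IOAPI VGLVLS. The same arithmetic in isolation, with made-up pressure edges in Pa:

import numpy as np

# Illustrative edges from surface to model top, as grid() expects.
pedges1d = np.array([101325., 80000., 50000., 20000., 5000.], dtype='f')
ptop, psrf = pedges1d[-1], pedges1d[0]
sigma = (pedges1d - ptop) / (psrf - ptop)  # 1 at the surface, 0 at the top
print(sigma)  # ~[1.0, 0.779, 0.467, 0.156, 0.0]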
Example #23
    def get_dayofweekfile(self, propath=None, read_kwds=None):
        """
        Arguments
        ---------
        propath : str
            path to tpro file ATPRO_WEEKLY file
        read_kwds : dict or None
            If None, default read_kwds are dict(comment='#', index_col=0,
            names=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun', 'comment'])

        Returns:
            df : PseudoNetCDFFile
                IOAPI-like file with day of week allocations for sectors (as
                variables) with shape TSTEP=25, LAY=7, ROW=NROWS, COL=NCOLS,
                where LAY is the day of week (Mon..Sun)
        """

        if self.dayofweekfile is not None:
            return self.dayofweekfile
        elif os.path.exists(self.dayofweekpath):
            self.dayofweekfile = pnc.pncopen(
                self.dayofweekpath, format='ioapi'
            )
            return self.get_dayofweekfile()

        if propath is None:
            raise KeyError(
                f'propath required because {self.dayofweekpath} not found'
            )

        print(
            f'{self.dayofweekpath} not available; calculating from {propath}'
        )
        if read_kwds is None:
            read_kwds = dict(
                comment='#', index_col=0,
                names='Mon Tue Wed Thu Fri Sat Sun comment'.split()
            )
        wkdf = pd.read_csv(propath, **read_kwds)

        wkdf.index.name = 'profile_id'
        tzf = self.get_timezonefile()

        day_f = tzf.subset([])
        day_f.createDimension('TSTEP', 25).setunlimited(True)
        day_f.createDimension('LAY', 7)
        day_f.VGLVLS = np.arange(8)
        day_f.VGTYP = 6
        day_f.SDATE = 2020001
        day_f.STIME = 0
        day_f.TSTEP = 10000

        for wkidx, wkrow in wkdf.iterrows():
            cmt = wkrow['comment']
            label = getlabel(cmt)
            print(label, cmt)
            wkvals = weekdayfactor(wkdf, wkidx, tzf)
            wkvar = day_f.createVariable(
                label, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
                long_name=label, var_desc=label, units='s/s'
            )
            wkvar[:] = wkvals

        day_f.updatemeta()
        day_f.updatetflag(overwrite=True)
        day_f.FILEDESC = (
            """
## NASA-like metadata
1, 2310
Henderson, Barron
US EPA/Office of Air Quality Planning and Standards
EPA sector-based hourly profiles
Not Applicable
1, 1
2021, 01, 13, 2021, 01, 13
0
...
PI_CONTACT_INFO: [email protected]
PLATFORM: CMAQ Emission processing input
DATA_INFO:  All data in daily average per second rates
UNCERTAINTY:  large, preliminary data based on US averages.
DM_CONTACT_INFO: Henderson, Barron, US EPA, [email protected]
PROJECT_INFO: For easy processing of emissions.
STIPULATIONS_ON_USE: Use of these data requires PI notification
OTHER_COMMENTS: The LAY dimension is day of the week (Mon, Tue, ..., Sun)."""
            + "Time is UTC, but the profiles are based on LST days. So, "
            + "UTC_Mon will include hours from Sun and Tue as appropriate "
            + """given the hour offset.
REVISION: R0
R0: Preliminary data
"""
        )
        day_f.save(
            self.dayofweekpath, format='NETCDF4_CLASSIC', complevel=1,
            verbose=0
        ).close()
        return self.get_dayofweekfile()
Example #24
def openhe5(inpaths, opts, verbose):
    tdim = opts.get('time_dim', 'nTimes')
    xdim = opts.get('xtrack_dim', 'nXtrack')
    lcenterdim = opts.get('level_center_dim', 'nLevels')
    omfs = []
    for inpath in inpaths:
        if verbose > 1:
            print('Opening', inpath, flush=True)
        tmpf = pnc.pncopen(inpath, format='netcdf')
        omfi = pnc.PseudoNetCDFFile.from_ncvs(
            **{
                varkey: tmpf[opts['datagrp']].variables[varkey]
                for varkey in opts['datakeys']
            })
        _applyscale(omfi)

        omgfi = pnc.PseudoNetCDFFile.from_ncvs(
            **{
                varkey: tmpf[opts['geogrp']].variables[varkey]
                for varkey in opts['geokeys']
            })
        _applyscale(omgfi)

        datadims = opts.get('datadims', None)
        geodims = opts.get('geodims', None)
        if datadims is None:
            ddims = list(omfi.dimensions)
            datadims = dict(zip(ddims, [tdim, xdim, lcenterdim]))
            print('Dimension mapping heuristically', flush=True)
            print({dk: len(dv) for dk, dv in omfi.dimensions.items()})
            print('Selected dimension mapping:', datadims, flush=True)

        if geodims is None:
            gdims = list(omgfi.dimensions)
            geodims = dict(zip(gdims, [tdim, xdim, lcenterdim]))
            print('Dimension mapping heuristically', flush=True)
            print({dk: len(dv) for dk, dv in omgfi.dimensions.items()})
            print('Selected dimension mapping:', geodims, flush=True)

        for inkey, outkey in datadims.items():
            if inkey not in omfi.dimensions:
                print('** Error renaming data dimension:\n' +
                      f'Key {inkey} ({outkey}) not found:\n{omfi.dimensions}' +
                      '\n\n** Try increasing or decreasing phony numbered' +
                      ' dimensions by 1 in the configuration.' +
                      '\n** Different netcdf versions give them' +
                      ' different names for repeated length dimensions.')
                sys.exit()

        omfi.renameDimensions(**datadims, inplace=True)

        for inkey, outkey in geodims.items():
            if inkey not in omgfi.dimensions:
                print(
                    'Error renaming geo dimension:' +
                    f'Key {inkey} ({outkey}) not found:\n{omgfi.dimensions}' +
                    '\n\n** Try increasing or decreasing phony numbered' +
                    ' dimensions by 1 in the configuration.' +
                    '\n** Different netcdf versions give them' +
                    ' different names for repeated length dimensions.')
                sys.exit()

        omgfi.renameDimensions(**geodims, inplace=True)

        for geokey in opts['geokeys']:
            omfi.copyVariable(omgfi.variables[geokey], key=geokey)

        flipdimkeys = opts.get('flipdims', [])
        if len(flipdimkeys) > 0:
            flipslices = {
                k: slice(None, None, -1)
                for k in flipdimkeys if k in omfi.dimensions
            }
            omfi = omfi.sliceDimensions(**flipslices)
        omfs.append(omfi)

    return omfs
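openhe5 is driven by an opts mapping. Only the key names below are read by the function above; the group, variable, and dimension names are hypothetical OMI-style placeholders.

# Hypothetical opts for openhe5.
opts = {
    'datagrp': 'HDFEOS/SWATHS/ColumnAmountNO2/Data Fields',
    'geogrp': 'HDFEOS/SWATHS/ColumnAmountNO2/Geolocation Fields',
    'datakeys': ['ColumnAmountNO2Trop'],
    'geokeys': ['Latitude', 'Longitude', 'Time'],
    'datadims': {'phony_dim_0': 'nTimes', 'phony_dim_1': 'nXtrack'},
    'geodims': {'phony_dim_2': 'nTimes', 'phony_dim_3': 'nXtrack'},
    'flipdims': [],  # dimensions to reverse, if any
}
omfs = openhe5(['OMI-Aura_L2-OMNO2.he5'], opts, verbose=2)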
Example #25
    def get_monthlyfile(self, propath=None, read_kwds=None):
        """
        Arguments
        ---------
        propath : str
            path to tpro file ATPRO_MONTHLY file
        read_kwds : dict or None
            If None, default read_kwds are dict(comment='#', index_col=0,
            names=['Jan', ..., 'Dec', 'comment'])

        Returns:
            df : PseudoNetCDFFile
                IOAPI-like file with month of year allocations for sectors (as
                variables) with shape TSTEP=12, LAY=1, ROW=NROWS, COL=NCOLS
        """

        if self.monthlyfile is not None:
            return self.monthlyfile
        elif os.path.exists(self.monthlypath):
            self.monthlyfile = pnc.pncopen(self.monthlypath, format='ioapi')
            return self.get_monthlyfile()
        if propath is None:
            raise KeyError(
                f'propath required because {self.monthlypath} not found'
            )

        print(
            f'{self.monthlypath} not available; calculating from {propath}'
        )
        names = _monnames + ['comment']

        if read_kwds is None:
            read_kwds = dict(comment='#', index_col=0, names=names)

        mondf = pd.read_csv(propath, **read_kwds)
        tzf = self.get_timezonefile()

        mon_f = tzf.subset([])
        mon_f.createDimension('TSTEP', 12).setunlimited(True)
        mon_f.SDATE = 2020001
        mon_f.STIME = 0
        mon_f.TSTEP = 24 * 30.5 * 10000

        for monidx, monrow in mondf.iterrows():
            cmt = monrow['comment']
            label = getlabel(cmt)
            print(label, cmt)
            monvals = monfactor(mondf, monidx, tzf)
            monvar = mon_f.createVariable(
                label, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'),
                long_name=label, var_desc=label, units='s/s'
            )
            monvar[:] = monvals[:, None]

        mon_f.updatemeta()
        mon_f.updatetflag(overwrite=True)
        mon_f.FILEDESC = """
## NASA-like metadata
1, 2310
Henderson, Barron
US EPA/Office of Air Quality Planning and Standards
EPA sector-based hourly profiles
Not Applicable
1, 1
2021, 01, 13, 2021, 01, 13
0
...
PI_CONTACT_INFO: [email protected]
PLATFORM: CMAQ Emission processing input
DATA_INFO:  All data in monthly average per second rates
UNCERTAINTY:  large, preliminary data based on US averages.
DM_CONTACT_INFO: Henderson, Barron, US EPA, [email protected]
PROJECT_INFO: For easy processing of emissions.
STIPULATIONS_ON_USE: Use of these data requires PI notification
OTHER_COMMENTS: None.
REVISION: R0
R0: Preliminary data
"""
        mon_f.save(
            self.monthlypath, format='NETCDF4_CLASSIC', complevel=1, verbose=0
        ).close()
        return self.get_monthlyfile()
Example #26
    def allocate(
        self, infile, outdate, alloc_keys, outpath=None,
        monthly=True, dayofweek=True, diurnal=True,
        time=None, format=None,
        overwrite=False, verbose=0
    ):
        """
        Arguments
        ---------
        infile : str or PseudoNetCDF File
            path to netcdf file (or file) to use as input (format keyword used
            as a modifier)
        outdate : datetime
            date to destination
        outpath : str or None
            path for output to be saved. If None, outf will be returned and not
            saved
        alloc_keys : mappable  or str
            alloc_keys key/value pairs map allocation variables (e.g., ENERGY)
            to variables in infile to allocate temporally. Each key should
            be in monthlyfile/dayofweekfile/diurnalfile variables. And each
            value is a list of variables in infile. One allocation variable can
            be assigned None instead of a list, which results in all unassigned
            variables being used. If alloc_keys is a str, this is equivalent to
            `alloc_keys={alloc_keys: None}`
        monthly : bool
            apply monthly scaling. If file already has months, use month=False
            and time=m to apply other scaling to time m.
        dayofweek : bool
            apply day of week  scaling. If file already has day of week, use
            dayofweek=False and time=d to apply other scaling to time d.
        diurnal : bool
            apply hour of day  scaling. If file already has hour of day, use
            diurnal=False and time=h to apply other scaling to time h.
        time : int or None
            if None, checks to ensure that file has only 1 time and uses first
            (i.e., 0)
        format : str
            format of file or meta data (e.g., netcdf or ioapi; see
            PseudoNetCDF pncopen)

        Returns
        -------
        outf : PseudoNetCDFFile
            file with temporal variation

        Notes
        -----

        1. month, dayofweek, and diurnal can be combined to exclude one or many
           scalings

        """
        remove = False

        if outpath is not None and os.path.exists(outpath):
            if not overwrite:
                raise IOError(f'{outpath} exists')
            else:
                remove = True

        refdate = outdate

        if verbose > 0:
            print('Opening input', flush=True)

        if isinstance(infile, str):
            ef = pnc.pncopen(infile, format=format)
        else:
            ef = infile

        if isinstance(alloc_keys, str):
            alloc_keys = {alloc_keys: None}

        all_keys = []
        for k, v in ef.variables.items():
            if 'LAY' in v.dimensions:
                all_keys.append(k)

        assigned_keys = []

        isnone = []
        for sector, varkeys in alloc_keys.items():
            if varkeys is None:
                isnone.append(sector)
            else:
                assigned_keys.extend(varkeys)

        unassigned_keys = list(set(all_keys).difference(assigned_keys))
        if len(isnone) > 1:
            raise ValueError(f'Can only have 1 None sector; got {isnone}')
        if len(isnone) == 1:
            alloc_keys[isnone[0]] = unassigned_keys

        if time is None:
            if len(ef.dimensions['TSTEP']) > 1:
                raise ValueError(
                    'Time dimension is not 1, so you must choose a time'
                )
            time = 0

        if format == 'ioapi':
            if verbose > 0:
                print('Appending TFLAG to exclude', flush=True)

        if verbose > 0:
            print('Creating output template', flush=True)

        outf = ef.subset([])
        if 'TFLAG' in outf.variables:
            del outf.variables['TFLAG']

        nsteps = 1
        if monthly:
            nsteps = nsteps * 1
            tstep = 30*240000
        if dayofweek:
            nsteps = nsteps * 1
            tstep = 240000
        if diurnal:
            nsteps = nsteps * 25
            tstep = 10000

        outf.createDimension('TSTEP', nsteps).setunlimited(True)

        if verbose > 0:
            print('Calculating composite factor', flush=True)

        for sectorkey, varkeys in alloc_keys.items():
            factor = self.get_factor(
                sectorkey, refdate,
                diurnal=diurnal, dayofweek=dayofweek, monthly=monthly
            )
            for varkey in varkeys:
                invar = ef.variables[varkey]
                if verbose > 0:
                    print(f'Scaling {varkey}...', flush=True)
                outvar = outf.copyVariable(invar, key=varkey, withdata=False)
                outvar.setncatts(
                    {pk: getattr(invar, pk) for pk in invar.ncattrs()}
                )
                outvar[:] = invar[time] * factor

        outf.SDATE = int(refdate.strftime('%Y%j'))
        outf.STIME = int(refdate.strftime('%H%M%S'))
        outf.TSTEP = tstep
        if format == 'ioapi':
            outf.updatemeta()
            outf.updatetflag(overwrite=True)

        history = getattr(outf, 'HISTORY')
        history += f'apply_temporal({locals()})'
        setattr(outf, 'HISTORY', history)
        if outpath is not None and remove:
            os.remove(outpath)

        if outpath is None:
            return outf
        else:
            outf.save(outpath, verbose=0).close()
            return pnc.pncopen(outpath, format='ioapi')
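A minimal sketch of the temporal allocate; ta is assumed to be an instance whose monthly, day-of-week, and diurnal files already exist, and the sector label and paths are hypothetical.

from datetime import datetime

# 'ENERGY' must exist in the monthly/day-of-week/diurnal files; None
# assigns every remaining LAY-carrying variable to that sector.
outf = ta.allocate(
    'annual_emis.nc', datetime(2016, 7, 1),
    alloc_keys={'ENERGY': None},
    outpath='hourly_emis.nc', format='ioapi', verbose=1, overwrite=True
)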
Example #27
# from matplotlib import use; use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import PseudoNetCDF as pnc
import pandas as pd
import sys

figpath = sys.argv[1]
inpaths = sys.argv[2:]

inf = pnc.pncmfopen(inpaths, format='netcdf', stackdim='time')
ts = inf.getTimes()
obs = inf.variables['OBS'][:]
obsu = inf.variables['OBSU'][:]
mod = inf.variables['MOD'][:]
lb = obs - 1.96 * np.abs(obsu)
tsm = np.ma.masked_where(lb < 0, ts).compressed()
obsm = np.ma.masked_where(lb < 0, obs).compressed()
obsum = np.ma.masked_where(lb < 0, obsu).compressed()
modm = np.ma.masked_where(lb < 0, mod).compressed()
df = pd.DataFrame(dict(obs=obsm, obsu=obsum, mod=modm),
                  index=tsm).resample('H').mean()
lr = scipy.stats.mstats.linregress(
    np.ma.masked_invalid(df['mod'].values),
    np.ma.masked_invalid(df['obs'].values),
)
ax = df.plot(y=['obs', 'mod'], linestyle='none', marker='o')
ax.text(0, 1, 'r={:.2f}'.format(lr.rvalue), transform=ax.transAxes)
ax.figure.savefig(figpath.replace('.png', '.hourly.png'))
plt.close()
Example #28
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
from glob import glob
from warnings import warn

import PseudoNetCDF as pnc

from perim import perimslices

np = plt.np

inpaths = sorted(glob('../combine/*201?????.BCON.combine.nc'))
tslice = slice(None, None, 6)
varks = ['O3PPB', 'ASO4IJ', 'ANO3IJ', 'NOx', 'ANAIJ', 'PMIJ']
infiles = [
    pnc.pncopen(
        inpath,
        format='ioapi').subsetVariables(varks).sliceDimensions(TSTEP=tslice)
    for inpath in inpaths
]
infile = infiles[0].stack(infiles[1:], 'TSTEP')
del infiles
infile.TSTEP = tslice.step * infile.TSTEP
time = infile.getTimes()
warn('Debug using {}h'.format(tslice.step))
lays = np.arange(0, infile.NLAYS + 1)


def sigmabyt(plotfile, vark, title, norm, ticks, formatter, outpath):
    plt.close()
    ax = plotfile.plot(vark,
                       plottype='TSTEP-LAY',
Example #29
test = """
gcpath = 'CONC/GEOSChem.SpeciesConc.20160701_0000z.nc4'
#gcpath = '/work/ROMO/global/GCv12.0.1/GC/rundirs/geosfp_2x25_standard/Output/GEOSChem.SpeciesConc.20160101_0000z.nc4'
gcexprpath = 'definitions/gc/gc12_to_cb6r3.expr'
aeexprpath = 'definitions/gc/gc12_to_ae6_nvPOA.expr'

args = parser.parse_args([
    '--spcprefix', 'SpeciesConc_', gcpath,
    'GEOS-Chem_Species_Database.json', 'CMAQ.json',
    gcexprpath, aeexprpath
])
"""
args = parser.parse_args()

f = pnc.pncopen(args.inpath)

fromspcs = json.load(open(args.fromjson, 'r'))
tospcs = json.load(open(args.tojson, 'r'))
exprstr = '\n'.join(
    [open(exprpath, 'r').read() for exprpath in args.exprpaths])

noadvspc = [k for k, v in fromspcs.items() if not v['Is_Advected']]
gcspc = [
    k for k, v in fromspcs.items() if not v['Is_Aero'] and v['Is_Advected']
]
aespc = [k for k, v in fromspcs.items() if v['Is_Aero'] and v['Is_Advected']]

spc = gcspc
prefix = args.spcprefix
symtbl = symtable(exprstr, '<pncexpr>', 'exec')
    del tmpf.variables['TFLAG']
    tmpf = fracf.subsetVariables([vark])
    tmpf = tmpf.slice(TSTEP=include)
    return np.ma.filled(tmpf.variables[vark], 0).sum(0, keepdims=True)


def getmask(idf, vark, namelist):
    var = idf.variables[vark]
    i2k = eval(var.description)
    k2i = {k: i for i, k in i2k.items()}
    idlist = [k2i[k] for k in namelist]
    outvar = np.in1d(var[:], idlist).reshape(var.shape)
    return outvar


gadmf = pnc.pncopen(args.inpath, format='ioapi')
if args.variable is None:
    for vark in ['ID_0', 'ID_1', 'ID_2']:
        if vark in gadmf.variables:
            args.variable = vark
            break
    else:
        print('Could not find variable ID_0, ID_1, ID_2')
        exit()

outf = gadmf.slice(TSTEP=0).subsetVariables([args.variable])
configd = json.load(open(args.definitions, mode='r', encoding='utf-8'))
for outvark, namelist in configd.items():
    outv = outf.createVariable(outvark, 'f', ('TSTEP', 'LAY', 'ROW', 'COL'))
    outv.units = '1'
    outv.long_name = outvark.ljust(16)