@staticmethod
def _do_filter (p, cmd):
  # Evaluate the filter expression, with the header columns available as
  # local variables.  (Declared static since it's called with explicit
  # (records, cmd) arguments.)
  try:
    return eval(cmd, None, p)
  except SyntaxError:
    error (_("unable to parse the filter: %s")%cmd)
  except NameError as e:
    # Use str(e) here; e.message no longer exists in Python 3.
    error (str(e))
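# A minimal sketch of how the filter evaluation behaves (the column name and
# values below are illustrative): each header column is exposed to eval() as a
# local variable, so a filter expression yields a boolean array over records.
#
#   import numpy as np
#   records = {'ip2': np.array([0, 12, 24])}
#   _do_filter(records, "ip2==24")   # -> array([False, False,  True])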
def __init__ (self, *args, **kwargs):
  """
  filter : str or list, optional
      Subset RPN file records using the given criteria.  For example, to
      convert only 24-hour forecasts you could use filter="ip2==24".
  """
  import numpy as np
  filter = kwargs.pop('filter',None)
  if filter is None:
    filter = []
  if isinstance(filter,str):
    filter = [filter]
  self._filters = tuple(filter)
  super(FilterRecords,self).__init__(*args,**kwargs)
  if len(self._filters) == 0: return
  flags = np.ones(len(self._headers),dtype='bool')
  # Expose each header column by name, for use in the filter expressions.
  records = dict([(n,self._headers[n]) for n in self._headers.dtype.names])
  for cmd in self._filters:
    try:
      flags &= self._do_filter(records, cmd)
    except TypeError:
      error (_("unable to apply the filter: %s")%cmd)
  # To filter out unwanted records, mark them as "deleted" in the list.
  self._headers['dltf'] = self._headers['dltf'] | (~flags)
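# Usage sketch for the "filter" option (the file names are illustrative, and
# Buffer is assumed to mix in FilterRecords, as in the public fstd2nc API):
#
#   import fstd2nc
#   buf = fstd2nc.Buffer("model_output.fst", filter="ip2==24")
#   buf.to_netcdf("model_output_24h.nc")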
def __init__ (self, filename, header_cache=None, progress=False,
              minimal_metadata=None, rpnstd_metadata=None,
              rpnstd_metadata_list=None, ignore_typvar=False,
              ignore_etiket=False, no_quick_scan=False):
  """
  Read raw records from FSTD files, into the buffer.
  Multiple files can be read simultaneously.

  Parameters
  ----------
  filename : str or list
      The RPN standard file(s) to convert.
  progress : bool, optional
      Display a progress bar during the conversion, if the "progress"
      module is installed.
  rpnstd_metadata : bool, optional
      Include all RPN record attributes in the output metadata.
  rpnstd_metadata_list : str or list, optional
      Specify a minimal set of RPN record attributes to include in the
      output file.
  ignore_typvar : bool, optional
      Tells the converter to ignore the typvar when deciding if two
      records are part of the same field.  Default is to split the
      variable on different typvars.
  ignore_etiket : bool, optional
      Tells the converter to ignore the etiket when deciding if two
      records are part of the same field.  Default is to split the
      variable on different etikets.
  """
  from rpnpy.librmn.fstd98 import fstnbr, fstinl, fstprm, fstopenall
  from rpnpy.librmn.const import FST_RO
  from fstd2nc.extra import maybeFST as isFST
  from collections import Counter
  import numpy as np
  from glob import glob, has_magic
  import os
  import warnings

  # Set up lock for threading.
  # The same lock is shared for all Buffer objects, to synchronize access to
  # librmn.
  self._lock = _lock

  # Set up a progress bar for scanning the input files.
  Bar = _ProgressBar if progress is True else _FakeBar

  # Set default for minimal_metadata.
  if rpnstd_metadata is not None:
    minimal_metadata = not rpnstd_metadata
  if minimal_metadata is None:
    minimal_metadata = True
  # Set default for rpnstd_metadata_list.
  if minimal_metadata is True and rpnstd_metadata_list is None:
    rpnstd_metadata_list = ''
  if isinstance(rpnstd_metadata_list,str):
    rpnstd_metadata_list = rpnstd_metadata_list.replace(',',' ')
    rpnstd_metadata_list = rpnstd_metadata_list.split()
  if hasattr(rpnstd_metadata_list,'__len__'):
    rpnstd_metadata_list = tuple(rpnstd_metadata_list)
  self._rpnstd_metadata_list = rpnstd_metadata_list

  if not ignore_typvar:
    # Insert typvar value just after nomvar.
    self._var_id = self._var_id[0:1] + ('typvar',) + self._var_id[1:]
    self._human_var_id = self._human_var_id[0:1] + ('%(typvar)s',) + self._human_var_id[1:]
  if not ignore_etiket:
    # Insert etiket value just after nomvar.
    self._var_id = self._var_id[0:1] + ('etiket',) + self._var_id[1:]
    self._human_var_id = self._human_var_id[0:1] + ('%(etiket)s',) + self._human_var_id[1:]

  if isinstance(filename,str):
    infiles = [filename]
  else:
    infiles = list(filename)

  # Apply wildcard and directory expansion to filenames.
  expanded_infiles = []
  for infile in infiles:
    for f in sorted(glob(infile)) or [infile]:
      if os.path.isdir(f):
        # Scan directories recursively for their files.
        for dirpath, dirnames, filenames in os.walk(f,followlinks=True):
          for filename in filenames:
            expanded_infiles.append((infile,os.path.join(dirpath,filename)))
      else:
        expanded_infiles.append((infile,f))

  # Inspect all input files, and extract the headers from valid RPN files.
  matches = Counter()
  headers = []
  self._files = []
  if header_cache is None:
    header_cache = {}

  # Show a progress bar when there are multiple input files.
  if len(expanded_infiles) > 1:
    expanded_infiles = Bar(_("Inspecting input files"),
                           suffix='%(percent)d%% (%(index)d/%(max)d)').iter(expanded_infiles)

  for infile, f in expanded_infiles:
    fkey = f
    if fkey.startswith('/'):
      fkey = '__ROOT__' + fkey
    # Skip over files that are not valid RPN standard files, but still
    # register the input pattern in the match counter.
    if fkey not in header_cache and (not os.path.exists(f) or not isFST(f)):
      matches[infile] += 0
      continue
    matches[infile] += 1

    # Read the headers from the file(s) and store the info in the table.
    filenum = len(self._files)
    self._files.append(f)
    if fkey not in header_cache:
      funit = self._open(filenum)
      nrecs = fstnbr(funit)
      h = np.zeros(nrecs, dtype=self._headers_dtype)
      if no_quick_scan:
        keys = fstinl(funit)
        params = map(fstprm, keys)
        for i, prm in enumerate(params):
          for n, v in prm.items():
            if n in h.dtype.names:
              h[n][i] = v
      else:
        from fstd2nc.extra import all_params
        params = all_params(funit, out=h)
        keys = params['key']
      # Encode the keys without the file index info.
      h['key'] = keys
      h['key'] >>= 10
      header_cache[fkey] = h
    h = header_cache[fkey]
    # The file info will be an index into a separate file list.
    h['file_id'] = filenum
    headers.append(h)

  # Check if the input entries actually matched anything.
  for infile, count in matches.items():
    if count == 0:
      if os.path.isfile(infile):
        warn(_("'%s' is not an RPN standard file.")%infile)
      elif os.path.isdir(infile):
        warn(_("Directory '%s' does not contain any RPN standard files.")%infile)
      elif has_magic(infile):
        warn(_("No RPN standard files match '%s'.")%infile)
      elif not os.path.exists(infile):
        warn(_("'%s' does not exist.")%infile)
      else:
        warn(_("Problem with input file '%s'")%infile)

  nfiles = len(headers)
  if nfiles == 0:
    error(_("no input files found!"))
  info(_("Found %d RPN input file(s)")%nfiles)

  self._headers = np.ma.concatenate(headers)

  # Find all unique meta (coordinate) records, and link a subset of files
  # that provide all unique metadata records.
  # This will make it easier to look up the meta records later.
  meta_mask = np.zeros(len(self._headers), dtype='bool')
  for meta_name in self._meta_records:
    # Pad the name to the full 4-character nomvar width.
    meta_name = (meta_name + '   ')[:4]
    meta_mask |= (self._headers['nomvar'] == meta_name)
  meta_recids = np.where(meta_mask)[0]
  # Use the same unique parameters as regular variables, plus
  # ig1, ig2, ig3, ig4.
  # Suppress FutureWarning from numpy about doing this.  Probably benign...
  with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    meta_keys = self._headers.data[meta_mask][list(self._var_id)+['ip1','ip2','ip3','ig1','ig2','ig3','ig4']]
  meta_keys, ind = np.unique(meta_keys, return_index=True)
  meta_recids = meta_recids[ind]
  # Find the files that give these unique coord records.
  file_ids = sorted(set(self._headers['file_id'][meta_recids]))
  filenames = [self._files[f] for f in file_ids]
  if len(filenames) > 500:
    error(_("Holy crap, how many coordinates do you have???"))
  # If no coordinates found, just open the first file as a dummy file.
  # Less error-prone than checking if _meta_funit is defined every time
  # an FSTD function is called.
  if len(filenames) == 0:
    filenames = self._files[0:1]
  # Open these files and link them together.
  self._meta_funit = fstopenall(filenames, FST_RO)
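# Usage sketch for the input handling above (paths are illustrative): the
# expansion loop accepts plain files, lists, glob patterns, and directories
# (walked recursively), and a header_cache dict can be shared between Buffer
# objects to avoid re-scanning the same files:
#
#   import fstd2nc
#   buf = fstd2nc.Buffer("forecast_000.fst")                        # one file
#   buf = fstd2nc.Buffer(["forecast_000.fst", "forecast_012.fst"])  # list
#   buf = fstd2nc.Buffer("forecast_*.fst")                          # glob
#   buf = fstd2nc.Buffer("/data/run1")                              # directory
#
#   cache = {}
#   buf1 = fstd2nc.Buffer("forecast_*.fst", header_cache=cache)  # scans files
#   buf2 = fstd2nc.Buffer("forecast_*.fst", header_cache=cache)  # reuses headers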
def _fstd2nc_cmdline (buffer_type=Buffer):
  from argparse import ArgumentParser
  from sys import stdout, argv
  from os.path import exists
  from rpnpy.librmn.fstd98 import FSTDError, fstopt
  parser = ArgumentParser(description=_("Converts an RPN standard file (FSTD) to netCDF format."))
  parser.add_argument('infile', nargs='+', metavar='<fstd_file>',
                      help=_('The RPN standard file(s) to convert.'))
  parser.add_argument('outfile', metavar='<netcdf_file>',
                      help=_('The name of the netCDF file to create.'))
  buffer_type._cmdline_args(parser)
  parser.add_argument('--msglvl',
                      choices=['0','DEBUG','2','INFORM','4','WARNIN','6','ERRORS','8','FATALE','10','SYSTEM','CATAST'],
                      default='WARNIN',
                      help=_('How much information to print to stdout during the conversion.  Default is %(default)s.'))
  parser.add_argument('--nc-format',
                      choices=['NETCDF4','NETCDF4_CLASSIC','NETCDF3_CLASSIC','NETCDF3_64BIT_OFFSET','NETCDF3_64BIT_DATA'],
                      default='NETCDF4',
                      help=_('Which variant of netCDF to write.  Default is %(default)s.'))
  parser.add_argument('--zlib', action='store_true',
                      help=_("Turn on compression for the netCDF file.  Only works for NETCDF4 and NETCDF4_CLASSIC formats."))
  parser.add_argument('--compression', type=int, default=4,
                      help=_("Compression level for the netCDF file.  Only used if --zlib is set.  Default: 4."))
  parser.add_argument('-f', '--force', action='store_true',
                      help=_("Overwrite the output file if it already exists."))
  parser.add_argument('--no-history', action='store_true',
                      help=_("Don't put the command-line invocation in the netCDF metadata."))
  args = parser.parse_args()
  buffer_type._check_args(parser, args)
  args = vars(args)
  infiles = args.pop('infile')
  outfile = args.pop('outfile')
  msglvl = args.pop('msglvl')
  nc_format = args.pop('nc_format')
  zlib = args.pop('zlib')
  force = args.pop('force')
  no_history = args.pop('no_history')
  compression = args.pop('compression')
  progress = args.get('progress', False)

  # Apply message level criteria.
  try:
    msglvl = int(msglvl)
  except ValueError:
    msglvl = {'DEBUG':0, 'INFORM':2, 'WARNIN':4, 'ERRORS':6,
              'FATALE':8, 'SYSTEM':10, 'CATAST':10}[msglvl]
  fstopt('MSGLVL', msglvl)

  try:
    buf = buffer_type(infiles, **args)
  except FSTDError:
    error(_("problem opening one or more input files."))

  # Check if output file already exists.
  if exists(outfile) and not force:
    overwrite = False
    if stdout.isatty():
      while True:
        print(_("Warning: '%s' already exists!  Overwrite? (y/n):")%(outfile), end=' ')
        try:
          ans = raw_input()  # Python 2
        except NameError:
          ans = input()      # Python 3
        if ans.lower() in ('y','yes','o','oui'):
          overwrite = True
          break
        if ans.lower() in ('n','no','non'):
          overwrite = False
          break
        print(_("Sorry, invalid response."))
    if overwrite is False:
      error(_("Refusing to overwrite existing file '%s'.")%(outfile))

  # Append the command invocation to the netCDF metadata?
  if no_history:
    global_metadata = None
  else:
    from datetime import datetime
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    command = list(argv)
    # Any arguments with spaces should be surrounded by quotes.
    for i, c in enumerate(command):
      if " " in c:
        command[i] = "'" + c + "'"
    command = " ".join(command)
    history = timestamp + ": " + command
    global_metadata = {"history": history}

  buf.to_netcdf(outfile, nc_format=nc_format, global_metadata=global_metadata,
                zlib=zlib, compression=compression, progress=progress)
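# Example invocations (assuming this function is exposed through an "fstd2nc"
# console script; file names are illustrative):
#
#   fstd2nc input.fst output.nc
#   fstd2nc --zlib --compression 9 -f input.fst output.nc
#   fstd2nc --nc-format NETCDF3_CLASSIC --msglvl DEBUG --no-history input.fst output.nc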
# Wrapper for the command-line interface, trapping Ctrl-C so the user gets a
# clean error message instead of a traceback.
def _fstd2nc_cmdline_trapped (*args, **kwargs):
  try:
    _fstd2nc_cmdline(*args, **kwargs)
  except KeyboardInterrupt:
    error(_("Aborted by user."))