Example #1
 def _do_filter (p, cmd):
   # Evaluate a filter expression, using the record header fields as the
   # local variables.
   try:
     return eval(cmd, None, p)
   except SyntaxError:
     error (_("unable to parse the filter: %s")%cmd)
   except NameError as e:
     # Exceptions have no '.message' attribute in Python 3; str(e) works on
     # both Python 2 and 3.
     error (str(e))
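
A minimal sketch of what the eval() call above does, assuming the record
headers have been unpacked into a dict of numpy arrays (the column names and
values here are illustrative, not taken from a real file):

 import numpy as np

 records = {'ip2': np.array([0, 24, 48]), 'ip1': np.array([500, 500, 850])}
 # Each header column is visible to eval() as a local variable, so a string
 # like "ip2==24" becomes an element-wise comparison returning a boolean mask.
 mask = eval("ip2==24", None, records)
 print(mask)   # [False  True False]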
Example #2
 def __init__ (self, *args, **kwargs):
   """
   filter : str or list, optional
       Subset RPN file records using the given criteria.  For example, to
       convert only 24-hour forecasts you could use filter="ip2==24"
   """
   import numpy as np
   filter = kwargs.pop('filter',None)
   if filter is None:
     filter = []
   if isinstance(filter,str):
     filter = [filter]
   self._filters = tuple(filter)
   super(FilterRecords,self).__init__(*args,**kwargs)
  if len(self._filters) == 0: return
  # Start with all records enabled, then clear the flag for any record that
  # fails one of the filter expressions.
  flags = np.ones(len(self._headers),dtype='bool')
  # Expose each header column by name, for use in the filter expressions.
  records = dict([(n,self._headers[n]) for n in self._headers.dtype.names])
   for cmd in self._filters:
     try:
       flags &= self._do_filter(records, cmd)
     except TypeError:
       error (_("unable to apply the filter: %s")%cmd)
   # To filter out unwanted records, mark them as "deleted" in the list.
   self._headers['dltf'] = self._headers['dltf'] | (~flags)
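
A hypothetical usage, assuming this mixin is part of the fstd2nc.Buffer class
used in the other examples (the file name is a placeholder):

 import fstd2nc
 # Keep only the 24-hour forecast records; everything else is flagged as
 # deleted ('dltf') and skipped during conversion.
 buf = fstd2nc.Buffer('model_output.fst', filter='ip2==24')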
Example #3
  def __init__ (self, filename, header_cache=None, progress=False, minimal_metadata=None, rpnstd_metadata=None, rpnstd_metadata_list=None, ignore_typvar=False, ignore_etiket=False, no_quick_scan=False):
    """
    Read raw records from FSTD files into the buffer.
    Multiple files can be read simultaneously.

    Parameters
    ----------
    filename : str or list
        The RPN standard file(s) to convert.
    progress : bool, optional
        Display a progress bar during the conversion, if the "progress"
        module is installed.
    rpnstd_metadata : bool, optional
        Include all RPN record attributes in the output metadata.
    rpnstd_metadata_list : str or list, optional
        Specify a minimal set of RPN record attributes to include in the
        output file.
    ignore_typvar : bool, optional
        Tells the converter to ignore the typvar when deciding if two
        records are part of the same field.  Default is to split the
        variable on different typvars.
    ignore_etiket : bool, optional
        Tells the converter to ignore the etiket when deciding if two
        records are part of the same field.  Default is to split the
        variable on different etikets.
    """
    from rpnpy.librmn.fstd98 import fstnbr, fstinl, fstprm, fstopenall
    from rpnpy.librmn.const import FST_RO
    from fstd2nc.extra import maybeFST as isFST
    from collections import Counter
    import numpy as np
    from glob import glob, has_magic
    import os
    import warnings

    # Set up lock for threading.
    # The same lock is shared for all Buffer objects, to synchronize access to
    # librmn.
    self._lock = _lock

    # Set up a progress bar for scanning the input files.
    Bar = _ProgressBar if progress is True else _FakeBar

    # Set default for minimal_metadata
    if rpnstd_metadata is not None:
      minimal_metadata = not rpnstd_metadata
    if minimal_metadata is None:
      minimal_metadata = True
    # Set default for rpnstd_metadata_list
    if minimal_metadata is True and rpnstd_metadata_list is None:
      rpnstd_metadata_list = ''
    if isinstance(rpnstd_metadata_list,str):
      rpnstd_metadata_list = rpnstd_metadata_list.replace(',',' ')
      rpnstd_metadata_list = rpnstd_metadata_list.split()
    if hasattr(rpnstd_metadata_list,'__len__'):
      rpnstd_metadata_list = tuple(rpnstd_metadata_list)
    self._rpnstd_metadata_list = rpnstd_metadata_list

    if not ignore_typvar:
      # Insert typvar value just after nomvar.
      self._var_id = self._var_id[0:1] + ('typvar',) + self._var_id[1:]
      self._human_var_id = self._human_var_id[0:1] + ('%(typvar)s',) + self._human_var_id[1:]
    if not ignore_etiket:
      # Insert etiket value just after nomvar.
      self._var_id = self._var_id[0:1] + ('etiket',) + self._var_id[1:]
      self._human_var_id = self._human_var_id[0:1] + ('%(etiket)s',) + self._human_var_id[1:]

    if isinstance(filename,str):
      infiles = [filename]
    else:
      infiles = list(filename)

    # Apply wildcard and directory expansion to filenames.
    expanded_infiles = []
    for infile in infiles:
      for f in sorted(glob(infile)) or [infile]:
        if os.path.isdir(f):
          for dirpath, dirnames, filenames in os.walk(f,followlinks=True):
            for filename in filenames:
              expanded_infiles.append((infile,os.path.join(dirpath,filename)))
        else:
          expanded_infiles.append((infile,f))

    # Inspect all input files, and extract the headers from valid RPN files.
    matches = Counter()
    headers = []
    self._files = []
    if header_cache is None: header_cache = {}

    # Show a progress bar when there are multiple input files.
    if len(expanded_infiles) > 1:
      expanded_infiles = Bar(_("Inspecting input files"), suffix='%(percent)d%% (%(index)d/%(max)d)').iter(expanded_infiles)

    for infile, f in expanded_infiles:
      fkey = f
      # Use a prefix instead of a leading '/' for absolute paths in the
      # cache key.
      if fkey.startswith('/'):
        fkey = '__ROOT__'+fkey
      if fkey not in header_cache and (not os.path.exists(f) or not isFST(f)):
        matches[infile] += 0
        continue
      matches[infile] += 1

      # Read the headers from the file(s) and store the info in the table.
      filenum = len(self._files)
      self._files.append(f)
      if fkey not in header_cache:
        funit = self._open(filenum)
        nrecs = fstnbr(funit)
        h = np.zeros(nrecs, dtype=self._headers_dtype)

        if no_quick_scan:
          keys = fstinl(funit)
          params = map(fstprm, keys)
          for i,prm in enumerate(params):
            for n,v in prm.items():
              if n in h.dtype.names:
                h[n][i] = v
        else:
          from fstd2nc.extra import all_params
          params = all_params(funit,out=h)
          keys = params['key']

        # Store the keys with the file index info (the low 10 bits) stripped
        # off; the source file is tracked separately via 'file_id' below.
        h['key'] = keys
        h['key'] >>= 10
        header_cache[fkey] = h
      h = header_cache[fkey]
      # The file info will be an index into a separate file list.
      h['file_id'] = filenum

      headers.append(h)

    # Check if the input entries actually matched anything.
    for infile, count in matches.items():
      if count == 0:
        if os.path.isfile(infile):
          warn(_("'%s' is not an RPN standard file.")%infile)
        elif os.path.isdir(infile):
          warn(_("Directory '%s' does not contain any RPN standard files.")%infile)
        elif has_magic(infile):
          warn(_("No RPN standard files match '%s'.")%infile)
        elif not os.path.exists(infile):
          warn(_("'%s' does not exist.")%infile)
        else:
          warn(_("Problem with input file '%s'")%infile)

    nfiles = len(headers)
    if nfiles == 0:
      error(_("no input files found!"))
    info(_("Found %d RPN input file(s)")%nfiles)

    self._headers = np.ma.concatenate(headers)


    # Find all unique meta (coordinate) records, and link a subset of files
    # that provide all unique metadata records.
    # This will make it easier to look up the meta records later.
    meta_mask = np.zeros(len(self._headers),dtype='bool')
    for meta_name in self._meta_records:
      # Pad / truncate the name to 4 characters, to match the 'nomvar' field.
      meta_name = (meta_name+'   ')[:4]
      meta_mask |= (self._headers['nomvar'] == meta_name)
    meta_recids = np.where(meta_mask)[0]
    # Use the same unique parameters as regular variables.
    # Plus, ig1,ig2,ig3,ig4.
    # Suppress FutureWarning from numpy about doing this.  Probably benign...
    with warnings.catch_warnings():
      warnings.simplefilter("ignore")
      meta_keys = self._headers.data[meta_mask][list(self._var_id)+['ip1','ip2','ip3','ig1','ig2','ig3','ig4']]
    meta_keys, ind = np.unique(meta_keys, return_index=True)
    meta_recids = meta_recids[ind]
    # Find the files that give these unique coord records.
    file_ids = sorted(set(self._headers['file_id'][meta_recids]))
    filenames = [self._files[f] for f in file_ids]
    if len(filenames) > 500:
      error(_("Holy crap, how many coordinates do you have???"))
    # If no coordinates found, just open the first file as a dummy file.
    # Less error-prone than checking if _meta_funit is defined every time
    # an FSTD function is called.
    if len(filenames) == 0:
      filenames = self._files[0:1]
    # Open these files and link them together
    self._meta_funit = fstopenall(filenames, FST_RO)
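
A hypothetical usage of this constructor, assuming it belongs to the same
fstd2nc.Buffer class driven by the command-line code below (file names are
placeholders):

 import fstd2nc
 # Scan the input file with a progress bar, keeping all RPN record
 # attributes in the output metadata, then write the result to netCDF.
 buf = fstd2nc.Buffer('forecast.fst', progress=True, rpnstd_metadata=True)
 buf.to_netcdf('forecast.nc')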
Example #4
def _fstd2nc_cmdline(buffer_type=Buffer):
    from argparse import ArgumentParser
    from sys import stdout, argv
    from os.path import exists
    from rpnpy.librmn.fstd98 import FSTDError, fstopt
    parser = ArgumentParser(description=_(
        "Converts an RPN standard file (FSTD) to netCDF format."))
    parser.add_argument('infile',
                        nargs='+',
                        metavar='<fstd_file>',
                        help=_('The RPN standard file(s) to convert.'))
    parser.add_argument('outfile',
                        metavar='<netcdf_file>',
                        help=_('The name of the netCDF file to create.'))
    buffer_type._cmdline_args(parser)
    parser.add_argument(
        '--msglvl',
        choices=[
            '0', 'DEBUG', '2', 'INFORM', '4', 'WARNIN', '6', 'ERRORS', '8',
            'FATALE', '10', 'SYSTEM', 'CATAST'
        ],
        default='WARNIN',
        help=
        _('How much information to print to stdout during the conversion.  Default is %(default)s.'
          ))
    parser.add_argument(
        '--nc-format',
        choices=[
            'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_CLASSIC',
            'NETCDF3_64BIT_OFFSET', 'NETCDF3_64BIT_DATA'
        ],
        default='NETCDF4',
        help=_('Which variant of netCDF to write.  Default is %(default)s.'))
    parser.add_argument(
        '--zlib',
        action='store_true',
        help=
        _("Turn on compression for the netCDF file.  Only works for NETCDF4 and NETCDF4_CLASSIC formats."
          ))
    parser.add_argument(
        '--compression',
        type=int,
        default=4,
        help=
        _("Compression level for the netCDF file. Only used if --zlib is set. Default: 4."
          ))
    parser.add_argument(
        '-f',
        '--force',
        action='store_true',
        help=_("Overwrite the output file if it already exists."))
    parser.add_argument(
        '--no-history',
        action='store_true',
        help=_(
            "Don't put the command-line invocation in the netCDF metadata."))
    args = parser.parse_args()
    buffer_type._check_args(parser, args)
    args = vars(args)
    infiles = args.pop('infile')
    outfile = args.pop('outfile')
    msglvl = args.pop('msglvl')
    nc_format = args.pop('nc_format')
    zlib = args.pop('zlib')
    force = args.pop('force')
    no_history = args.pop('no_history')
    compression = args.pop('compression')
    progress = args.get('progress', False)

    # Apply message level criteria.
    try:
        msglvl = int(msglvl)
    except ValueError:
        msglvl = {
            'DEBUG': 0,
            'INFORM': 2,
            'WARNIN': 4,
            'ERRORS': 6,
            'FATALE': 8,
            'SYSTEM': 10,
            'CATAST': 10
        }[msglvl]
    fstopt('MSGLVL', msglvl)

    try:
        buf = buffer_type(infiles, **args)
    except FSTDError:
        error(_("problem opening one or more input files."))

    # Check if output file already exists
    if exists(outfile) and not force:
        overwrite = False
        if stdout.isatty():
            while True:
                print(_("Warning: '%s' already exists!  Overwrite? (y/n):") %
                      (outfile),
                      end=' ')
                try:
                    # raw_input() only exists in Python 2; fall back to
                    # input() on Python 3.
                    ans = raw_input()
                except NameError:
                    ans = input()
                if ans.lower() in ('y', 'yes', 'o', 'oui'):
                    overwrite = True
                    break
                if ans.lower() in ('n', 'no', 'non'):
                    overwrite = False
                    break
                print(_("Sorry, invalid response."))
        if overwrite is False:
            error(_("Refusing to overwrite existing file '%s'.") % (outfile))

    # Append the command invocation to the netCDF metadata?
    if no_history:
        global_metadata = None
    else:
        from datetime import datetime
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        command = list(argv)
        # Any arguments with spaces should be surrounded by quotes.
        for i, c in enumerate(command):
            if " " in c:
                command[i] = "'" + c + "'"
        command = " ".join(command)
        history = timestamp + ": " + command
        global_metadata = {"history": history}

    buf.to_netcdf(outfile,
                  nc_format=nc_format,
                  global_metadata=global_metadata,
                  zlib=zlib,
                  compression=compression,
                  progress=progress)
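
A minimal sketch of driving this function programmatically, assuming the same
behaviour as the installed console script (the file names are placeholders):

 import sys
 # The parser reads sys.argv, so populate it as if invoked from a shell.
 sys.argv = ['fstd2nc', '--force', '--zlib', 'input.fst', 'output.nc']
 _fstd2nc_cmdline()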
Example #5
def _fstd2nc_cmdline_trapped(*args, **kwargs):
    # Wrap the command-line driver so that a Ctrl-C from the user produces a
    # clean error message instead of a Python traceback.
    try:
        _fstd2nc_cmdline(*args, **kwargs)
    except KeyboardInterrupt:
        error(_("Aborted by user."))
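
A minimal sketch of the usual way this wrapper would be invoked as a script
entry point:

 if __name__ == '__main__':
     _fstd2nc_cmdline_trapped()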