Example #1
 def _check_args(cls, parser, args):
     from datetime import datetime
     super(netCDF_IO, cls)._check_args(parser, args)
     # Parse the reference date into a datetime object.
     if args.reference_date is not None:
         try:
             datetime.strptime(args.reference_date, '%Y-%m-%d')
         except ValueError:
             parser.error(
                 _("Unable to to parse the reference date '%s'.  Expected format is '%s'"
                   ) % (args.reference_date, _('YYYY-MM-DD')))
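The check above validates the date simply by attempting to parse it and turning the ValueError into an argparse error. A minimal standalone sketch of the same pattern (the helper name is ours, not part of fstd2nc):

    from datetime import datetime

    def is_valid_reference_date(s):
        # True if the string matches the YYYY-MM-DD format expected above.
        try:
            datetime.strptime(s, '%Y-%m-%d')
            return True
        except ValueError:
            return False

    # is_valid_reference_date('2021-07-15')  -> True
    # is_valid_reference_date('15/07/2021')  -> False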
Example #2
 def _cmdline_args(cls, parser):
     import argparse
     super(netCDF_Atts, cls)._cmdline_args(parser)
     parser.add_argument(
         '--metadata-file',
         type=argparse.FileType('r'),
         action='append',
         help=
         _('Use metadata from the specified file.  You can repeat this option multiple times to build metadata from different sources.'
           ))
     parser.add_argument(
         '--rename',
         metavar="OLDNAME=NEWNAME,...",
         help=_('Apply the specified name changes to the variables.'))
Example #3
 def _cmdline_args(cls, parser):
     super(netCDF_IO, cls)._cmdline_args(parser)
     parser.add_argument(
         '--time-units',
         choices=['seconds', 'minutes', 'hours', 'days'],
         default='hours',
         help=_(
             'The units for the output time axis.  Default is %(default)s.')
     )
     parser.add_argument(
         '--reference-date',
         metavar=_('YYYY-MM-DD'),
         help=
         _('The reference date for the output time axis.  The default is the starting date in the RPN file.'
           ))
Example #4
 def _check_args(cls, parser, args):
     super(netCDF_Atts, cls)._check_args(parser, args)
     if args.rename is not None:
         try:
             dict(r.split('=') for r in args.rename.split(','))
         except ValueError:
             parser.error(_("Unable to parse the rename arguments."))
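The expression above expects a comma-separated list of OLDNAME=NEWNAME pairs. A small sketch of how a well-formed (and a malformed) value behaves, using hypothetical variable names:

    rename = 'TT=air_temperature,HU=specific_humidity'
    table = dict(r.split('=') for r in rename.split(','))
    # table == {'TT': 'air_temperature', 'HU': 'specific_humidity'}

    # A malformed entry such as 'TT:air_temperature' makes dict() raise
    # ValueError, which the method above reports as a parser error.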
Example #5
 def _do_filter (p, cmd):
   try:
     return eval(cmd, None, p)
   except SyntaxError:
     error (_("unable to parse the filter: %s")%cmd)
   except NameError as e:
      error (str(e))  # str(e) works on both Python 2 and 3 (e.message is Python 2 only).
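Each filter is an ordinary Python expression evaluated with the record header columns as local names. A rough illustration of the idea, assuming numpy arrays for the header columns:

    import numpy as np

    headers = {'ip2': np.array([0, 12, 24, 24])}
    mask = eval('ip2==24', None, headers)
    # mask -> array([False, False,  True,  True])
    # FilterRecords (Example #11) combines such masks with '&=' to decide
    # which records to keep.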
Example #6
 def _cmdline_args(cls, parser):
     super(Ensembles, cls)._cmdline_args(parser)
     parser.add_argument(
         '--ensembles',
         action='store_true',
         help=
         _('Collect different etikets for the same variable together into an "ensemble" axis.'
           ))
Example #7
 def _cmdline_args(cls, parser):
     super(RemoveStuff, cls)._cmdline_args(parser)
     parser.add_argument(
         '--exclude',
         metavar='NAME,NAME,...',
         help=
         _("Exclude some axes or derived variables from the output.  Note that axes will only be excluded if they have a length of 1."
           ))
Example #8
 def _cmdline_args(cls, parser):
     super(XYCoords, cls)._cmdline_args(parser)
     parser.add_argument(
         '--subgrid-axis',
         action='store_true',
         help=
         _('For data on supergrids, split the subgrids along a "subgrid" axis.  The default is to leave the subgrids stacked together as they are in the RPN file.'
           ))
Example #9
 def _cmdline_args(cls, parser):
     super(Series, cls)._cmdline_args(parser)
     #group = parser.add_argument_group(_('Options for profile data'))
     group = parser
     group.add_argument(
         '--profile-momentum-vars',
         metavar='VAR1,VAR2,...',
         help=_(
             'Comma-separated list of variables that use momentum levels.'))
     group.add_argument(
         '--profile-thermodynamic-vars',
         metavar='VAR1,VAR2,...',
         help=
         _('Comma-separated list of variables that use thermodynamic levels.'
           ))
     group.add_argument(
         '--missing-bottom-profile-level',
         action='store_true',
         help=_('Assume the bottom level of the profile data is missing.'))
Example #10
 def _cmdline_args (cls, parser):
   from fstd2nc import __version__
   from argparse import SUPPRESS
   parser.add_argument('--version', action='version', version=__version__)
   group = parser.add_mutually_exclusive_group()
    # Note: this bare _() call appears to exist only to keep the help string in the
    # translation catalog; the visible help for --progress itself is suppressed below.
    _('Display a progress bar during the conversion, if the "progress" module is installed.')
   group.add_argument('--progress', action='store_true', default=True, help=SUPPRESS)
   group.add_argument('--no-progress', action='store_false', dest='progress', help=_('Disable the progress bar.'))
   group = parser.add_mutually_exclusive_group()
   group.add_argument('--minimal-metadata', action='store_true', default=True, help=_("Don't include RPN record attributes and other internal information in the output metadata.")+" "+_("This is the default behaviour."))
   group.add_argument('--rpnstd-metadata', action='store_false', dest='minimal_metadata', help=_("Include all RPN record attributes in the output metadata."))
   group.add_argument('--rpnstd-metadata-list', metavar='nomvar,...', help=_("Specify a minimal set of RPN record attributes to include in the output file."))
   parser.add_argument('--ignore-typvar', action='store_true', help=_('Tells the converter to ignore the typvar when deciding if two records are part of the same field.  Default is to split the variable on different typvars.'))
   parser.add_argument('--ignore-etiket', action='store_true', help=_('Tells the converter to ignore the etiket when deciding if two records are part of the same field.  Default is to split the variable on different etikets.'))
   parser.add_argument('--no-quick-scan', action='store_true', help=SUPPRESS)
Example #11
 def __init__ (self, *args, **kwargs):
   """
   filter : str or list, optional
       Subset RPN file records using the given criteria.  For example, to
       convert only 24-hour forecasts you could use filter="ip2==24"
   """
   import numpy as np
   filter = kwargs.pop('filter',None)
   if filter is None:
     filter = []
   if isinstance(filter,str):
     filter = [filter]
   self._filters = tuple(filter)
   super(FilterRecords,self).__init__(*args,**kwargs)
   if len(self._filters) == 0: return
   flags = np.ones(len(self._headers),dtype='bool')
   records = dict([(n,self._headers[n]) for n in self._headers.dtype.names])
   for cmd in self._filters:
     try:
       flags &= self._do_filter(records, cmd)
     except TypeError:
       error (_("unable to apply the filter: %s")%cmd)
   # To filter out unwanted records, mark them as "deleted" in the list.
   self._headers['dltf'] = self._headers['dltf'] | (~flags)
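A hedged usage sketch of the filter option at the Python level, matching the docstring above (the file name is a placeholder, and we assume the usual composed fstd2nc.Buffer class):

    import fstd2nc
    # Keep only the 24-hour forecast records.
    buf = fstd2nc.Buffer('model_output.fst', filter='ip2==24')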
Example #12
 def _cmdline_args (cls, parser):
   super(FilterRecords,cls)._cmdline_args(parser)
   parser.add_argument('--filter', metavar='CONDITION', action='append', help=_("Subset RPN file records using the given criteria.  For example, to convert only 24-hour forecasts you could use --filter ip2==24"))
Example #13
    def _makevars(self):
        from fstd2nc.mixins import _var_type, _axis_type, _dim_type
        from fstd2nc.mixins.dates import stamp2datetime
        from collections import OrderedDict
        import numpy as np
        from rpnpy.librmn.fstd98 import fstlir

        forecast_axis = None  # To attach the forecast axis.
        station = None  # To attach the station names as coordinates.
        momentum = thermo = None  # To attach the vertical axes.

        super(Series, self)._makevars()

        # Get station and forecast info.
        # Need to read from original records, because this info isn't in the
        # data stream.
        station_header = fstlir(self._meta_funit, nomvar='STNS')
        if station_header is not None:
            array = station_header['d'].transpose()
            # Re-cast array as string.
            # I don't know why I have to subtract 128 - maybe something to do with
            # how the characters are encoded in the file?
            # This isn't always needed.  Have test files for both cases.
            # Need help making this more robust!
            if array.flatten()[0] >= 128:
                array -= 128
            array = array.view('|S1')
            nstations, strlen = array.shape
            array = array.flatten().view('|S%d' % strlen)
            # Strip out trailing whitespace.
            # Python3: convert bytes to str
            array[:] = [str(arr.decode()).rstrip() for arr in array]
            array = array.view('|S1').reshape(nstations, strlen)
            station_id = _dim_type('station_id', nstations)
            station_strlen = _dim_type('station_strlen', strlen)
            # Encode it as 2D character array for netCDF file output.
            station = _var_type('station', {}, [station_id, station_strlen],
                                array)
        # Create forecast axis.
        forecast_header = fstlir(self._meta_funit, nomvar='HH')
        if forecast_header is not None:
            atts = OrderedDict(units='hours')
            # Note: the information in 'HH' is actually the hour of validity.
            # Need to subtract the hour from the date of origin in order to get
            # the leadtime.
            starting_hour = stamp2datetime(forecast_header['dateo']).hour
            array = forecast_header['d'].flatten() - starting_hour
            forecast_timedelta = np.array(array * 3600, 'timedelta64[s]')
            forecast_axis = _axis_type('forecast', atts, array)
        # Extract vertical coordinates.
        for vertvar in ('SH', 'SV'):
            header = fstlir(self._meta_funit, nomvar=vertvar)
            if header is None: continue
            array = header['d'].squeeze()
            # Drop the top or bottom levels to match the profile data?
            if self._missing_bottom_profile_level:
                array = array[:-1]
            if array.ndim != 1: continue
            atts = OrderedDict(self._get_header_atts(header))
            if vertvar == 'SH': thermo = _axis_type('level', atts, array)
            if vertvar == 'SV': momentum = _axis_type('level', atts, array)

        # 'Y' data should be handled fine by _XYCoords - just give a more
        # specific name to the ni axis for clarity.
        for var in self._varlist:
            if var.atts.get('typvar') == 'T' and var.atts.get('grtyp') == 'Y':
                dims = var.dims
                iaxis = var.getaxis('i')
                if iaxis is not None and station is not None and len(
                        iaxis) == station.shape[0]:
                    var.axes[dims.index('i')] = station.axes[0]

        # Remove degenerate vertical axis for '+' data.
        # (The one coming from IP1, which is not used.)
        for var in self._varlist:
            if var.atts.get('typvar') == 'T' and var.atts.get('grtyp') == '+':
                dims = var.dims
                if 'level' in dims:
                    var.record_id = var.record_id.squeeze(
                        axis=dims.index('level'))
                    var.axes.pop(dims.index('level'))

        # For '+' data, ni is the vertical level, and nj is the forecast.
        known_levels = dict()
        for var in self._varlist:

            if var.atts.get('typvar') != 'T': continue
            if var.atts.get('grtyp') != '+': continue

            dims = var.dims

            # The j dimension is actually the forecast time.
            jaxis = var.getaxis('j')
            if jaxis is not None and forecast_axis is not None and len(
                    jaxis) == len(forecast_axis):
                var.axes[dims.index('j')] = forecast_axis

            # The i dimension is actually the vertical coordinate for this type of
            # data.
            iaxis = var.getaxis('i')
            if iaxis is not None:
                # If there's only 1 level (degenerate), then remove that dimension.
                if len(iaxis) == 1:
                    var.axes.pop(dims.index('i'))
                    continue
                # Try to map to thermodynamic or momentum levels.
                level = iaxis
                level.name = 'level'
                if var.name in self._momentum_vars and momentum is not None:
                    if len(level) == len(momentum):
                        level = momentum
                    else:
                        warn(
                            _("Wrong number of momentum levels found in the data."
                              ))
                if var.name in self._thermo_vars and thermo is not None:
                    if len(level) == len(thermo):
                        level = thermo
                    else:
                        warn(
                            _("Wrong number of thermodynamic levels found in the data."
                              ))
                if level is iaxis:
                    warn(
                        _("Unable to find the vertical coordinates for %s." %
                          var.name))
                    # Attach a generic level dimension.
                    nlev = len(level)
                    if nlev not in known_levels:
                        known_levels[nlev] = _dim_type('level', nlev)
                    level = known_levels[nlev]
                else:
                    # Found vertical levels, now define the level kind so VCoords
                    # mixin can add more metadata.
                    var.atts['kind'] = 5
                var.axes[dims.index('i')] = level

        # Some support for squashing forecasts.
        if getattr(self, '_squash_forecasts', False) is True:
            known_squashed_forecasts = dict()
            known_leadtimes = dict()
            known_reftimes = dict()
            for var in self._varlist:
                # Can only do this for a single date of origin, because the time
                # axis and forecast axis are not adjacent for this type of data.
                time = var.getaxis('time')
                forecast = var.getaxis('forecast')
                if time is None or forecast is None: continue
                if len(time) != 1:
                    warn(
                        _("Can't use datev for timeseries data with multiple dates of origin.  Try re-running with the --dateo option."
                          ))
                    continue
                var.record_id = var.record_id.squeeze(
                    axis=var.dims.index('time'))
                var.axes.pop(var.dims.index('time'))
                key = (id(time), id(forecast))
                if key not in known_squashed_forecasts:
                    time0 = time.array[0]
                    # Convert pandas times (if using pandas for processing the headers)
                    time0 = np.datetime64(time0, 's')
                    # Calculate the date of validity
                    forecast_timedelta = np.array(forecast.array * 3600,
                                                  'timedelta64[s]')
                    squashed_times_array = time0 + forecast_timedelta
                    time = _axis_type(
                        'time',
                        OrderedDict([('standard_name', 'time'),
                                     ('long_name', 'Validity time'),
                                     ('axis', 'T')]), squashed_times_array)
                    known_squashed_forecasts[key] = time
                    # Include forecast and reftime auxiliary coordinates (emulate
                    # what's done in the dates mixin)
                    leadtime = _var_type(
                        'leadtime',
                        OrderedDict([
                            ('standard_name', 'forecast_period'),
                            ('long_name',
                             'Lead time (since forecast_reference_time)'),
                            ('units', 'hours')
                        ]), [time], forecast.array)
                    reftime = _var_type(
                        'reftime',
                        OrderedDict([('standard_name',
                                      'forecast_reference_time')]), {},
                        np.array(time0))
                    known_leadtimes[key] = leadtime
                    known_reftimes[key] = reftime
                var.axes[var.dims.index(
                    'forecast')] = known_squashed_forecasts[key]
                # Add leadtime and reftime as auxiliary coordinates.
                coords = var.atts.get('coordinates', [])
                coords.extend([known_leadtimes[key], known_reftimes[key]])
                var.atts['coordinates'] = coords

        # Hook in the station names as coordinate information.
        if station is not None:
            for station_id, varlist in self._iter_axes('station_id',
                                                       varlist=True):
                # Try to use the provided station coordinate, if it has a consistent
                # length.
                if len(station_id) == station.shape[0]:
                    for var in varlist:
                        var.axes[var.dims.index(
                            'station_id')] = station.axes[0]
                    station_id = station.axes[0]
                    station_coord = station
                # Otherwise, need to construct a new coordinate with the subset of
                # stations used.
                # Assume station_ids start at 1 (not 0).
                else:
                    indices = station_id.array - 1
                    array = station.array[indices, :]
                    # Use _axis_type instead of _dim_type to retain the station_id values.
                    station_id = _axis_type('station_id', {}, station_id.array)
                    axes = [station_id, station.axes[1]]
                    station_coord = _var_type('station', {}, axes, array)
                # Attach the station as a coordinate.
                for var in varlist:
                    coords = var.atts.get('coordinates', [])
                    coords.append(station_coord)
                    var.atts['coordinates'] = coords
Example #14
    def _makevars(self):
        from fstd2nc.mixins import _iter_type, _var_type, _axis_type, _dim_type
        from collections import OrderedDict
        from rpnpy.librmn.interp import ezqkdef, EzscintError, ezget_nsubgrids
        from rpnpy.librmn.all import readGrid, RMNError
        import numpy as np

        # Scan through the data, and look for any use of horizontal coordinates.
        grids = OrderedDict()
        gridmaps = OrderedDict()
        lats = OrderedDict()
        lons = OrderedDict()
        # Only output 1 copy of 1D coords (e.g. could have repetitions with
        # horizontal staggering).
        coords = set()

        super(XYCoords, self)._makevars()

        # Make sure any LA/LO records get processed first, so we can apply them as
        # coordinates to other variables.
        varlist = self._varlist
        varlist = [v for v in varlist if v.name in ('LA', 'LO')
                   ] + [v for v in varlist if v.name not in ('LA', 'LO')]

        for var in varlist:
            # Don't touch variables with no horizontal grid.
            if all(a not in var.dims for a in ('i', 'j', 'station_id')):
                continue
            # Get grid parameters.
            ni = int(var.atts['ni'])
            nj = int(var.atts['nj'])
            grtyp = var.atts['grtyp']
            ig1 = int(var.atts['ig1'])
            ig2 = int(var.atts['ig2'])
            ig3 = int(var.atts['ig3'])
            ig4 = int(var.atts['ig4'])
            # Uniquely identify the grid for this variable.
            #
            # Use a looser identifier for timeseries data (ni/nj have different
            # meanings here (not grid-related), and could have multiple grtyp
            # values ('+','Y') that should share the same lat/lon info).
            if var.atts.get('typvar', '').strip() == 'T':
                key = ('T', ig1, ig2)
            else:
                key = (grtyp, ni, nj, ig1, ig2, ig3, ig4)
            if grtyp in ('Y', '+'): key = key[1:]
            # Check if we already defined this grid.
            if key not in grids:

                lat = lon = xaxis = yaxis = None

                # Check if GridMap recognizes this grid.
                if grtyp not in self._direct_grids:
                    try:
                        grd = readGrid(self._meta_funit, var.atts.copy())
                        gmap = GridMap.gen_gmap(grd)
                        gmapvar = gmap.gen_gmapvar()
                        gridmaps[key] = gmapvar
                        (xaxis, yaxis, gridaxes, lon, lat) = gmap.gen_xyll()
                    except (TypeError, EzscintError, KeyError, RMNError,
                            ValueError):
                        pass  # Wasn't supported.

                # Otherwise, need to decode the information here.
                if lat is None or lon is None:

                    latatts = OrderedDict()
                    latatts['long_name'] = 'latitude'
                    latatts['standard_name'] = 'latitude'
                    latatts['units'] = 'degrees_north'
                    lonatts = OrderedDict()
                    lonatts['long_name'] = 'longitude'
                    lonatts['standard_name'] = 'longitude'
                    lonatts['units'] = 'degrees_east'

                    latarray = lonarray = None
                    try:
                        # First, handle non-ezqkdef grids.
                        if grtyp in self._direct_grids:
                            latarray = self._find_coord(
                                var, '^^')['d'].squeeze(axis=2)
                            lonarray = self._find_coord(
                                var, '>>')['d'].squeeze(axis=2)
                        # Handle ezqkdef grids.
                        else:
                            gdid = ezqkdef(ni, nj, grtyp, ig1, ig2, ig3, ig4,
                                           self._meta_funit)
                            ll = gdll(gdid)
                            latarray = ll['lat']
                            lonarray = ll['lon']
                            xycoords = gdgaxes(gdid)
                            ax = xycoords['ax'].transpose()
                            ay = xycoords['ay'].transpose()
                            # Convert from degenerate 2D arrays to 1D arrays.
                            ax = ax[0, :]
                            ay = ay[:, 0]
                            xaxis = _axis_type('x', {'axis': 'X'}, ax)
                            yaxis = _axis_type('y', {'axis': 'Y'}, ay)

                    except (TypeError, EzscintError, KeyError, RMNError,
                            ValueError):
                        pass

                    # Check for LA/LO variables, and use these as the coordinates if
                    # nothing else available.
                    if latarray is None and var.name == 'LA':
                        var.name = 'lat'
                        var.atts.update(latatts)
                        #grids[key] = list(var.axes)
                        lats[key] = var
                        continue
                    if lonarray is None and var.name == 'LO':
                        var.name = 'lon'
                        var.atts.update(lonatts)
                        grids[key] = list(var.axes)
                        lons[key] = var
                        continue

                    if latarray is None or lonarray is None:
                        warn(
                            _("Unable to get lat/lon coordinates for '%s'") %
                            var.name)
                        continue

                    # Construct lat/lon variables from latarray and lonarray.
                    latarray = latarray.transpose(
                    )  # Switch from Fortran to C order.
                    lonarray = lonarray.transpose(
                    )  # Switch from Fortran to C order.

                    # Case 1: lat/lon can be resolved into 1D Cartesian coordinates.
                    # Calculate the mean lat/lon arrays in double precision.
                    meanlat = np.mean(np.array(latarray, dtype=float),
                                      axis=1,
                                      keepdims=True)
                    meanlon = np.mean(np.array(lonarray, dtype=float),
                                      axis=0,
                                      keepdims=True)
                    if latarray.shape[
                            1] > 1 and lonarray.shape[1] > 1 and np.allclose(
                                latarray, meanlat) and np.allclose(
                                    lonarray, meanlon):
                        # Reduce back to single precision for writing out.
                        meanlat = np.array(meanlat,
                                           dtype=latarray.dtype).squeeze()
                        meanlon = np.array(meanlon,
                                           dtype=lonarray.dtype).squeeze()
                        # Ensure monotonicity of longitude field.
                        # (gdll may sometimes wrap last longitude to zero).
                        # Taken from old fstd_core.c code.
                        if meanlon[-2] > meanlon[-3] and meanlon[-1] < meanlon[
                                -2]:
                            meanlon[-1] += 360.
                        latarray = meanlat
                        lonarray = meanlon
                        lat = _axis_type('lat', latatts, latarray)
                        lon = _axis_type('lon', lonatts, lonarray)
                        gridaxes = [lat, lon]

                    # Case 2: lat/lon are series of points.
                    elif latarray.shape[0] == 1 and lonarray.shape[
                            0] == 1 and ('i' in var.dims
                                         or 'station_id' in var.dims):
                        latarray = latarray[0, :]
                        lonarray = lonarray[0, :]
                        # Special case for station data
                        station_id = var.getaxis('station_id')
                        if station_id is not None:
                            gridaxes = [station_id]
                            # Subset the lat/lon to the stations that are actually found.
                            # Assuming the station id (ip3) starts at 1.
                            if isinstance(station_id, _axis_type):
                                indices = np.array(station_id.array,
                                                   dtype=int) - 1
                                latarray = latarray[indices]
                                lonarray = lonarray[indices]
                        else:
                            gridaxes = [var.getaxis('i')]
                        lat = _var_type('lat', latatts, gridaxes, latarray)
                        lon = _var_type('lon', lonatts, gridaxes, lonarray)

                    # Case 3: General 2D lat/lon fields on X/Y coordinate system.
                    elif xaxis is not None and yaxis is not None:
                        gridaxes = [yaxis, xaxis]
                        # Special case: have supergrid data, and the user wants to split it?
                        if grtyp == 'U' and self._subgrid_axis:
                            ngrids = ezget_nsubgrids(gdid)
                            ny = len(yaxis.array) // ngrids
                            yaxis.array = yaxis.array[:ny]
                            subgrid = _dim_type('subgrid', ngrids)
                            gridaxes = [subgrid, yaxis, xaxis]
                            latarray = latarray.reshape(ngrids, ny, -1)
                            lonarray = lonarray.reshape(ngrids, ny, -1)
                        lat = _var_type('lat', latatts, gridaxes, latarray)
                        lon = _var_type('lon', lonatts, gridaxes, lonarray)

                    # Case 4: General 2D lat/lon fields with no coordinate system.
                    elif 'i' in var.dims and 'j' in var.dims:
                        gridaxes = [var.getaxis('j'), var.getaxis('i')]
                        lat = _var_type('lat', latatts, gridaxes, latarray)
                        lon = _var_type('lon', lonatts, gridaxes, lonarray)

                # --- End of lat/lon/xaxis/yaxis decoding.

                if lat is None or lon is None:
                    warn(
                        _("Unable to get lat/lon coordinates for '%s'") %
                        var.name)
                    continue

                # Sanity check on lat/lon - make sure we have something of the right size.
                if lat.array.shape == lat.shape and lon.array.shape == lon.shape:
                    grids[key] = gridaxes
                    lats[key] = lat
                    lons[key] = lon
                else:
                    warn(_("Wrong shape of lat/lon for '%s'") % var.name)
                    continue

            # --- End of grid decoding.

            gridaxes = grids[key]
            lat = lats[key]
            lon = lons[key]

            # Update the var's horizontal coordinates.
            newaxes = []
            if len(gridaxes) == 1:
                newaxes = [('i', gridaxes[0])]
            elif len(gridaxes) == 2:
                newaxes = [('j', gridaxes[0]), ('i', gridaxes[1])]
            elif len(gridaxes) == 3:
                newaxes = [('k', gridaxes[0]), ('j', gridaxes[1]),
                           ('i', gridaxes[2])]
            else:
                warn(_("Unusual grid axes for '%s' - ignoring.") % var.name)
            dims = var.dims
            for oldname, newaxis in newaxes:
                if oldname in dims:
                    var.axes[dims.index(oldname)] = newaxis

            # For 2D lat/lon, need to reference them as coordinates in order for
            # netCDF viewers to display the field properly.
            if 'lat' not in var.dims or 'lon' not in var.dims:
                coordinates = var.atts.get('coordinates', [])
                coordinates.extend([lon, lat])
                var.atts['coordinates'] = coordinates

            if key in gridmaps:
                var.atts['grid_mapping'] = gridmaps[key]

            # Throw out superfluous LA/LO variables, if lat/lon was already decoded.
            if var.name == 'LA' and ('lat' in var.dims or lat in coordinates):
                var.name = None
            if var.name == 'LO' and ('lon' in var.dims or lon in coordinates):
                var.name = None

        self._varlist = [v for v in varlist if v.name is not None]
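A tiny illustration of the Case 1 test above: a 2D latitude field collapses to a 1D axis when each row is constant along the second dimension (values are made up):

    import numpy as np

    latarray = np.array([[45., 45., 45.],
                         [46., 46., 46.]])   # varies only along the first axis
    meanlat = np.mean(np.array(latarray, dtype=float), axis=1, keepdims=True)
    np.allclose(latarray, meanlat)           # -> True, so a 1D 'lat' axis is used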
Example #15
 def _cmdline_args (cls, parser):
   super(Masks,cls)._cmdline_args(parser)
   parser.add_argument('--fill-value', type=float, default=1e30, help=_("The fill value to use for masked (missing) data.  Gets stored as '_FillValue' attribute in the metadata.  Default is '%(default)s'."))
Example #16
def _fstd2nc_cmdline_trapped(*args, **kwargs):
    try:
        _fstd2nc_cmdline(*args, **kwargs)
    except KeyboardInterrupt:
        error(_("Aborted by user."))
Example #17
def _fstd2nc_cmdline(buffer_type=Buffer):
    from argparse import ArgumentParser
    from sys import stdout, argv
    from os.path import exists
    from rpnpy.librmn.fstd98 import FSTDError, fstopt
    parser = ArgumentParser(description=_(
        "Converts an RPN standard file (FSTD) to netCDF format."))
    parser.add_argument('infile',
                        nargs='+',
                        metavar='<fstd_file>',
                        help=_('The RPN standard file(s) to convert.'))
    parser.add_argument('outfile',
                        metavar='<netcdf_file>',
                        help=_('The name of the netCDF file to create.'))
    buffer_type._cmdline_args(parser)
    parser.add_argument(
        '--msglvl',
        choices=[
            '0', 'DEBUG', '2', 'INFORM', '4', 'WARNIN', '6', 'ERRORS', '8',
            'FATALE', '10', 'SYSTEM', 'CATAST'
        ],
        default='WARNIN',
        help=
        _('How much information to print to stdout during the conversion.  Default is %(default)s.'
          ))
    parser.add_argument(
        '--nc-format',
        choices=[
            'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_CLASSIC',
            'NETCDF3_64BIT_OFFSET', 'NETCDF3_64BIT_DATA'
        ],
        default='NETCDF4',
        help=_('Which variant of netCDF to write.  Default is %(default)s.'))
    parser.add_argument(
        '--zlib',
        action='store_true',
        help=
        _("Turn on compression for the netCDF file.  Only works for NETCDF4 and NETCDF4_CLASSIC formats."
          ))
    parser.add_argument(
        '--compression',
        type=int,
        default=4,
        help=
        _("Compression level for the netCDF file. Only used if --zlib is set. Default: 4."
          ))
    parser.add_argument(
        '-f',
        '--force',
        action='store_true',
        help=_("Overwrite the output file if it already exists."))
    parser.add_argument(
        '--no-history',
        action='store_true',
        help=_(
            "Don't put the command-line invocation in the netCDF metadata."))
    args = parser.parse_args()
    buffer_type._check_args(parser, args)
    args = vars(args)
    infiles = args.pop('infile')
    outfile = args.pop('outfile')
    msglvl = args.pop('msglvl')
    nc_format = args.pop('nc_format')
    zlib = args.pop('zlib')
    force = args.pop('force')
    no_history = args.pop('no_history')
    compression = args.pop('compression')
    progress = args.get('progress', False)

    # Apply message level criteria.
    try:
        msglvl = int(msglvl)
    except ValueError:
        msglvl = {
            'DEBUG': 0,
            'INFORM': 2,
            'WARNIN': 4,
            'ERRORS': 6,
            'FATALE': 8,
            'SYSTEM': 10,
            'CATAST': 10
        }[msglvl]
    fstopt('MSGLVL', msglvl)

    try:
        buf = buffer_type(infiles, **args)
    except FSTDError:
        error(_("problem opening one or more input files."))

    # Check if output file already exists
    if exists(outfile) and not force:
        overwrite = False
        if stdout.isatty():
            while True:
                print(_("Warning: '%s' already exists!  Overwrite? (y/n):") %
                      (outfile),
                      end=' ')
                try:
                    ans = raw_input()
                except NameError:
                    ans = input()
                if ans.lower() in ('y', 'yes', 'o', 'oui'):
                    overwrite = True
                    break
                if ans.lower() in ('n', 'no', 'non'):
                    overwrite = False
                    break
                print(_("Sorry, invalid response."))
        if overwrite is False:
            error(_("Refusing to overwrite existing file '%s'.") % (outfile))

    # Append the command invocation to the netCDF metadata?
    if no_history:
        global_metadata = None
    else:
        from datetime import datetime
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        command = list(argv)
        # Any arguments with spaces should be surrounded by quotes.
        for i, c in enumerate(command):
            if " " in c:
                command[i] = "'" + c + "'"
        command = " ".join(command)
        history = timestamp + ": " + command
        global_metadata = {"history": history}

    buf.to_netcdf(outfile,
                  nc_format=nc_format,
                  global_metadata=global_metadata,
                  zlib=zlib,
                  compression=compression,
                  progress=progress)
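Based on the argparse definitions above, a typical command-line invocation might look like the following (the 'fstd2nc' entry-point name is an assumption, and the file names are placeholders):

    fstd2nc --zlib --compression 6 --nc-format NETCDF4 input.fst output.nc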
Example #18
 def _cmdline_args (cls, parser):
   super(Dates,cls)._cmdline_args(parser)
   group = parser.add_mutually_exclusive_group()
   group.add_argument('--datev', '--squash-forecasts', action='store_true', default=True, dest='squash_forecasts', help=_('Use the date of validity for the "time" axis.  This is the default.'))
   group.add_argument('--dateo', '--forecast-axis', action='store_false', dest='squash_forecasts', help=_('Use the date of original analysis for the time axis, and put the forecast times into a separate "forecast" axis.'))
Example #19
    def to_netcdf(self,
                  filename,
                  nc_format='NETCDF4',
                  global_metadata=None,
                  zlib=False,
                  compression=4,
                  progress=False):
        """
    Write the records to a netCDF file.
    Requires the netCDF4 package.
    """
        from fstd2nc.mixins import _var_type, _ProgressBar, _FakeBar
        from netCDF4 import Dataset
        import numpy as np
        f = Dataset(filename, "w", format=nc_format)

        # Apply global metadata (from config files and global_metadata argument).
        if 'global' in getattr(self, '_metadata', {}):
            f.setncatts(self._metadata['global'])
        if global_metadata is not None:
            f.setncatts(global_metadata)

        # Collect all the records that will be read/written.
        # List of (key,recshape,ncvar,ncind).
        # Note: derived variables (with values stored in memory) will be written
        # immediately, bypassing this list.
        io = []

        self._makevars()

        # Define the dimensions.
        for axis in self._iter_axes():
            # Special case: make the time dimension unlimited.
            if axis.name == 'time' and self._time_unlimited:
                f.createDimension(axis.name, None)
            else:
                f.createDimension(axis.name, len(axis))

        # Generate the variable structures.
        for var in self._iter_objects():

            # Write the variable.
            # Easy case: already have the data.
            if hasattr(var, 'array'):
                v = f.createVariable(var.name,
                                     datatype=var.array.dtype,
                                     dimensions=var.dims,
                                     zlib=zlib,
                                     complevel=compression)
                # Write the metadata.
                v.setncatts(var.atts)
                v[()] = var.array
                continue
            # Hard case: only have the record indices, need to loop over the records.
            # Get the shape of a single record for the variable.
            if not hasattr(var, 'record_id'): continue
            record_shape = var.shape[var.record_id.ndim:]
            # Use this as the "chunk size" for the netCDF file, to improve I/O
            # performance.
            chunksizes = (1, ) * var.record_id.ndim + record_shape
            v = f.createVariable(var.name,
                                 datatype=var.dtype,
                                 dimensions=var.dims,
                                 zlib=zlib,
                                 complevel=compression,
                                 chunksizes=chunksizes,
                                 fill_value=getattr(self, '_fill_value', None))
            # Turn off auto scaling of variables - want to encode the values as-is.
            # 'scale_factor' and 'add_offset' will only be applied when *reading*
            # the file after it's created.
            v.set_auto_scale(False)
            # Write the metadata.
            v.setncatts(var.atts)
            # Write the data.
            indices = list(np.ndindex(var.record_id.shape))
            # Sort the indices by FSTD key, so we're reading the records in the same
            # order as they're found on disk.
            keys = map(int, var.record_id.flatten())
            for r, ind in zip(keys, indices):
                if r >= 0:
                    io.append((r, record_shape, v, ind))

        # Now, do the actual transcribing of the data.
        # Read/write the data in the same order of records in the RPN file(s) to
        # improve performance.
        Bar = _ProgressBar if progress is True else _FakeBar
        bar = Bar(_("Saving netCDF file"), suffix="%(percent)d%% [%(myeta)s]")
        for r, shape, v, ind in bar.iter(sorted(io)):
            try:
                data = self._fstluk(
                    r, dtype=v.dtype)['d'].transpose().reshape(shape)
                v[ind] = data
            except (IndexError, ValueError):
                warn(
                    _("Internal problem with the script - unable to get data for '%s'"
                      ) % v.name)
                continue

        f.close()
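A hedged usage sketch of this method (file names are placeholders; Buffer as in Example #21):

    buf = Buffer('input.fst')
    buf.to_netcdf('output.nc', nc_format='NETCDF4_CLASSIC',
                  zlib=True, compression=4, progress=True)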
Example #20
    def _fix_names(self):

        # List of metadata keys that are internal to the FSTD file.
        internal_meta = self._headers.dtype.names

        # Generate unique axis names.
        axis_table = dict()
        for axis in self._iter_axes():
            if axis.name not in axis_table:
                axis_table[axis.name] = []
            axis_table[axis.name].append(axis)
        for axisname, axis_list in axis_table.items():
            if len(axis_list) == 1: continue
            warn(
                _("Multiple %s axes.  Appending integer suffixes to their names."
                  ) % axisname)
            for i, axis in enumerate(axis_list):
                axis.name = axis.name + str(i + 1)

        # Generate a string-based variable id.
        # Only works for true variables from the FSTD source
        # (needs metadata like etiket, etc.)
        def get_var_id(var):
            out = []
            for fmt in self._human_var_id:
                out.append(fmt % var.atts)
            return tuple(out)

        # Generate unique variable names.
        var_table = dict()
        for var in self._iter_objects():
            if var.name not in var_table:
                var_table[var.name] = []
            # Identify the variables by their index in the master list.
            var_table[var.name].append(var)

        for varname, var_list in var_table.items():
            # Only need to rename variables that are non-unique.
            if len(var_list) == 1: continue
            try:
                var_ids = [get_var_id(v) for v in var_list]
            except KeyError:
                # Some derived axes may not have enough metadata to generate an id,
                # so the best we can do is append an integer suffix.
                var_ids = [(str(r), ) for r in range(1, len(var_list) + 1)]

            var_ids = zip(*var_ids)

            # Omit parts of the var_id that are invariant over all the variables.
            var_ids = [var_id for var_id in var_ids if len(set(var_id)) > 1]
            # Starting from the rightmost key, remove as many keys as possible while
            # maintaining uniqueness.
            for j in reversed(range(len(var_ids))):
                test = var_ids[:j] + var_ids[j + 1:]
                if len(set(zip(*test))) == len(var_list):
                    var_ids = test

            var_ids = zip(*var_ids)

            var_ids = ['_'.join(var_id) for var_id in var_ids]

            warn(
                _("Multiple definitions of %s.  Adding unique suffixes %s.") %
                (varname, ', '.join(var_ids)))

            # Apply the name changes.
            for var, var_id in zip(var_list, var_ids):
                var.name = var.name + '_' + var_id

        for var in self._iter_objects():
            # Names must start with a letter or underscore.
            if not var.name[0].isalpha():
                warn(_("Renaming '%s' to '_%s'.") % (var.name, var.name))
                var.name = '_' + var.name

            # Strip out FSTD-specific metadata?
            if not hasattr(var, 'atts'): continue
            if self._rpnstd_metadata_list is not None:
                for n in internal_meta:
                    if n not in self._rpnstd_metadata_list:
                        var.atts.pop(n, None)
Example #21
  def __init__ (self, filename, header_cache=None, progress=False, minimal_metadata=None, rpnstd_metadata=None, rpnstd_metadata_list=None, ignore_typvar=False, ignore_etiket=False, no_quick_scan=False):
    """
    Read raw records from FSTD files, into the buffer.
    Multiple files can be read simultaneously.

    Parameters
    ----------
    filename : str or list
        The RPN standard file(s) to convert.
    progress : bool, optional
        Display a progress bar during the conversion, if the "progress"
        module is installed.
    rpnstd_metadata : bool, optional
        Include all RPN record attributes in the output metadata.
    rpnstd_metadata_list : str or list, optional
        Specify a minimal set of RPN record attributes to include in the
        output file.
    ignore_typvar : bool, optional
        Tells the converter to ignore the typvar when deciding if two
        records are part of the same field.  Default is to split the
        variable on different typvars.
    ignore_etiket : bool, optional
        Tells the converter to ignore the etiket when deciding if two
        records are part of the same field.  Default is to split the
        variable on different etikets.
    """
    from rpnpy.librmn.fstd98 import fstnbr, fstinl, fstprm, fstopenall
    from rpnpy.librmn.const import FST_RO
    from fstd2nc.extra import maybeFST as isFST
    from collections import Counter
    import numpy as np
    from glob import glob, has_magic
    import os
    import warnings

    # Set up lock for threading.
    # The same lock is shared for all Buffer objects, to synchronize access to
    # librmn.
    self._lock = _lock

    # Set up a progress bar for scanning the input files.
    Bar = _ProgressBar if progress is True else _FakeBar

    # Set default for minimal_metadata
    if rpnstd_metadata is not None:
      minimal_metadata = not rpnstd_metadata
    if minimal_metadata is None:
      minimal_metadata = True
    # Set default for rpnstd_metadata_list
    if minimal_metadata is True and rpnstd_metadata_list is None:
      rpnstd_metadata_list = ''
    if isinstance(rpnstd_metadata_list,str):
      rpnstd_metadata_list = rpnstd_metadata_list.replace(',',' ')
      rpnstd_metadata_list = rpnstd_metadata_list.split()
    if hasattr(rpnstd_metadata_list,'__len__'):
      rpnstd_metadata_list = tuple(rpnstd_metadata_list)
    self._rpnstd_metadata_list = rpnstd_metadata_list

    if not ignore_typvar:
      # Insert typvar value just after nomvar.
      self._var_id = self._var_id[0:1] + ('typvar',) + self._var_id[1:]
      self._human_var_id = self._human_var_id[0:1] + ('%(typvar)s',) + self._human_var_id[1:]
    if not ignore_etiket:
      # Insert etiket value just after nomvar.
      self._var_id = self._var_id[0:1] + ('etiket',) + self._var_id[1:]
      self._human_var_id = self._human_var_id[0:1] + ('%(etiket)s',) + self._human_var_id[1:]

    if isinstance(filename,str):
      infiles = [filename]
    else:
      infiles = list(filename)

    # Apply wildcard and directory expansion to filenames.
    expanded_infiles = []
    for infile in infiles:
      for f in sorted(glob(infile)) or [infile]:
        if os.path.isdir(f):
          for dirpath, dirnames, filenames in os.walk(f,followlinks=True):
            for filename in filenames:
              expanded_infiles.append((infile,os.path.join(dirpath,filename)))
        else:
          expanded_infiles.append((infile,f))

    # Inspect all input files, and extract the headers from valid RPN files.
    matches = Counter()
    headers = []
    self._files = []
    if header_cache is None: header_cache = {}

    # Show a progress bar when there are multiple input files.
    if len(expanded_infiles) > 1:
      expanded_infiles = Bar(_("Inspecting input files"), suffix='%(percent)d%% (%(index)d/%(max)d)').iter(expanded_infiles)

    for infile, f in expanded_infiles:
      fkey = f
      if fkey.startswith('/'):
        fkey = '__ROOT__'+fkey
      if fkey not in header_cache and (not os.path.exists(f) or not isFST(f)):
        matches[infile] += 0
        continue
      matches[infile] += 1

      # Read the headers from the file(s) and store the info in the table.
      filenum = len(self._files)
      self._files.append(f)
      if fkey not in header_cache:
        funit = self._open(filenum)
        nrecs = fstnbr(funit)
        h = np.zeros(nrecs, dtype=self._headers_dtype)

        if no_quick_scan:
          keys = fstinl(funit)
          params = map(fstprm, keys)
          for i,prm in enumerate(params):
            for n,v in prm.items():
              if n in h.dtype.names:
                h[n][i] = v
        else:
          from fstd2nc.extra import all_params
          params = all_params(funit,out=h)
          keys = params['key']

        # Encode the keys without the file index info.
        h['key'] = keys
        h['key'] >>= 10
        header_cache[fkey] = h
      h = header_cache[fkey]
      # The file info will be an index into a separate file list.
      h['file_id'] = filenum

      headers.append(h)

    # Check if the input entries actually matched anything.
    for infile, count in matches.items():
      if count == 0:
        if os.path.isfile(infile):
          warn(_("'%s' is not an RPN standard file.")%infile)
        elif os.path.isdir(infile):
          warn(_("Directory '%s' does not contain any RPN standard files.")%infile)
        elif has_magic(infile):
          warn(_("No RPN standard files match '%s'.")%infile)
        elif not os.path.exists(infile):
          warn(_("'%s' does not exist.")%infile)
        else:
          warn(_("Problem with input file '%s'")%infile)

    nfiles = len(headers)
    if nfiles == 0:
      error(_("no input files found!"))
    info(_("Found %d RPN input file(s)"%nfiles))

    self._headers = np.ma.concatenate(headers)


    # Find all unique meta (coordinate) records, and link a subset of files
    # that provide all unique metadata records.
    # This will make it easier to look up the meta records later.
    meta_mask = np.zeros(len(self._headers),dtype='bool')
    for meta_name in self._meta_records:
      meta_name = (meta_name+'   ')[:4]
      meta_mask |= (self._headers['nomvar'] == meta_name)
    meta_recids = np.where(meta_mask)[0]
    # Use the same unique parameters as regular variables.
    # Plus, ig1,ig2,ig3,ig4.
    # Suppress FutureWarning from numpy about doing this.  Probably benign...
    with warnings.catch_warnings():
      warnings.simplefilter("ignore")
      meta_keys = self._headers.data[meta_mask][list(self._var_id)+['ip1','ip2','ip3','ig1','ig2','ig3','ig4']]
    meta_keys, ind = np.unique(meta_keys, return_index=True)
    meta_recids = meta_recids[ind]
    # Find the files that give these unique coord records.
    file_ids = sorted(set(self._headers['file_id'][meta_recids]))
    filenames = [self._files[f] for f in file_ids]
    if len(filenames) > 500:
      error(_("Holy crap, how many coordinates do you have???"))
    # If no coordinates found, just open the first file as a dummy file.
    # Less error-prone than checking if _meta_funit is defined every time
    # an FSTD function is called.
    if len(filenames) == 0:
      filenames = self._files[0:1]
    # Open these files and link them together
    self._meta_funit = fstopenall(filenames, FST_RO)
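A hedged usage sketch of the constructor documented above (file names are placeholders):

    buf = Buffer(['forecast_000.fst', 'forecast_012.fst'],
                 rpnstd_metadata=True,   # keep all RPN record attributes
                 ignore_etiket=True,     # don't split variables on different etikets
                 progress=True)          # show a progress bar while scanning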
Example #22
  def _makevars_pandas (self):
    from collections import OrderedDict
    import numpy as np
    import pandas as pd
    import warnings

    nrecs = len(self._headers)

    # Degenerate case: no data in buffer
    if nrecs == 0: return

    # Convert records to a pandas DataFrame, which is faster to operate on.
    records = pd.DataFrame.from_records(self._headers)
    # Keep track of original dtypes (may need to re-cast later).
    original_dtypes = dict(self._headers_dtype)

    # Ignore deleted / invalidated records.
    records = records[records['dltf']==0]

    # Keep track of any axes that were generated.
    known_axes = dict()

    # Keep track of any auxiliary coordinates that were generated.
    known_coords = dict()

    # Iterate over each variable.
    # Variables are defined by the entries in _var_id.
    self._varlist = []
    for var_id, var_records in records.groupby(list(self._var_id)):
      var_id = OrderedDict(zip(self._var_id, var_id))
      nomvar = var_id['nomvar'].strip()
      nomvar = str(nomvar.decode()) # Python3: convert bytes to str.
      # Ignore meta records.
      if nomvar in self._meta_records: continue

      # Get the attributes, axes, and corresponding indices of each record.
      atts = OrderedDict()
      axes = OrderedDict()
      indices = OrderedDict()
      coordnames = []
      coord_axes = OrderedDict()
      coord_indices = OrderedDict()
      for n in records.columns:
        if n in self._ignore_atts: continue
        # Ignore columns which are masked out.
        # https://stackoverflow.com/questions/29530232/python-pandas-check-if-any-value-is-nan-in-dataframe
        if var_records[n].isnull().values.any(): continue
        # Get the unique values, in order.
        # Coerce back to original dtype, since masked columns get upcasted to
        # float64 in pandas.DataFrame.from_records.
        try:
          column = var_records[n].astype(original_dtypes[n])
        except TypeError:
          # Some types may not be re-castable.
          # For instance, pandas < 0.23 can't convert between datetime64 with
          # different increments ([ns] and [s]).
          column = var_records[n]
        cat = pd.Categorical(column)
        # Is this column an outer axis?
        if n in self._outer_axes:
          values = tuple(cat.categories)
          if (n,values) not in known_axes:
            known_axes[(n,values)] = _axis_type(name = n, atts = OrderedDict(),
                                   array = np.array(values,dtype=column.dtype))
          axes[n] = known_axes[(n,values)]
          indices[n] = cat.codes
          # Is this also an axis for an auxiliary coordinate?
          for coordname,coordaxes in self._outer_coords.items():
            if n in coordaxes:
              coordnames.append(coordname)
              coord_axes.setdefault(coordname,OrderedDict())[n] = axes[n]
              coord_indices.setdefault(coordname,OrderedDict())[n] = cat.codes
        # Otherwise, does it have a consistent value?
        # If so, can add it to the metadata.
        # Ignore outer coords, since the value is already encoded elsewhere.
        elif len(cat.categories) == 1 and n not in self._outer_coords:
          try:
            v = cat[0]
            # Python3: convert bytes to str.
            if isinstance(v,bytes): v = str(v.decode())
            # Trim string attributes (remove whitespace padding).
            if isinstance(v,str): v = v.strip()
            # Use regular integers for numeric types.
            elif np.can_cast(v,int):
              v = int(v)
            atts[n] = v
          except (TypeError,ValueError):
            pass

      # Recover the proper order for the outer axes.
      # Not necessarily the order of the header table columns.
      axes = OrderedDict((n,axes[n]) for n in self._outer_axes if n in axes)
      indices = tuple([indices[n] for n in self._outer_axes if n in indices])
      for coordname in coord_axes.keys():
        coord_axes[coordname] = OrderedDict((n,coord_axes[coordname][n]) for n in self._outer_axes if n in coord_axes[coordname])
        coord_indices[coordname] = [coord_indices[coordname][n] for n in self._outer_axes if n in coord_indices[coordname]]

      # Construct a multidimensional array to hold the record keys.
      record_id = np.empty(list(map(len,axes.values())), dtype='int32')

      # Assume missing data (nan) unless filled in later.
      record_id[()] = -1

      # Arrange the record keys in the appropriate locations.
      record_id[indices] = var_records.index

      # Get the auxiliary coordinates.
      coords = []
      for n in coordnames:
        # Ignore auxiliary coordinates which are masked out.
        if var_records[n].isnull().values.any(): continue
        # Unique key for this coordinate
        key = (n,tuple(coord_axes[n].items()))
        # Arrange the coordinate values in the appropriate location.
        shape = list(map(len,list(coord_axes[n].values())))
        values = np.zeros(shape,dtype=var_records[n].dtype)
        indices = tuple(coord_indices[n])
        values[indices] = var_records[n]
        if key not in known_coords:
          coord = _var_type (name = n, atts = OrderedDict(),
                           axes = list(coord_axes[n].values()),
                           array = values )
          known_coords[key] = coord
        coords.append(known_coords[key])
      if len(coords) > 0:
        atts['coordinates'] = coords



      # Check if we have full coverage along all axes.
      have_data = [k >= 0 for k in record_id.flatten()]
      if not np.all(have_data):
        warn (_("Missing some records for %s.")%nomvar)

      # Add dummy axes for the ni,nj,nk record dimensions.
      axes['k'] = _dim_type('k',int(var_id['nk']))
      axes['j'] = _dim_type('j',int(var_id['nj']))
      axes['i'] = _dim_type('i',int(var_id['ni']))

      # Determine the optimal data type to use.
      # First, find unique combos of datyp, nbits
      # (want to minimize calls to dtype_fst2numpy).
      x = var_records[['datyp','nbits']].drop_duplicates()
      datyp = map(int,x['datyp'])
      nbits = map(int,x['nbits'])
      dtype_list = map(dtype_fst2numpy, datyp, nbits)
      dtype = np.result_type(*dtype_list)

      var = _iter_type( name = nomvar, atts = atts,
                        axes = list(axes.values()),
                        dtype = dtype,
                        record_id = record_id )
      self._varlist.append(var)
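
The loop above hinges on pandas Categorical columns: the sorted categories become the outer-axis coordinates, and the integer codes give each record's position along those axes.  Below is a minimal, self-contained sketch of that idea (the column names and values are invented for illustration and are not taken from any real RPN header table):

import numpy as np
import pandas as pd
from collections import OrderedDict

# Toy header table with two outer axes; the row number plays the role of the record key.
headers = pd.DataFrame({'forecast': [0, 0, 6, 6],
                        'level':    [1000, 925, 1000, 925]})

axes = OrderedDict()
codes = []
for name in ('forecast', 'level'):
    cat = pd.Categorical(headers[name])
    axes[name] = np.array(cat.categories)   # sorted unique values -> axis coordinates
    codes.append(cat.codes)                 # position of each record along that axis

# Scatter the record keys onto the (forecast, level) grid; -1 marks missing records.
record_id = np.full([len(a) for a in axes.values()], -1, dtype='int32')
record_id[tuple(codes)] = headers.index
print(record_id)   # expected: [[1 0]
                   #            [3 2]]
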
Exemple #23
0
  def _makevars_slow (self):
    from collections import OrderedDict
    import numpy as np
    import warnings

    nrecs = len(self._headers)

    # Degenerate case: no data in buffer
    if nrecs == 0: return

    records = self._headers

    # Ignore deleted / invalidated records.
    deleted = (records['dltf'] == 1)
    if np.any(deleted):
      records = records[~deleted]
    header_indices = np.where(~deleted)[0]

    # Determine the variable identifiers.
    # First, extract the uniquely identifying information from the metadata.
    # Suppress FutureWarning from numpy about doing this.  Probably benign...
    with warnings.catch_warnings():
      warnings.simplefilter("ignore")
      all_var_ids = records.data[list(self._var_id)]

    # Do a pre-processing step to remove ids that are identical to the one
    # immediately before them.
    # This is purely an optimization: the np.unique call later on is
    # O(n log n), so we want to prune this array as much as possible beforehand.
    # TODO: Update this once numpy has an unsorted "unique"-like function that
    # can run more efficiently when there are relatively few unique elements.
    # (could get ~O(n) with a hash table).
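    # For example (hypothetical variable names): if the ids arrive grouped by
    # variable, ['TT','TT','TT','HU','HU','TT'] yields keep-flags [T,F,F,T,F,T]
    # and a pruned list ['TT','HU','TT'], so np.unique only has to sort the
    # much shorter pruned array.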
    var_ids = all_var_ids
    flag = np.concatenate(([True], var_ids[1:] != var_ids[:-1]))
    var_ids = var_ids[flag]

    # Now, find the unique var_ids from this pruned list.
    var_ids = np.unique(var_ids)

    # Keep track of axes that were generated
    known_axes = dict()

    # Keep track of any auxiliary coordinates that were generated.
    known_coords = dict()

    # Loop over each variable and construct the data & metadata.
    self._varlist = []
    for var_id in var_ids:
      selection = (all_var_ids == var_id)
      var_records = records[selection]
      var_record_indices = np.where(selection)[0]
      nomvar = var_id['nomvar'].strip()
      nomvar = str(nomvar.decode()) # Python3: convert bytes to str.
      # Ignore meta records.
      if nomvar in self._meta_records: continue

      # Get the metadata for each record.
      atts = OrderedDict()
      for n in records.dtype.names:
        if n in self._outer_axes or n in self._outer_coords or n in self._ignore_atts:
          continue
        v = var_records[n]
        # Remove missing values before continuing.
        v = np.ma.compressed(v)
        if len(v) == 0: continue
        # Only use attributes that are consistent across all variable records.
        if len(set(v)) > 1: continue
        v = v[0]
        # Use regular integers for numeric types.
        if np.can_cast(v.dtype,int):
          v = int(v)
        # Python3: convert bytes to str.
        if isinstance(v,bytes): v = str(v.decode())
        # Trim string attributes (remove whitespace padding).
        if isinstance(v,str): v = v.strip()
        atts[n] = v

      # Get the axes.
      axes = OrderedDict()
      for n in self._outer_axes:
        values = var_records[n]
        # Remove missing values before continuing.
        values = np.ma.compressed(values)
        # Ignore axes that have no actual coordinate values.
        if len(values) == 0: continue
        # Get all unique values (sorted).
        values = tuple(sorted(set(values)))
        if (n,values) not in known_axes:
          known_axes[(n,values)] = _axis_type(name = n, atts = OrderedDict(),
                                              array = np.array(values))
        axes[n] = known_axes[(n,values)]

      # Construct a multidimensional array to hold the record keys.
      record_id = np.empty(list(map(len,axes.values())), dtype='int32')

      # Assume missing data unless filled in later (-1 marks a missing record).
      record_id[()] = -1

      # Arrange the record keys in the appropriate locations.
      indices = []
      for n in axes.keys():
        u, ind = np.unique(var_records[n], return_inverse=True)
        indices.append(ind)
      record_id[tuple(indices)] = header_indices[var_record_indices]
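      # For example (hypothetical values): for a 'level' axis whose column holds
      # [1000, 925, 1000], np.unique returns u = [925, 1000] and ind = [1, 0, 1],
      # so those three records are scattered to positions 1, 0 and 1 along that
      # axis of record_id.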

      # Get the auxiliary coordinates.
      coords = []
      for n, coordaxes in self._outer_coords.items():
        # Get the axes for this coordinate.
        # Use the same order of columns as was used for the outer axes,
        # so we get the right coordinate order after sorting.
        coordaxes = OrderedDict((k,v) for k,v in axes.items() if k in coordaxes)
        # Sanity check - do we actually have any of the coordinate axes?
        if len(coordaxes) == 0: continue
        # Unique key for this coordinate
        key = (n,tuple(coordaxes.items()))
        # Arrange the coordinate values in the appropriate location.
        shape = list(map(len,list(coordaxes.values())))
        # Extract all values of the coordinate (including duplicates over
        # other axes).  Which value goes where is determined below.
        all_coord_values = np.ma.compressed(var_records[n])
        if len(all_coord_values) == 0: continue
        values = np.zeros(shape,dtype=all_coord_values.dtype)
        indices = []
        for k in coordaxes.keys():
          u, ind = np.unique(var_records[k], return_inverse=True)
          indices.append(ind)
        values[tuple(indices)] = all_coord_values
        if key not in known_coords:
          coord = _var_type (name = n, atts = OrderedDict(),
                           axes = list(coordaxes.values()), array = values )
          known_coords[key] = coord
        coords.append(known_coords[key])
      if len(coords) > 0:
        atts['coordinates'] = coords

      # Check if we have full coverage along all axes.
      have_data = (record_id >= 0)
      if not np.all(have_data):
        warn (_("Missing some records for %s.")%nomvar)

      # Add dummy axes for the ni,nj,nk record dimensions.
      axes['k'] = _dim_type(name='k', length = int(var_id['nk']))
      axes['j'] = _dim_type(name='j', length = int(var_id['nj']))
      axes['i'] = _dim_type(name='i', length = int(var_id['ni']))

      # Determine the optimal data type to use.
      # First, find unique combos of datyp, nbits
      # (want to minimize calls to dtype_fst2numpy).
      datyp, nbits = zip(*np.unique(var_records.data[['datyp','nbits']]))
      datyp = map(int,datyp)
      nbits = map(int,nbits)
      dtype_list = map(dtype_fst2numpy, datyp, nbits)
      dtype = np.result_type(*dtype_list)

      var = _iter_type( name = nomvar, atts = atts,
                        axes = list(axes.values()),
                        dtype = dtype,
                        record_id = record_id )
      self._varlist.append(var)
Exemple #24
0
def dtype_fst2numpy (datyp, nbits=None):
  from rpnpy.librmn.fstd98 import dtype_fst2numpy
  if datyp == 0:
    warn (_("Raw binary records detected.  The values may not be properly decoded if you're opening on a different platform."))
    datyp = 5
  return dtype_fst2numpy(datyp,nbits)
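
A small usage sketch for the wrapper above (illustrative only; it assumes the usual RPN convention in which datyp 5 denotes IEEE floating point, so the underlying rpnpy routine should map (5, 32) to float32 and (5, 64) to float64):

import numpy as np

# Combine per-record dtypes into one type wide enough to hold every record,
# as done in the _makevars examples above.
dtype_list = [dtype_fst2numpy(5, 32), dtype_fst2numpy(5, 64)]
print(np.result_type(*dtype_list))   # expected: float64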