Exemple #1
0
def main(output_path, delta, ncml_file=None, glob_string=None, apply_to_members=None, hard_start=None, hard_end=None):
    """Split an aggregation into time windows and combine each window into one file.

    The collection is built from ``glob_string`` when it is given, otherwise
    from ``ncml_file``.  The first window starts at the collection's
    aggregation start truncated to the beginning of the year, month or day,
    depending on which of ``delta.years``, ``delta.months`` or ``delta.days``
    is the first to be positive (checked in that order).

    :param output_path: directory receiving the combined files; created when missing
    :param delta: object exposing ``years``, ``months`` and ``days``; also passed to ``Collection.bins``
    :param ncml_file: NcML file; applied to globbed files, or used as the aggregation source
    :param glob_string: glob pattern selecting the member files
    :param apply_to_members: forwarded to ``Collection.from_ncml_file``
    :param hard_start: forwarded to ``Collection.bins``
    :param hard_end: forwarded to ``Collection.bins``
    :returns: ``0``
    :raises ValueError: if neither ``glob_string`` nor ``ncml_file`` is given,
        or if ``delta`` has no positive ``years``, ``months`` or ``days``
    """
    if glob_string is not None:
        collection = Collection.from_glob(glob_string, ncml=ncml_file)
    elif ncml_file is not None:
        collection = Collection.from_ncml_file(ncml_file, apply_to_members=apply_to_members)
    else:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError("Either 'glob_string' or 'ncml_file' must be provided")

    midnight = dict(microsecond=0, second=0, minute=0, hour=0)
    if delta.years > 0:
        starting = collection.aggregation.starting.replace(day=1, month=1, **midnight)
    elif delta.months > 0:
        starting = collection.aggregation.starting.replace(day=1, **midnight)
    elif delta.days > 0:
        starting = collection.aggregation.starting.replace(**midnight)
    else:
        # Without this guard 'starting' would be unbound in the bins() call below.
        raise ValueError("'delta' must have a positive 'years', 'months' or 'days' value")

    windows = collection.bins(delta=delta, starting=starting, hard_start=hard_start, hard_end=hard_end)

    # Create output directory
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    for i, window in enumerate(windows):
        # Create a file name from the window bounds (distinct names so the
        # datetime 'starting' above is not shadowed by a string)
        start_label = window.starting.strftime("%Y%m%dT%H%M")
        end_label = window.ending.strftime("%Y%m%dT%H%M")
        if start_label == end_label:
            file_name = "{0}.nc".format(start_label)
        else:
            file_name = "{0}_TO_{1}.nc".format(start_label, end_label)
        output_file = os.path.join(output_path, file_name)

        pyaxiomlogger.info("Combining ({0}/{1}) - {2} files into {3}".format(i+1, len(windows), len(window.members), output_file))
        Collection.combine(members=window.members, output_file=output_file)

    return 0
Exemple #2
0
    def from_string(urn_string):
        """Parse a colon-separated URN string into an ``IoosUrn``.

        The text after the first ``#`` (up to the next ``#``, if any) is
        treated as extra information and appended to the component.  Fields
        2, 3 and 4 become ``asset_type``, ``authority`` and ``label``.  A
        sixth field is the ``version`` for a ``station`` and the ``component``
        otherwise; a seventh field is always the ``version``.  Anything past
        the seventh field is only reported in a warning.

        :param urn_string: the URN text to parse
        :returns: a populated ``IoosUrn``, or an empty one when fewer than
            five colon-separated fields are present
        """
        pieces = urn_string.split('#')
        fragment = '#{0}'.format(pieces[1]) if len(pieces) > 1 else ''
        fields = pieces[0].split(':')
        count = len(fields)

        urn = IoosUrn()
        if count < 5:
            # Not enough fields to describe anything; hand back the empty URN
            return urn

        urn.asset_type, urn.authority, urn.label = fields[2:5]

        if count > 5:
            sixth = fields[5]
            if urn.asset_type == 'station':
                urn.version = sixth
            else:
                if count == 6:
                    # No separate version field follows, so this is a guess
                    logger.info("Assuming that {0} is the 'component' piece of the URN (not the 'version')".format(sixth + fragment))
                urn.component = sixth + fragment

        if count > 6:
            urn.version = fields[6]
        if count > 7:
            logger.warning("The URN is too long stripping off '{}'".format(':'.join(fields[7:])))
        return urn
Exemple #3
0
def main(output_path,
         delta,
         ncml_file=None,
         glob_string=None,
         apply_to_members=None,
         hard_start=None,
         hard_end=None):
    """Bin a collection into time windows and write one combined file per window.

    ``glob_string`` takes precedence over ``ncml_file`` as the source of the
    collection.  The binning origin is the collection's aggregation start,
    truncated to the start of the year when ``delta.years`` is positive, else
    to the start of the month when ``delta.months`` is positive, else to
    midnight when ``delta.days`` is positive.

    :param output_path: directory receiving the combined files; created when missing
    :param delta: object exposing ``years``, ``months`` and ``days``; also passed to ``Collection.bins``
    :param ncml_file: NcML file; applied to globbed files, or used as the aggregation source
    :param glob_string: glob pattern selecting the member files
    :param apply_to_members: forwarded to ``Collection.from_ncml_file``
    :param hard_start: forwarded to ``Collection.bins``
    :param hard_end: forwarded to ``Collection.bins``
    :returns: ``0``
    :raises ValueError: if neither ``glob_string`` nor ``ncml_file`` is given,
        or if ``delta`` has no positive ``years``, ``months`` or ``days``
    """
    if glob_string is not None:
        collection = Collection.from_glob(glob_string, ncml=ncml_file)
    elif ncml_file is not None:
        collection = Collection.from_ncml_file(
            ncml_file, apply_to_members=apply_to_members)
    else:
        # Fail here with a clear message instead of an unbound local later on.
        raise ValueError(
            "Either 'glob_string' or 'ncml_file' must be provided")

    if delta.years > 0:
        truncate = dict(month=1, day=1)
    elif delta.months > 0:
        truncate = dict(day=1)
    elif delta.days > 0:
        truncate = dict()
    else:
        # No branch used to match here, leaving 'starting' unbound.
        raise ValueError(
            "'delta' must have a positive 'years', 'months' or 'days' value")
    starting = collection.aggregation.starting.replace(microsecond=0,
                                                       second=0,
                                                       minute=0,
                                                       hour=0,
                                                       **truncate)

    windows = collection.bins(delta=delta,
                              starting=starting,
                              hard_start=hard_start,
                              hard_end=hard_end)

    # Create output directory
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    for i, window in enumerate(windows):
        # Create a file name; the labels get their own names so the datetime
        # 'starting' computed above is not overwritten with a string
        first_label = window.starting.strftime("%Y%m%dT%H%M")
        last_label = window.ending.strftime("%Y%m%dT%H%M")
        if first_label == last_label:
            file_name = "{0}.nc".format(first_label)
        else:
            file_name = "{0}_TO_{1}.nc".format(first_label, last_label)
        output_file = os.path.join(output_path, file_name)

        pyaxiomlogger.info("Combining ({0}/{1}) - {2} files into {3}".format(
            i + 1, len(windows), len(window.members), output_file))
        Collection.combine(members=window.members, output_file=output_file)

    return 0
Exemple #4
0
def test_is_mine(klass, fp):
    """Check that ``fp`` is claimed by ``klass`` and by no other CFDataset subclass.

    :param klass: the class expected to recognise the file
    :param fp: path of the file under test
    """
    # CFDataset.load must resolve the file to exactly the expected class.
    dsg = CFDataset.load(fp)
    try:
        assert dsg.__class__ == klass
    finally:
        # This handle used to be dropped without being closed.
        dsg.close()

    allsubs = list(all_subclasses(CFDataset))
    subs = [s for s in allsubs if s != klass]
    dsg = CFDataset(fp)
    try:
        logger.info('\nTesting {}'.format(klass.__name__))
        assert klass.is_mine(dsg) is True
        for s in subs:
            if hasattr(s, 'is_mine'):
                logger.info('  * Trying {}...'.format(s.__name__))
                assert s.is_mine(dsg) is False
    finally:
        # Close even when an assertion fails so the file is not left open
        dsg.close()
Exemple #5
0
    def from_glob(cls, glob_string, timevar_name='time', ncml=None):
        """Build a collection from every file matching ``glob_string``.

        Each file is opened directly with ``netCDF4.Dataset``, or through
        ``pyncml.apply`` into a temporary file when ``ncml`` is given.  Its
        time range and non-empty ``standard_name`` values are recorded as a
        member, and the overall time range and set of names are accumulated.
        Files that lack ``timevar_name`` or that raise while being processed
        are logged and skipped.

        :param glob_string: pattern handed to ``glob``
        :param timevar_name: name of the time variable looked up in each file
        :param ncml: optional NcML applied to each file before it is read
        :returns: ``cls`` built from a ``DotDict`` holding ``name``,
            ``timevar_name``, ``starting``, ``ending``, ``standard_names`` and
            ``members`` (sorted by their ``starting`` value)
        """
        dataset_name      = None
        dataset_starting  = None
        dataset_ending    = None
        dataset_variables = []
        dataset_members   = []

        files = glob(glob_string)
        logger.info("Processing aggregation containing {!s} files".format(len(files)))
        for i, filepath in enumerate(files):
            logger.info("Processing member ({0}/{1}) - {2} ".format(i+1, len(files), filepath))
            nc = None
            tmp_fp = None
            try:
                if ncml is not None:
                    # Apply NcML
                    tmp_f, tmp_fp = tempfile.mkstemp(prefix="nc")
                    os.close(tmp_f)
                    nc = pyncml.apply(filepath, ncml, output_file=tmp_fp)
                else:
                    nc = netCDF4.Dataset(filepath)

                if dataset_name is None:
                    # 'name' is a reserved Python attribute on netCDF4 datasets
                    # (the group name), so the global attribute has to be read
                    # through ncattrs()/getncattr() rather than hasattr/getattr.
                    global_attrs = nc.ncattrs()
                    if 'name' in global_attrs:
                        dataset_name = nc.getncattr('name')
                    elif 'title' in global_attrs:
                        dataset_name = nc.getncattr('title')
                    else:
                        dataset_name = "Pyaxiom Glob Dataset"

                timevar = nc.variables.get(timevar_name)
                if timevar is None:
                    logger.error("Time variable '{0}' was not found in file '{1}'. Skipping.".format(timevar_name, filepath))
                    continue

                # Start/Stop of NetCDF file (read the time values only once)
                time_values = timevar[:]
                starting  = netCDF4.num2date(np.min(time_values), units=timevar.units)
                ending    = netCDF4.num2date(np.max(time_values), units=timevar.units)

                # A real list is needed: it is concatenated below and stored on
                # the member.  Missing and empty standard names are dropped.
                found_names = (getattr(var, 'standard_name', None) for var in nc.variables.values())
                variables = [name for name in found_names if name]

                dataset_variables = list(set(dataset_variables + variables))

                if starting.tzinfo is None:
                    starting = starting.replace(tzinfo=pytz.utc)
                if ending.tzinfo is None:
                    ending = ending.replace(tzinfo=pytz.utc)
                if dataset_starting is None or starting < dataset_starting:
                    dataset_starting = starting
                if dataset_ending is None or ending > dataset_ending:
                    dataset_ending = ending

                member = DotDict(path=filepath, standard_names=variables, starting=starting, ending=ending)
                dataset_members.append(member)
            except Exception:
                # Best effort per member: log and move on.  KeyboardInterrupt
                # and SystemExit are deliberately allowed to propagate.
                logger.exception("Something went wrong with {0}".format(filepath))
                continue
            finally:
                # 'nc' is still None when opening the file failed
                if nc is not None:
                    nc.close()
                if tmp_fp is not None:
                    try:
                        os.remove(tmp_fp)
                    except OSError:
                        pass

        dataset_members = sorted(dataset_members, key=operator.attrgetter('starting'))
        return cls(DotDict(name=dataset_name,
                           timevar_name=timevar_name,
                           starting=dataset_starting,
                           ending=dataset_ending,
                           standard_names=dataset_variables,
                           members=dataset_members))
Exemple #6
0
    def from_glob(cls, glob_string, timevar_name='time', ncml=None):
        """Create a collection out of the files selected by a glob pattern.

        Every matching file is opened (via ``pyncml.apply`` into a temporary
        file when ``ncml`` is supplied, otherwise with ``netCDF4.Dataset``)
        and contributes one member describing its path, time range and
        non-empty ``standard_name`` values.  The earliest start, latest end
        and union of names across all members describe the collection.  A
        file without ``timevar_name``, or one that raises while being read,
        is logged and left out.

        :param glob_string: pattern handed to ``glob``
        :param timevar_name: name of the time variable looked up in each file
        :param ncml: optional NcML applied to each file before it is read
        :returns: ``cls`` built from a ``DotDict`` holding ``name``,
            ``timevar_name``, ``starting``, ``ending``, ``standard_names`` and
            ``members`` (sorted by their ``starting`` value)
        """
        dataset_name      = None
        dataset_starting  = None
        dataset_ending    = None
        dataset_variables = []
        dataset_members   = []

        files = glob(glob_string)
        logger.info("Processing aggregation containing {!s} files".format(len(files)))
        for i, filepath in enumerate(files):
            logger.info("Processing member ({0}/{1}) - {2} ".format(i+1, len(files), filepath))
            nc = None
            tmp_fp = None
            try:
                if ncml is not None:
                    # Apply NcML
                    tmp_f, tmp_fp = tempfile.mkstemp(prefix="nc")
                    os.close(tmp_f)
                    nc = pyncml.apply(filepath, ncml, output_file=tmp_fp)
                else:
                    nc = netCDF4.Dataset(filepath)

                if dataset_name is None:
                    global_attrs = nc.ncattrs()
                    if 'name' in global_attrs:
                        # 'nc.name' is the reserved group-name property, so the
                        # global attribute must be fetched with getncattr()
                        dataset_name = nc.getncattr('name')
                    elif 'title' in global_attrs:
                        dataset_name = nc.getncattr('title')
                    else:
                        dataset_name = "Pyaxiom Glob Dataset"

                timevar = nc.variables.get(timevar_name)
                if timevar is None:
                    logger.error("Time variable '{0}' was not found in file '{1}'. Skipping.".format(timevar_name, filepath))
                    continue

                # Start/Stop of NetCDF file (read the time values only once)
                time_values = timevar[:]
                starting  = netCDF4.num2date(np.min(time_values), units=timevar.units)
                ending    = netCDF4.num2date(np.max(time_values), units=timevar.units)

                # Keep only variables that carry a non-empty standard_name
                found_names = (getattr(var, 'standard_name', None) for var in nc.variables.values())
                variables = [name for name in found_names if name]

                dataset_variables = list(set(dataset_variables + variables))

                if starting.tzinfo is None:
                    starting = starting.replace(tzinfo=pytz.utc)
                if ending.tzinfo is None:
                    ending = ending.replace(tzinfo=pytz.utc)
                if dataset_starting is None or starting < dataset_starting:
                    dataset_starting = starting
                if dataset_ending is None or ending > dataset_ending:
                    dataset_ending = ending

                member = DotDict(path=filepath, standard_names=variables, starting=starting, ending=ending)
                dataset_members.append(member)
            except Exception:
                # Skip the broken member but let KeyboardInterrupt/SystemExit
                # through so a long aggregation can still be interrupted.
                logger.exception("Something went wrong with {0}".format(filepath))
                continue
            finally:
                # Opening may have failed, in which case there is nothing to close
                if nc is not None:
                    nc.close()
                if tmp_fp is not None:
                    try:
                        os.remove(tmp_fp)
                    except OSError:
                        pass

        dataset_members = sorted(dataset_members, key=operator.attrgetter('starting'))
        return cls(DotDict(name=dataset_name,
                           timevar_name=timevar_name,
                           starting=dataset_starting,
                           ending=dataset_ending,
                           standard_names=dataset_variables,
                           members=dataset_members))
Exemple #7
0
    def __init__(self, output_directory, latitude, longitude, station_name, global_attributes, times=None, verticals=None, vertical_fill=None, output_filename=None, vertical_axis_name=None, vertical_positive=None):
        """Create the output file and write the station-level metadata.

        The file is written fresh (an existing file at the same path is
        removed), populated with global attributes, the station identifier,
        ``latitude``/``longitude``, ``crs`` and ``platform`` variables, then
        reopened in append mode as ``self._nc`` before
        ``setup_times_and_verticals`` is called.

        :param output_directory: directory for the file; created when missing
        :param latitude: station latitude, written to the ``latitude`` variable and geospatial attributes
        :param longitude: station longitude, written to the ``longitude`` variable and geospatial attributes
        :param station_name: identifier stored character by character in ``feature_type_instance``; also parsed with ``IoosUrn.from_string``
        :param global_attributes: mapping of global attributes; keys managed by this class are ignored and ``None`` values are stored as ``"None"``
        :param times: forwarded to ``setup_times_and_verticals``
        :param verticals: forwarded to ``setup_times_and_verticals``
        :param vertical_fill: fill value kept as ``self.vertical_fill``; defaults to ``-9999.9``
        :param output_filename: file name; when omitted one is generated from ``station_name`` and a random integer
        :param vertical_axis_name: name of the vertical axis; defaults to ``'z'``
        :param vertical_positive: vertical direction; defaults to ``'down'``
        """
        if output_filename is None:
            output_filename = '{}_{}.nc'.format(station_name, int(random.random() * 100000))
            logger.info("No output filename specified, saving as {}".format(output_filename))

        self.vertical_positive  = vertical_positive or 'down'
        self.vertical_axis_name = vertical_axis_name or 'z'
        self.time_axis_name     = 'time'

        # Make directory
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        self.time = None

        self.out_file = os.path.abspath(os.path.join(output_directory, output_filename))
        if os.path.isfile(self.out_file):
            os.remove(self.out_file)

        with EnhancedDataset(self.out_file, 'w') as nc:
            # Global attributes
            # These are set by this script, we don't want someone to be able to set them manually.
            # NOTE: every entry needs its trailing comma; adjacent string
            # literals are silently concatenated into a single (useless) key.
            global_skips = ["time_coverage_start", "time_coverage_end", "time_coverage_duration", "time_coverage_resolution",
                            "featureType", "geospatial_vertical_positive", "geospatial_vertical_min", "geospatial_vertical_max",
                            "geospatial_lat_min", "geospatial_lon_min", "geospatial_lat_max", "geospatial_lon_max", "geospatial_bounds",
                            "geospatial_vertical_resolution", "geospatial_lat_resolution", "geospatial_lon_resolution",
                            "Conventions", "date_created", "date_modified", "date_issued"]
            for k, v in global_attributes.items():
                if v is None:
                    v = "None"
                if k not in global_skips:
                    nc.setncattr(k, v)

            now_date = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z")
            nc.setncattr("Conventions", "CF-1.6,ACDD-1.3")
            nc.setncattr("date_created", now_date)
            nc.setncattr("date_modified", now_date)
            nc.setncattr("date_issued", now_date)
            # Keep a caller-supplied value if one was copied over above
            if not hasattr(nc, "date_metadata_modified"):
                nc.setncattr("date_metadata_modified", now_date)

            # Allow the customization of this attribute
            if 'cdm_data_type' not in global_attributes:
                nc.setncattr('cdm_data_type', 'Station')

            # Append to any history supplied through global_attributes
            old_history = getattr(nc, 'history', '')
            new_history = '{} - {} - {}'.format(now_date, 'pyaxiom', 'File created using pyaxiom')
            if old_history:
                nc.setncattr('history', '{}\n{}'.format(old_history, new_history))
            else:
                nc.setncattr('history', new_history)

            # Station name
            nc.createDimension("feature_type_instance", len(station_name))
            name = nc.createVariable("feature_type_instance", "S1", ("feature_type_instance",))
            name.cf_role = "timeseries_id"
            name.long_name = "Identifier for each feature type instance"
            name[:] = list(station_name)

            # Location
            lat = nc.createVariable("latitude", get_type(latitude))
            lat.units           = "degrees_north"
            lat.standard_name   = "latitude"
            lat.long_name       = "sensor latitude"
            lat.axis            = "Y"
            lat.valid_min       = latitude
            lat.valid_max       = latitude
            lat[:] = latitude
            nc.setncattr("geospatial_lat_min", latitude)
            nc.setncattr("geospatial_lat_max", latitude)
            nc.setncattr("geospatial_lat_resolution", 0)
            nc.setncattr("geospatial_lat_units", "degrees_north")

            lon = nc.createVariable("longitude", get_type(longitude))
            lon.units           = "degrees_east"
            lon.standard_name   = "longitude"
            lon.long_name       = "sensor longitude"
            lon.axis            = "X"
            lon.valid_min       = longitude
            lon.valid_max       = longitude
            lon[:] = longitude
            nc.setncattr("geospatial_lon_min", longitude)
            nc.setncattr("geospatial_lon_max", longitude)
            nc.setncattr("geospatial_lon_resolution", 0)
            nc.setncattr("geospatial_lon_units", "degrees_east")

            nc.setncattr("geospatial_bounds", "POINT({} {})".format(longitude, latitude))
            if not hasattr(nc, "geospatial_bounds_crs"):
                nc.setncattr("geospatial_bounds_crs", "EPSG:4326")

            # Metadata variables
            self.crs = nc.createVariable("crs", "i4")
            self.crs.long_name           = "http://www.opengis.net/def/crs/EPSG/0/4326"
            self.crs.grid_mapping_name   = "latitude_longitude"
            self.crs.epsg_code           = "EPSG:4326"
            self.crs.semi_major_axis     = float(6378137.0)
            self.crs.inverse_flattening  = float(298.257223563)

            platform = nc.createVariable("platform", "i4")
            platform.definition = "http://mmisw.org/ont/ioos/definition/stationID"

            # Use the parsed URN for the platform metadata when the station
            # name is a valid one, otherwise fall back to the raw name
            urn = IoosUrn.from_string(station_name)
            if urn.valid() is True:
                platform.short_name = global_attributes.get("title", urn.label)
                platform.long_name = global_attributes.get('summary', 'Station {}'.format(urn.label))
                platform.ioos_code = urn.urn
            else:
                platform.short_name = global_attributes.get("title", station_name)
                platform.long_name = global_attributes.get("summary", station_name)
                platform.ioos_code = station_name

            if vertical_fill is None:
                vertical_fill = -9999.9
            self.vertical_fill = vertical_fill

        # Reopen in append mode; this handle stays open on the instance
        self._nc = EnhancedDataset(self.out_file, 'a')
        self.setup_times_and_verticals(times, verticals)
        logger.info("Created file at '{}'".format(self.out_file))
Exemple #8
0
    def add_variable(self, variable_name, values, times=None, verticals=None, sensor_vertical_datum=None, attributes=None, unlink_from_profile=None, fillvalue=None, raise_on_error=False, create_instrument_variable=False):
        """Create a data variable in the file and fill it with ``values``.

        ``values`` is first reshaped to ``(time,)`` or ``(time, vertical)``
        and reordered with ``self.time_indexes`` / ``self.vertical_indexes``.
        When that reshape raises ``ValueError`` the values are placed one by
        one using ``bisect`` lookups of ``times`` (and ``verticals``) against
        the file's axes, unless ``raise_on_error`` is true.

        :param variable_name: name of the variable to create
        :param values: data values; non-empty lists/tuples are converted to arrays and int64 is downcast to int32
        :param times: time of each value; required for the manual matching fallback
        :param verticals: vertical position of each value; required for the manual fallback when the vertical axis has more than one entry
        :param sensor_vertical_datum: datum recorded on the ``crs`` variable
        :param attributes: mapping of variable attributes; not modified by this method
        :param unlink_from_profile: when ``True`` the values are stored against time only
        :param fillvalue: fill value for the variable; defaults to ``-9999.9`` cast to the dtype of ``values``
        :param raise_on_error: re-raise the reshape ``ValueError`` instead of falling back to manual matching
        :param create_instrument_variable: when ``True`` also call ``add_instrument_variable``
        :returns: the created variable
        :raises ValueError: when the manual fallback lacks ``times``/``verticals``, or the data is neither 1-D nor 2-D
        """

        if isinstance(values, (list, tuple,)) and values:
            values = np.asarray(values)
        if get_type(values) == np.int64:
            # Create values as int32 because DAP does not support int64 until DAP4.
            values = values.astype(np.int32)

        if isinstance(times, (list, tuple,)) and times:
            times = np.asarray(times)
        if get_type(times) == np.int64:
            # Create time as int32 because DAP does not support int64 until DAP4.
            times = times.astype(np.int32)

        if isinstance(verticals, (list, tuple,)) and verticals:
            verticals = np.asarray(verticals)
        if get_type(verticals) == np.int64:
            # Create verticals as int32 because DAP does not support int64 until DAP4.
            verticals = verticals.astype(np.int32)

        # Set vertical datum on the CRS variable
        if sensor_vertical_datum is not None:
            try:
                self.crs.geoid_name = sensor_vertical_datum
                self.crs.vertical_datum = sensor_vertical_datum
                self.crs.water_surface_reference_datum = sensor_vertical_datum
                if not hasattr(self._nc, "geospatial_bounds_vertical_crs"):
                    self._nc.setncattr("geospatial_bounds_vertical_crs", sensor_vertical_datum)
            except AttributeError:
                pass

        # Set default fillvalue for new variables
        if fillvalue is None:
            fillvalue = -9999.9
        fillvalue = values.dtype.type(fillvalue)

        used_values = None

        vertical_axis = self._nc.variables.get(self.vertical_axis_name)
        try:
            if unlink_from_profile is True:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            # These next two cases should work for all but a few cases, which are caught below
            elif vertical_axis.size == 1:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            else:
                used_values = np.ma.reshape(values, (self.time.size, vertical_axis.size, ))
                used_values = used_values[self.time_indexes]
                try:
                    used_values = used_values[:, self.vertical_indexes]
                except IndexError:
                    # The vertical values most likely had duplicates.  Ignore the
                    # faulty index here and try to save the values as is.
                    pass
        except ValueError:
            if raise_on_error is True:
                raise
            else:
                logger.warning("Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}".format(self.time.size, vertical_axis.size, values.size))
            # Read the axes once; slicing them inside the per-value lookup
            # below would re-read the whole axis for every single value.
            time_values = self.time[:]
            if vertical_axis.size > 1:
                if times is not None and verticals is not None:
                    # Hmmm, we have two actual height values for this station.
                    # Not cool man, not cool.
                    # Reindex the entire values array.  This is slow.
                    vertical_values = vertical_axis[:]
                    indexed = ((bisect.bisect_left(time_values, times[i]), bisect.bisect_left(vertical_values, verticals[i]), values[i]) for i in range(values.size))
                    used_values = np.ndarray((self.time.size, vertical_axis.size, ), dtype=get_type(values))
                    used_values.fill(fillvalue)
                    for (tzi, zzi, vz) in indexed:
                        # Values that sort past the end of either axis are dropped
                        if zzi < vertical_axis.size and tzi < self.time.size:
                            used_values[tzi, zzi] = vz
                    del indexed
                else:
                    raise ValueError("You need to pass in both 'times' and 'verticals' parameters that matches the size of the 'values' parameter.")
            else:
                if times is not None:
                    # Ugh, find the time indexes manually
                    indexed = ((bisect.bisect_left(time_values, times[i]), values[i]) for i in range(values.size))
                    used_values = np.ndarray((self.time.size, ), dtype=get_type(values))
                    used_values.fill(fillvalue)
                    for (tzi, vz) in indexed:
                        if tzi < self.time.size:
                            used_values[tzi] = vz
                    del indexed
                else:
                    raise ValueError("You need to pass in a 'times' parameter that matches the size of the 'values' parameter.")

        logger.info("Setting values for {}...".format(variable_name))
        if len(used_values.shape) == 1:
            var = self._nc.createVariable(variable_name, get_type(used_values), ("time",), fill_value=fillvalue, chunksizes=(self.time_chunk,), zlib=True)
            self._nc.setncattr('ncei_template_version', 'NCEI_NetCDF_TimeSeries_Orthogonal_Template_v2.0')
            if vertical_axis.size == 1:
                var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
            else:
                # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
                var.coordinates = "{} latitude longitude".format(self.time_axis_name)
                if unlink_from_profile is True:
                    # Create metadata variable for the sensor_depth
                    if verticals is not None and self._nc.variables.get('sensor_depth') is None:
                        logger.info("Setting the special case 'sensor_depth' metadata variable")
                        inst_depth = self._nc.createVariable('sensor_depth', get_type(verticals))
                        inst_depth.units = 'm'
                        inst_depth.standard_name = 'surface_altitude'
                        inst_depth.positive = self.vertical_positive
                        if self.vertical_positive.lower() == 'down':
                            inst_depth.long_name = 'sensor depth below datum'
                        elif self.vertical_positive.lower() == 'up':
                            inst_depth.long_name = 'sensor height above datum'
                        inst_depth.datum = sensor_vertical_datum or 'Unknown'
                        # Test the element count only: the truth value of an
                        # array is an error for more than one element and is
                        # False for a single depth of 0.
                        if np.size(verticals) > 0:
                            inst_depth[:] = verticals[0]
                        else:
                            inst_depth[:] = self.vertical_fill

        elif len(used_values.shape) == 2:
            var = self._nc.createVariable(variable_name, get_type(used_values), ("time", "z",), fill_value=fillvalue, chunksizes=(self.time_chunk, vertical_axis.size,), zlib=True)
            var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
            self._nc.setncattr('ncei_template_version', 'NCEI_NetCDF_TimeSeriesProfile_Orthogonal_Template_v2.0')
        else:
            raise ValueError("Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}.".format(used_values.shape, len(used_values.shape)))

        # Set missing_value as well.  Work on a copy so the caller's mapping
        # is not modified behind its back.
        attributes = dict(attributes) if attributes else {}
        attributes['missing_value'] = fillvalue
        # Set the variable attributes as passed in
        for k, v in attributes.items():

            if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                # Use this as the vertical datum if it is specified and we didn't already have one
                try:
                    self.crs.geoid_name = v
                    self.crs.vertical_datum = v
                    self.crs.water_surface_reference_datum = v
                    if not hasattr(self._nc, "geospatial_bounds_vertical_crs"):
                        self._nc.setncattr("geospatial_bounds_vertical_crs", v)
                except AttributeError:
                    pass

            if k not in ['name', 'coordinates', '_FillValue'] and v is not None:
                try:
                    var.setncattr(k, v)
                except Exception:
                    # Best effort: an attribute that cannot be stored is
                    # skipped, but KeyboardInterrupt/SystemExit still propagate
                    logger.info('Could not add attribute {}: {}, skipping.'.format(k, v))

        # Add a long name if it doesn't exist
        if not hasattr(var, 'long_name'):
            varunits = getattr(var, 'units', None)
            vartitle = getattr(var, 'standard_name', getattr(var, 'name'))
            vartitle = vartitle.title().replace('_', ' ')
            if varunits is not None:
                vartitle = '{} ({})'.format(vartitle, varunits)
            var.long_name = vartitle
        var.grid_mapping = 'crs'
        var.platform = 'platform'
        var.ancillary_variables = 'platform'
        var.coverage_content_type = 'physicalMeasurement'
        var[:] = used_values

        if create_instrument_variable is True:
            self.add_instrument_variable(variable_name)

        self._nc.sync()
        del used_values
        return var
Exemple #9
0
    def __init__(self, output_directory, latitude, longitude, station_name, global_attributes, times=None, verticals=None, vertical_fill=None, output_filename=None, vertical_axis_name=None, vertical_positive=None):
        """Open a new output file and write the station-level metadata.

        The file is opened for writing as ``self.nc`` and left open.  Global
        attributes, the station identifier, ``latitude``/``longitude``,
        ``crs`` and ``platform`` variables are written before
        ``setup_times_and_verticals`` is called.

        :param output_directory: directory for the file; created when missing
        :param latitude: station latitude, written to the ``latitude`` variable and geospatial attributes
        :param longitude: station longitude, written to the ``longitude`` variable and geospatial attributes
        :param station_name: identifier stored character by character in ``feature_type_instance`` and used as the platform ``ioos_code``
        :param global_attributes: mapping of global attributes; keys managed by this class are ignored and ``None`` values are stored as ``"None"``
        :param times: forwarded to ``setup_times_and_verticals``
        :param verticals: forwarded to ``setup_times_and_verticals``
        :param vertical_fill: fill value kept as ``self.vertical_fill``; defaults to ``-9999.9``
        :param output_filename: file name; when omitted one is generated from ``station_name`` and a random integer
        :param vertical_axis_name: name of the vertical axis; defaults to ``'z'``
        :param vertical_positive: vertical direction; defaults to ``'down'``
        """
        if output_filename is None:
            output_filename = '{}_{}.nc'.format(station_name, int(random.random()*100000))
            logger.info("No output filename specified, saving as {}".format(output_filename))

        self.vertical_positive  = vertical_positive or 'down'
        self.vertical_axis_name = vertical_axis_name or 'z'
        self.time_axis_name     = 'time'

        # Make directory
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        out_file = os.path.abspath(os.path.join(output_directory, output_filename))
        self.nc = netCDF4.Dataset(out_file, 'w')
        self.time = None

        # Global attributes
        # These are set by this script, we don't want someone to be able to set them manually
        global_skips = ["time_coverage_start", "time_coverage_end", "time_coverage_duration", "time_coverage_resolution",
                        "featureType", "geospatial_vertical_positive", "geospatial_vertical_min", "geospatial_vertical_max",
                        "geospatial_lat_min", "geospatial_lon_min", "geospatial_lat_max", "geospatial_lon_max",
                        "geospatial_vertical_resolution", "Conventions", "date_created"]
        # items() works on both Python 2 and 3; iteritems() only exists on 2
        for k, v in global_attributes.items():
            if v is None:
                v = "None"
            if k not in global_skips:
                self.nc.setncattr(k, v)
        self.nc.setncattr("Conventions", "CF-1.6")
        self.nc.setncattr("date_created", datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z"))

        # Station name
        self.nc.createDimension("feature_type_instance", len(station_name))
        name = self.nc.createVariable("feature_type_instance", "S1", ("feature_type_instance",))
        name.cf_role = "timeseries_id"
        name.long_name = "Identifier for each feature type instance"
        name[:] = list(station_name)

        # Location
        lat = self.nc.createVariable("latitude", "f8")
        lat.units           = "degrees_north"
        lat.standard_name   = "latitude"
        lat.long_name       = "sensor latitude"
        lat[:] = latitude
        self.nc.setncattr("geospatial_lat_min", latitude)
        self.nc.setncattr("geospatial_lat_max", latitude)

        lon = self.nc.createVariable("longitude", "f8")
        lon.units           = "degrees_east"
        lon.standard_name   = "longitude"
        lon.long_name       = "sensor longitude"
        lon[:] = longitude
        self.nc.setncattr("geospatial_lon_min", longitude)
        self.nc.setncattr("geospatial_lon_max", longitude)

        # Metadata variables
        self.crs = self.nc.createVariable("crs", "i4")
        self.crs.long_name           = "http://www.opengis.net/def/crs/EPSG/0/4326"
        self.crs.grid_mapping_name   = "latitude_longitude"
        self.crs.epsg_code           = "EPSG:4326"
        self.crs.semi_major_axis     = float(6378137.0)
        self.crs.inverse_flattening  = float(298.257223563)

        platform = self.nc.createVariable("platform", "i4")
        platform.ioos_code      = station_name
        platform.short_name     = global_attributes.get("title", station_name)
        platform.long_name      = global_attributes.get("description", station_name)
        platform.definition     = "http://mmisw.org/ont/ioos/definition/stationID"

        if vertical_fill is None:
            vertical_fill = -9999.9
        self.vertical_fill      = vertical_fill

        self.setup_times_and_verticals(times, verticals)
        logger.info("Created file at '{}'".format(out_file))
Exemple #10
0
    def add_variable(self, variable_name, values, times=None, verticals=None, sensor_vertical_datum=None, attributes=None, unlink_from_profile=None, fillvalue=None, raise_on_error=False):
        """
        Create a new data variable in the open netCDF file and fill it with `values`.

        The values are first reshaped to ``(time,)`` or ``(time, z)`` and
        reordered with ``self.time_indexes`` (and ``self.vertical_indexes``).
        If that reshape raises ``ValueError`` each value is instead placed
        individually by locating its time (and vertical) coordinate with
        ``bisect``; cells that receive no value keep `fillvalue`.

        :param variable_name: name of the variable to create
        :param values: data to store; a non-empty list/tuple is converted to a numpy array
        :param times: time coordinate of every entry in `values`; only needed
            by the manual fallback
        :param verticals: vertical coordinate of every entry in `values`; needed
            by the manual fallback when the file has more than one z level, and
            used for the ``sensor_depth`` variable
        :param sensor_vertical_datum: if not None, written to the ``crs``
            variable as ``geoid_name``, ``vertical_datum`` and
            ``water_surface_reference_datum``
        :param attributes: mapping of attribute name -> value set on the new
            variable.  ``_FillValue`` and ``None`` values are skipped.  A
            ``vertical_datum`` entry is also applied to the ``crs`` variable
            when `sensor_vertical_datum` is None.
        :param unlink_from_profile: when True the values are stored on the time
            dimension only, even if the file has several z levels, and a scalar
            ``sensor_depth`` variable is created if it does not exist yet
        :param fillvalue: fill value of the new variable (default -9999.9)
        :param raise_on_error: when True, close the file and re-raise the
            reshape ``ValueError`` instead of trying the manual fallback
        :returns: the newly created variable
        :raises ValueError: if the reshape fails and `raise_on_error` is True,
            if the manual fallback is needed but `times` (and `verticals`) were
            not supplied, or if the resulting data is not 1- or 2-dimensional
        """

        if isinstance(values, (list, tuple,)) and values:
            values = np.asarray(values)
        if isinstance(times, (list, tuple,)) and times:
            times = np.asarray(times)
        if isinstance(verticals, (list, tuple,)) and verticals:
            verticals = np.asarray(verticals)

        # Set vertical datum on the CRS variable
        if sensor_vertical_datum is not None:
            try:
                self.crs.geoid_name = sensor_vertical_datum
                self.crs.vertical_datum = sensor_vertical_datum
                self.crs.water_surface_reference_datum = sensor_vertical_datum
            except AttributeError:
                pass

        # Set default fillvalue for new variables
        if fillvalue is None:
            fillvalue = -9999.9

        used_values = None
        try:
            # These cases should work for all but a few inputs, which are caught below
            if unlink_from_profile is True or self.z.size == 1:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            else:
                used_values = np.ma.reshape(values, (self.time.size, self.z.size, ))
                used_values = used_values[self.time_indexes]
                try:
                    used_values = used_values[:, self.vertical_indexes]
                except IndexError:
                    # The vertical values most likely had duplicates.  Ignore the
                    # faulty index here and try to save the values as is.
                    pass
        except ValueError:
            if raise_on_error is True:
                self.close()
                raise

            # np.size (unlike the .size attribute) also works when 'values' is
            # still a plain sequence, e.g. an empty list that was not converted.
            value_count = np.size(values)
            logger.exception("Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}".format(self.time.size, self.z.size, value_count))

            if self.z.size > 1:
                if times is None or verticals is None:
                    self.close()
                    raise ValueError("You need to pass in both 'times' and 'verticals' parameters that matches the size of the 'values' parameter.")
                # We have more than one actual height value for this station.
                # Reindex the entire values array.  This is slow.
                # Read the coordinate arrays once instead of once per value.
                time_values = self.time[:]
                z_values = self.z[:]
                used_values = np.ndarray((self.time.size, self.z.size, ), dtype=np.float64)
                used_values.fill(float(fillvalue))
                for i in range(value_count):
                    tzi = bisect.bisect_left(time_values, times[i])
                    zzi = bisect.bisect_left(z_values, verticals[i])
                    # bisect returns len(array) for coordinates past the end; skip those
                    if zzi < self.z.size and tzi < self.time.size:
                        used_values[tzi, zzi] = values[i]
            else:
                if times is None:
                    self.close()
                    raise ValueError("You need to pass in a 'times' parameter that matches the size of the 'values' parameter.")
                # Find the time indexes manually
                time_values = self.time[:]
                used_values = np.ndarray((self.time.size, ), dtype=np.float64)
                used_values.fill(float(fillvalue))
                for i in range(value_count):
                    tzi = bisect.bisect_left(time_values, times[i])
                    if tzi < self.time.size:
                        used_values[tzi] = values[i]

        logger.info("Setting values for {}...".format(variable_name))
        if len(used_values.shape) == 1:
            var = self.nc.createVariable(variable_name,    "f8", ("time",), fill_value=fillvalue, chunksizes=(1000,), zlib=True)
            if self.z.size == 1:
                var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
            else:
                # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
                var.coordinates = "{} latitude longitude".format(self.time_axis_name)
                if unlink_from_profile is True:
                    # Create metadata variable for the sensor_depth
                    if self.nc.variables.get('sensor_depth') is None:
                        logger.info("Setting the special case 'sensor_depth' metadata variable")
                        inst_depth = self.nc.createVariable('sensor_depth', 'f4')
                        inst_depth.units = 'm'
                        inst_depth.standard_name = 'surface_altitude'
                        inst_depth.long_name = 'sensor depth below datum'
                        inst_depth.positive = self.vertical_positive
                        inst_depth.datum = sensor_vertical_datum or 'Unknown'
                        # 'verticals' is optional; without it there is no depth
                        # to record, so store the vertical fill value instead
                        # of failing on verticals[0].
                        if verticals is not None and np.size(verticals) > 0:
                            inst_depth[:] = verticals[0] * -1
                        else:
                            inst_depth[:] = self.vertical_fill

        elif len(used_values.shape) == 2:
            var = self.nc.createVariable(variable_name,    "f8", ("time", "z",), fill_value=fillvalue, chunksizes=(1000, self.z.size,), zlib=True)
            var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
        else:
            raise ValueError("Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}.".format(used_values.shape, len(used_values.shape)))

        # Set the variable attributes as passed in
        if attributes:
            # .items() works on both Python 2 and Python 3 (iteritems is Python 2 only)
            for k, v in attributes.items():

                if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                    # Use this as the vertical datum if it is specified and we didn't already have one
                    try:
                        self.crs.geoid_name = v
                        self.crs.vertical_datum = v
                        self.crs.water_surface_reference_datum = v
                    except AttributeError:
                        pass

                # The fill value was already fixed when the variable was created
                if k != '_FillValue' and v is not None:
                    try:
                        setattr(var, k, v)
                    except BaseException:
                        logger.info('Could not add attribute {}: {}, skipping.'.format(k, v))

        var.grid_mapping = 'crs'
        var[:] = used_values

        return var
Exemple #11
0
    def add_variable(self,
                     variable_name,
                     values,
                     times=None,
                     verticals=None,
                     sensor_vertical_datum=None,
                     attributes=None,
                     unlink_from_profile=None,
                     fillvalue=None,
                     raise_on_error=False):
        """
        Append a new data variable to the netCDF file at ``self.out_file``.

        The values are first reshaped to ``(time,)`` or ``(time, z)`` and
        reordered with ``self.time_indexes`` (and ``self.vertical_indexes``).
        If that reshape raises ``ValueError`` each value is instead placed
        individually by locating its time (and vertical) coordinate with
        ``bisect``; cells that receive no value keep `fillvalue`.

        :param variable_name: name of the variable to create
        :param values: data to store; a non-empty list/tuple is converted to a
            numpy array.  The variable is created with the dtype of the data.
        :param times: time coordinate of every entry in `values`; only needed
            by the manual fallback
        :param verticals: vertical coordinate of every entry in `values`; needed
            by the manual fallback when the file has more than one z level, and
            used for the ``sensor_depth`` variable
        :param sensor_vertical_datum: if not None, written to the ``crs``
            variable as ``geoid_name``, ``vertical_datum`` and
            ``water_surface_reference_datum``
        :param attributes: mapping of attribute name -> value set on the new
            variable.  ``name``, ``coordinates``, ``_FillValue`` and ``None``
            values are skipped.  A ``vertical_datum`` entry is also applied to
            the ``crs`` variable when `sensor_vertical_datum` is None.
        :param unlink_from_profile: when True the values are stored on the time
            dimension only, even if the file has several z levels, and a scalar
            ``sensor_depth`` variable is created if it does not exist yet
        :param fillvalue: fill value of the new variable (default -9999.9)
        :param raise_on_error: when True, re-raise the reshape ``ValueError``
            instead of trying the manual fallback
        :returns: the newly created variable
        :raises ValueError: if the reshape fails and `raise_on_error` is True,
            if the manual fallback is needed but `times` (and `verticals`) were
            not supplied, or if the resulting data is not 1- or 2-dimensional
        """

        if isinstance(values, (list, tuple, )) and values:
            values = np.asarray(values)
        if isinstance(times, (list, tuple, )) and times:
            times = np.asarray(times)
        if isinstance(verticals, (list, tuple, )) and verticals:
            verticals = np.asarray(verticals)

        # Set vertical datum on the CRS variable
        if sensor_vertical_datum is not None:
            try:
                self.crs.geoid_name = sensor_vertical_datum
                self.crs.vertical_datum = sensor_vertical_datum
                self.crs.water_surface_reference_datum = sensor_vertical_datum
            except AttributeError:
                pass

        # Set default fillvalue for new variables
        if fillvalue is None:
            fillvalue = -9999.9

        used_values = None
        try:
            # These cases should work for all but a few inputs, which are caught below
            if unlink_from_profile is True or self.z.size == 1:
                used_values = np.ma.reshape(values, (self.time.size, ))
                used_values = used_values[self.time_indexes]
            else:
                used_values = np.ma.reshape(values, (
                    self.time.size,
                    self.z.size,
                ))
                used_values = used_values[self.time_indexes]
                try:
                    used_values = used_values[:, self.vertical_indexes]
                except IndexError:
                    # The vertical values most likely had duplicates.  Ignore the
                    # faulty index here and try to save the values as is.
                    pass
        except ValueError:
            if raise_on_error is True:
                raise

            logger.warning(
                "Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}"
                .format(self.time.size, self.z.size, values.size))

            if self.z.size > 1:
                if times is None or verticals is None:
                    raise ValueError(
                        "You need to pass in both 'times' and 'verticals' parameters that matches the size of the 'values' parameter."
                    )
                # We have more than one actual height value for this station.
                # Reindex the entire values array.  This is slow.
                # Read the coordinate arrays once instead of once per value.
                time_values = self.time[:]
                z_values = self.z[:]
                used_values = np.ndarray((
                    self.time.size,
                    self.z.size,
                ), dtype=values.dtype)
                used_values.fill(float(fillvalue))
                for i in range(values.size):
                    tzi = bisect.bisect_left(time_values, times[i])
                    zzi = bisect.bisect_left(z_values, verticals[i])
                    # bisect returns len(array) for coordinates past the end; skip those
                    if zzi < self.z.size and tzi < self.time.size:
                        used_values[tzi, zzi] = values[i]
            else:
                if times is None:
                    raise ValueError(
                        "You need to pass in a 'times' parameter that matches the size of the 'values' parameter."
                    )
                # Find the time indexes manually
                time_values = self.time[:]
                used_values = np.ndarray((self.time.size, ),
                                         dtype=values.dtype)
                used_values.fill(float(fillvalue))
                for i in range(values.size):
                    tzi = bisect.bisect_left(time_values, times[i])
                    if tzi < self.time.size:
                        used_values[tzi] = values[i]

        with EnhancedDataset(self.out_file, 'a') as nc:
            logger.info("Setting values for {}...".format(variable_name))
            if len(used_values.shape) == 1:
                var = nc.createVariable(variable_name,
                                        used_values.dtype, ("time", ),
                                        fill_value=fillvalue,
                                        chunksizes=(1000, ),
                                        zlib=True)
                if self.z.size == 1:
                    var.coordinates = "{} {} latitude longitude".format(
                        self.time_axis_name, self.vertical_axis_name)
                else:
                    # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
                    var.coordinates = "{} latitude longitude".format(
                        self.time_axis_name)
                    if unlink_from_profile is True:
                        # Create metadata variable for the sensor_depth
                        if nc.variables.get('sensor_depth') is None:
                            logger.info(
                                "Setting the special case 'sensor_depth' metadata variable"
                            )
                            inst_depth = nc.createVariable(
                                'sensor_depth', 'f4')
                            inst_depth.units = 'm'
                            inst_depth.standard_name = 'surface_altitude'
                            inst_depth.positive = self.vertical_positive
                            if self.vertical_positive.lower() == 'down':
                                inst_depth.long_name = 'sensor depth below datum'
                            elif self.vertical_positive.lower() == 'up':
                                inst_depth.long_name = 'sensor height above datum'
                            inst_depth.datum = sensor_vertical_datum or 'Unknown'
                            # Test for presence explicitly: the truth value of a
                            # numpy array raises ValueError for more than one
                            # element and is False for a single depth of 0.
                            if verticals is not None and np.size(verticals) > 0:
                                inst_depth[:] = verticals[0]
                            else:
                                inst_depth[:] = self.vertical_fill

            elif len(used_values.shape) == 2:
                var = nc.createVariable(variable_name,
                                        used_values.dtype, (
                                            "time",
                                            "z",
                                        ),
                                        fill_value=fillvalue,
                                        chunksizes=(
                                            1000,
                                            self.z.size,
                                        ),
                                        zlib=True)
                var.coordinates = "{} {} latitude longitude".format(
                    self.time_axis_name, self.vertical_axis_name)
            else:
                raise ValueError(
                    "Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}."
                    .format(used_values.shape, len(used_values.shape)))

            # Set the variable attributes as passed in
            if attributes:
                for k, v in attributes.items():

                    if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                        # Use this as the vertical datum if it is specified and we didn't already have one
                        try:
                            self.crs.geoid_name = v
                            self.crs.vertical_datum = v
                            self.crs.water_surface_reference_datum = v
                        except AttributeError:
                            pass

                    # These attributes are managed by this method and must not be overridden
                    if k not in ['name', 'coordinates', '_FillValue'
                                 ] and v is not None:
                        try:
                            var.setncattr(k, v)
                        except BaseException:
                            logger.info(
                                'Could not add attribute {}: {}, skipping.'.
                                format(k, v))

            var.grid_mapping = 'crs'
            var[:] = used_values

            # NOTE(review): 'var' belongs to 'nc', which is closed when this
            # 'with' block exits - confirm callers only use the return value
            # as a success marker.
            return var
Exemple #12
0
    def __init__(self,
                 output_directory,
                 latitude,
                 longitude,
                 station_name,
                 global_attributes,
                 times=None,
                 verticals=None,
                 vertical_fill=None,
                 output_filename=None,
                 vertical_axis_name=None,
                 vertical_positive=None):
        """
        Create a new station netCDF file and write its static metadata.

        Any existing file at the target path is removed first.  The file then
        receives the global attributes, the station identifier, scalar
        ``latitude`` / ``longitude`` variables, and the ``crs`` and ``platform``
        metadata variables.  Finally `times` and `verticals` are handed to
        ``self.setup_times_and_verticals``.

        :param output_directory: directory for the file; created if missing
        :param latitude: station latitude, also used for ``geospatial_lat_min/max``
        :param longitude: station longitude, also used for ``geospatial_lon_min/max``
        :param station_name: station identifier, stored one character per
            element in ``feature_type_instance`` and as ``platform.ioos_code``
        :param global_attributes: mapping of global attribute name -> value.
            Names listed in ``global_skips`` are ignored and ``None`` values
            are written as the string ``"None"``.  ``title`` and
            ``description`` (when present) become the platform's
            ``short_name`` and ``long_name``.
        :param times: passed through to ``setup_times_and_verticals``
        :param verticals: passed through to ``setup_times_and_verticals``
        :param vertical_fill: stored as ``self.vertical_fill`` (default -9999.9)
        :param output_filename: file name; defaults to
            ``<station_name>_<random integer>.nc``
        :param vertical_axis_name: name of the vertical axis (default ``'z'``)
        :param vertical_positive: positive direction of the vertical axis
            (default ``'down'``)
        """
        if output_filename is None:
            # No name given: derive one from the station name plus a random suffix
            output_filename = '{}_{}.nc'.format(station_name,
                                                int(random.random() * 100000))
            logger.info("No output filename specified, saving as {}".format(
                output_filename))

        self.vertical_positive = vertical_positive or 'down'
        self.vertical_axis_name = vertical_axis_name or 'z'
        self.time_axis_name = 'time'

        # Make directory
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        self.time = None

        # Always start from a fresh file: remove whatever is already at the target path
        self.out_file = os.path.abspath(
            os.path.join(output_directory, output_filename))
        if os.path.isfile(self.out_file):
            os.remove(self.out_file)

        # NOTE(review): EnhancedDataset is presumably a netCDF4.Dataset subclass
        # opened here in write mode - confirm against its definition.
        with EnhancedDataset(self.out_file, 'w') as nc:
            # Global attributes
            # These are set by this script, we don't want someone to be able to set them manually
            global_skips = [
                "time_coverage_start", "time_coverage_end",
                "time_coverage_duration", "time_coverage_resolution",
                "featureType", "geospatial_vertical_positive",
                "geospatial_vertical_min", "geospatial_vertical_max",
                "geospatial_lat_min", "geospatial_lon_min",
                "geospatial_lat_max", "geospatial_lon_max",
                "geospatial_vertical_resolution", "Conventions", "date_created"
            ]
            for k, v in global_attributes.items():
                # None is replaced by the string "None" before being written
                if v is None:
                    v = "None"
                if k not in global_skips:
                    nc.setncattr(k, v)
            nc.setncattr("Conventions", "CF-1.6")
            # Timestamps are written with the seconds field fixed to "00"
            nc.setncattr("date_created",
                         datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z"))
            nc.setncattr("date_issued",
                         datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z"))
            nc.setncattr('cdm_data_type', 'Station')

            # Station name, stored as a character array (one element per character)
            nc.createDimension("feature_type_instance", len(station_name))
            name = nc.createVariable("feature_type_instance", "S1",
                                     ("feature_type_instance", ))
            name.cf_role = "timeseries_id"
            name.long_name = "Identifier for each feature type instance"
            name[:] = list(station_name)

            # Location (scalar variables; min and max are the same single point)
            lat = nc.createVariable("latitude", "f8")
            lat.units = "degrees_north"
            lat.standard_name = "latitude"
            lat.long_name = "sensor latitude"
            lat[:] = latitude
            nc.setncattr("geospatial_lat_min", latitude)
            nc.setncattr("geospatial_lat_max", latitude)
            nc.setncattr("geospatial_lat_units", "degrees_north")

            lon = nc.createVariable("longitude", "f8")
            lon.units = "degrees_east"
            lon.standard_name = "longitude"
            lon.long_name = "sensor longitude"
            lon[:] = longitude
            nc.setncattr("geospatial_lon_min", longitude)
            nc.setncattr("geospatial_lon_max", longitude)
            nc.setncattr("geospatial_lon_units", "degrees_east")

            # Metadata variables
            # NOTE(review): self.crs is bound to a variable of 'nc'; whether it
            # is still usable after this 'with' block closes the dataset
            # depends on EnhancedDataset - confirm.
            self.crs = nc.createVariable("crs", "i4")
            self.crs.long_name = "http://www.opengis.net/def/crs/EPSG/0/4326"
            self.crs.grid_mapping_name = "latitude_longitude"
            self.crs.epsg_code = "EPSG:4326"
            self.crs.semi_major_axis = float(6378137.0)
            self.crs.inverse_flattening = float(298.257223563)

            platform = nc.createVariable("platform", "i4")
            platform.ioos_code = station_name
            # Fall back to the station name when no title/description was supplied
            platform.short_name = global_attributes.get("title", station_name)
            platform.long_name = global_attributes.get("description",
                                                       station_name)
            platform.definition = "http://mmisw.org/ont/ioos/definition/stationID"
            # Global attribute naming the 'platform' variable created above
            nc.setncattr('platform', 'platform')

            if vertical_fill is None:
                vertical_fill = -9999.9
            self.vertical_fill = vertical_fill

            self.setup_times_and_verticals(times, verticals)
            logger.info("Created file at '{}'".format(self.out_file))