def main(output_path, delta, ncml_file=None, glob_string=None, apply_to_members=None, hard_start=None, hard_end=None):
    """Split a collection into time windows and combine each window into one file.

    The collection is built from ``glob_string`` when it is given (with
    ``ncml_file`` passed along as the NcML to apply), otherwise from
    ``ncml_file``. The first window starts at the collection's aggregation
    start truncated to the beginning of the year, month or day, chosen by the
    first of ``delta.years``, ``delta.months`` and ``delta.days`` that is
    positive.

    :param output_path: directory that receives the combined files; it is
        created when missing.
    :param delta: object exposing ``years``, ``months`` and ``days``
        (presumably a ``dateutil`` ``relativedelta`` -- confirm with callers).
    :param ncml_file: NcML file describing the aggregation, or NcML applied
        to every globbed member when ``glob_string`` is used.
    :param glob_string: glob pattern selecting the member files.
    :param apply_to_members: forwarded to ``Collection.from_ncml_file``.
    :param hard_start: forwarded to ``collection.bins``.
    :param hard_end: forwarded to ``collection.bins``.
    :returns: ``0`` once every window has been combined.
    :raises ValueError: if neither ``glob_string`` nor ``ncml_file`` is given,
        or if ``delta`` has no positive ``years``, ``months`` or ``days``.
    """
    if glob_string is not None:
        collection = Collection.from_glob(glob_string, ncml=ncml_file)
    elif ncml_file is not None:
        collection = Collection.from_ncml_file(ncml_file, apply_to_members=apply_to_members)
    else:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError("Either 'glob_string' or 'ncml_file' must be provided.")

    # Truncate the aggregation start to the boundary matching the bin size
    if delta.years > 0:
        starting = collection.aggregation.starting.replace(microsecond=0, second=0, minute=0, hour=0, day=1, month=1)
    elif delta.months > 0:
        starting = collection.aggregation.starting.replace(microsecond=0, second=0, minute=0, hour=0, day=1)
    elif delta.days > 0:
        starting = collection.aggregation.starting.replace(microsecond=0, second=0, minute=0, hour=0)
    else:
        raise ValueError("'delta' must have a positive 'years', 'months' or 'days' value.")

    windows = collection.bins(delta=delta, starting=starting, hard_start=hard_start, hard_end=hard_end)

    # Create output directory
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    for i, window in enumerate(windows):
        # Create a file name
        start_label = window.starting.strftime("%Y%m%dT%H%M")
        end_label = window.ending.strftime("%Y%m%dT%H%M")
        if start_label == end_label:
            file_name = "{0}.nc".format(start_label)
        else:
            file_name = "{0}_TO_{1}.nc".format(start_label, end_label)
        output_file = os.path.join(output_path, file_name)

        pyaxiomlogger.info("Combining ({0}/{1}) - {2} files into {3}".format(i+1, len(windows), len(window.members), output_file))
        Collection.combine(members=window.members, output_file=output_file)

    return 0
def from_string(urn_string):
    """Parse a colon-delimited URN string into an ``IoosUrn``.

    Anything after the first ``#`` (up to the next ``#``) is treated as extra
    text and re-attached to the component. The colon-separated fields at
    positions 2, 3 and 4 become ``asset_type``, ``authority`` and ``label``.
    A sixth field is the ``version`` for ``station`` assets and the
    ``component`` otherwise; a seventh field is always the ``version``.
    Fields beyond the seventh are reported in a warning and ignored.

    :param urn_string: the URN text to parse.
    :returns: a populated ``IoosUrn``, or an empty ``IoosUrn()`` when fewer
        than five colon-separated fields are present.
    """
    pieces = urn_string.split('#')
    fragment = '#{0}'.format(pieces[1]) if len(pieces) > 1 else ''

    fields = pieces[0].split(':')
    field_count = len(fields)

    # Too short to carry asset type, authority and label
    if field_count < 5:
        return IoosUrn()

    urn = IoosUrn()
    urn.asset_type = fields[2]
    urn.authority = fields[3]
    urn.label = fields[4]

    if field_count > 5:
        if urn.asset_type == 'station':
            urn.version = fields[5]
        else:
            component = fields[5] + fragment
            if not field_count > 6:
                # No version follows, so the sixth field is only assumed to be the component
                logger.info("Assuming that {0} is the 'component' piece of the URN (not the 'version')".format(component))
            urn.component = component

    if field_count > 6:
        urn.version = fields[6]

    if field_count > 7:
        logger.warning("The URN is too long stripping off '{}'".format(':'.join(fields[7:])))

    return urn
def main(output_path, delta, ncml_file=None, glob_string=None, apply_to_members=None, hard_start=None, hard_end=None):
    """Bin a collection by time and write one combined file per bin.

    The collection comes from ``glob_string`` when provided (``ncml_file`` is
    then used as the NcML to apply), otherwise from ``ncml_file``. Binning
    starts at the aggregation start truncated to the start of the year, month
    or day, whichever of ``delta.years``, ``delta.months`` and ``delta.days``
    is the first to be positive.

    :param output_path: directory for the combined files; created if absent.
    :param delta: object exposing ``years``, ``months`` and ``days``
        (presumably a ``dateutil`` ``relativedelta`` -- confirm with callers).
    :param ncml_file: NcML aggregation file, or NcML applied to each member
        when ``glob_string`` is given.
    :param glob_string: glob pattern selecting the member files.
    :param apply_to_members: forwarded to ``Collection.from_ncml_file``.
    :param hard_start: forwarded to ``collection.bins``.
    :param hard_end: forwarded to ``collection.bins``.
    :returns: ``0`` after all windows were combined.
    :raises ValueError: if no input source is given, or if ``delta`` has no
        positive ``years``, ``months`` or ``days``.
    """
    if glob_string is not None:
        collection = Collection.from_glob(glob_string, ncml=ncml_file)
    elif ncml_file is not None:
        collection = Collection.from_ncml_file(
            ncml_file, apply_to_members=apply_to_members)
    else:
        # Fail with a clear message instead of an unbound-local error later on.
        raise ValueError("Either 'glob_string' or 'ncml_file' must be provided.")

    midnight = dict(microsecond=0, second=0, minute=0, hour=0)
    if delta.years > 0:
        starting = collection.aggregation.starting.replace(day=1, month=1, **midnight)
    elif delta.months > 0:
        starting = collection.aggregation.starting.replace(day=1, **midnight)
    elif delta.days > 0:
        starting = collection.aggregation.starting.replace(**midnight)
    else:
        raise ValueError("'delta' must have a positive 'years', 'months' or 'days' value.")

    windows = collection.bins(delta=delta,
                              starting=starting,
                              hard_start=hard_start,
                              hard_end=hard_end)

    # Create output directory
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    for i, window in enumerate(windows):
        # Create a file name
        start_label = window.starting.strftime("%Y%m%dT%H%M")
        end_label = window.ending.strftime("%Y%m%dT%H%M")
        if start_label == end_label:
            file_name = "{0}.nc".format(start_label)
        else:
            file_name = "{0}_TO_{1}.nc".format(start_label, end_label)
        output_file = os.path.join(output_path, file_name)

        pyaxiomlogger.info("Combining ({0}/{1}) - {2} files into {3}".format(
            i + 1, len(windows), len(window.members), output_file))
        Collection.combine(members=window.members, output_file=output_file)

    return 0
def test_is_mine(klass, fp):
    """Check that ``fp`` is recognised by ``klass`` and by no other subclass.

    ``CFDataset.load`` must return an instance of exactly ``klass``. The file
    is then reopened as a plain ``CFDataset``; ``klass.is_mine`` must return
    ``True`` for it and every other ``CFDataset`` subclass that defines
    ``is_mine`` must return ``False``.

    :param klass: the ``CFDataset`` subclass expected to claim the file.
    :param fp: path of the file under test.
    """
    loaded = CFDataset.load(fp)
    try:
        assert loaded.__class__ == klass
    finally:
        # This handle used to be dropped without being closed.
        loaded.close()

    subs = [s for s in all_subclasses(CFDataset) if s != klass]

    dsg = CFDataset(fp)
    try:
        logger.info('\nTesting {}'.format(klass.__name__))
        assert klass.is_mine(dsg) is True
        for s in subs:
            if hasattr(s, 'is_mine'):
                logger.info(' * Trying {}...'.format(s.__name__))
                assert s.is_mine(dsg) is False
    finally:
        # Close even when an assertion fails so the file is not left open
        dsg.close()
def from_glob(cls, glob_string, timevar_name='time', ncml=None):
    """Build a collection from every file matching a glob pattern.

    Each matching file is opened -- through ``pyncml.apply`` into a temporary
    file when ``ncml`` is given, directly with ``netCDF4.Dataset`` otherwise.
    Its time range and the ``standard_name`` values of its variables are
    recorded as a member, and the overall time range and the union of
    standard names are accumulated. Files without the time variable, or files
    that fail while being processed, are logged and skipped.

    :param glob_string: pattern handed to ``glob``.
    :param timevar_name: name of the time variable read from each file.
    :param ncml: NcML to apply to each file before reading, or ``None``.
    :returns: ``cls`` wrapping a ``DotDict`` with ``name``, ``timevar_name``,
        ``starting``, ``ending``, ``standard_names`` and ``members`` (members
        sorted by their ``starting`` time).
    """
    dataset_name = None
    dataset_starting = None
    dataset_ending = None
    dataset_variables = []
    dataset_members = []

    files = glob(glob_string)
    logger.info("Processing aggregation containing {!s} files".format(len(files)))

    for i, filepath in enumerate(files):
        logger.info("Processing member ({0}/{1}) - {2} ".format(i+1, len(files), filepath))
        nc = None
        # Reset per file so a stale path from an earlier iteration is never reused
        tmp_fp = None
        try:
            if ncml is not None:
                # Apply NcML
                tmp_f, tmp_fp = tempfile.mkstemp(prefix="nc")
                os.close(tmp_f)
                nc = pyncml.apply(filepath, ncml, output_file=tmp_fp)
            else:
                nc = netCDF4.Dataset(filepath)

            if dataset_name is None:
                # Look at the global attributes explicitly: ``Dataset.name``
                # is the group-name property, not a global attribute.
                global_attrs = nc.ncattrs()
                if 'name' in global_attrs:
                    dataset_name = nc.getncattr('name')
                elif 'title' in global_attrs:
                    dataset_name = nc.getncattr('title')
                else:
                    dataset_name = "Pyaxiom Glob Dataset"

            timevar = nc.variables.get(timevar_name)
            if timevar is None:
                logger.error("Time variable '{0}' was not found in file '{1}'. Skipping.".format(timevar_name, filepath))
                continue

            # Start/Stop of NetCDF file
            time_values = timevar[:]
            starting = netCDF4.num2date(np.min(time_values), units=timevar.units)
            ending = netCDF4.num2date(np.max(time_values), units=timevar.units)

            # A real list (not a lazy ``filter``) so concatenation works on Python 2 and 3
            variables = [
                var.standard_name
                for var in nc.variables.values()
                if hasattr(var, 'standard_name') and var.standard_name
            ]
            dataset_variables = list(set(dataset_variables + variables))

            if starting.tzinfo is None:
                starting = starting.replace(tzinfo=pytz.utc)
            if ending.tzinfo is None:
                ending = ending.replace(tzinfo=pytz.utc)

            if dataset_starting is None or starting < dataset_starting:
                dataset_starting = starting
            if dataset_ending is None or ending > dataset_ending:
                dataset_ending = ending

            member = DotDict(path=filepath, standard_names=variables, starting=starting, ending=ending)
            dataset_members.append(member)
        except Exception:
            # Best effort: one bad member must not abort the whole aggregation
            logger.exception("Something went wrong with {0}".format(filepath))
            continue
        finally:
            # ``nc`` is still None when opening the file itself failed
            if nc is not None:
                nc.close()
            if tmp_fp is not None:
                try:
                    os.remove(tmp_fp)
                except OSError:
                    pass

    dataset_members = sorted(dataset_members, key=operator.attrgetter('starting'))
    return cls(DotDict(name=dataset_name,
                       timevar_name=timevar_name,
                       starting=dataset_starting,
                       ending=dataset_ending,
                       standard_names=dataset_variables,
                       members=dataset_members))
def from_glob(cls, glob_string, timevar_name='time', ncml=None):
    """Create a collection out of all files matched by ``glob_string``.

    Every matched file is opened (via ``pyncml.apply`` into a temporary file
    when ``ncml`` is supplied, otherwise with ``netCDF4.Dataset``). Its
    minimum and maximum time and the ``standard_name`` of each variable are
    stored as a member; the earliest start, latest end and the union of
    standard names are tracked for the whole set. Files missing the time
    variable, or raising while being read, are logged and skipped.

    :param glob_string: pattern handed to ``glob``.
    :param timevar_name: name of the time variable read from each file.
    :param ncml: NcML to apply to each file before reading, or ``None``.
    :returns: ``cls`` wrapping a ``DotDict`` with ``name``, ``timevar_name``,
        ``starting``, ``ending``, ``standard_names`` and ``members`` (members
        sorted by their ``starting`` time).
    """
    dataset_name = None
    dataset_starting = None
    dataset_ending = None
    dataset_variables = []
    dataset_members = []

    files = glob(glob_string)
    logger.info("Processing aggregation containing {!s} files".format(len(files)))

    for i, filepath in enumerate(files):
        logger.info("Processing member ({0}/{1}) - {2} ".format(i+1, len(files), filepath))
        nc = None
        # Reset per file so cleanup only ever targets this iteration's temp file
        tmp_fp = None
        try:
            if ncml is not None:
                # Apply NcML
                tmp_f, tmp_fp = tempfile.mkstemp(prefix="nc")
                os.close(tmp_f)
                nc = pyncml.apply(filepath, ncml, output_file=tmp_fp)
            else:
                nc = netCDF4.Dataset(filepath)

            if dataset_name is None:
                global_attrs = nc.ncattrs()
                if 'name' in global_attrs:
                    # ``nc.name`` would return the group-name property, so
                    # fetch the global attribute explicitly.
                    dataset_name = nc.getncattr('name')
                elif 'title' in global_attrs:
                    dataset_name = nc.getncattr('title')
                else:
                    dataset_name = "Pyaxiom Glob Dataset"

            timevar = nc.variables.get(timevar_name)
            if timevar is None:
                logger.error("Time variable '{0}' was not found in file '{1}'. Skipping.".format(timevar_name, filepath))
                continue

            # Start/Stop of NetCDF file
            time_values = timevar[:]
            starting = netCDF4.num2date(np.min(time_values), units=timevar.units)
            ending = netCDF4.num2date(np.max(time_values), units=timevar.units)

            # Non-empty standard names of all variables in this file
            variables = [
                var.standard_name
                for var in nc.variables.values()
                if hasattr(var, 'standard_name') and var.standard_name
            ]
            dataset_variables = list(set(dataset_variables + variables))

            if starting.tzinfo is None:
                starting = starting.replace(tzinfo=pytz.utc)
            if ending.tzinfo is None:
                ending = ending.replace(tzinfo=pytz.utc)

            if dataset_starting is None or starting < dataset_starting:
                dataset_starting = starting
            if dataset_ending is None or ending > dataset_ending:
                dataset_ending = ending

            member = DotDict(path=filepath, standard_names=variables, starting=starting, ending=ending)
            dataset_members.append(member)
        except Exception:
            # Best effort: one bad member must not abort the whole aggregation
            logger.exception("Something went wrong with {0}".format(filepath))
            continue
        finally:
            # ``nc`` is still None when opening the file itself failed
            if nc is not None:
                nc.close()
            if tmp_fp is not None:
                try:
                    os.remove(tmp_fp)
                except OSError:
                    pass

    dataset_members = sorted(dataset_members, key=operator.attrgetter('starting'))
    return cls(DotDict(name=dataset_name,
                       timevar_name=timevar_name,
                       starting=dataset_starting,
                       ending=dataset_ending,
                       standard_names=dataset_variables,
                       members=dataset_members))
def __init__(self, output_directory, latitude, longitude, station_name, global_attributes, times=None, verticals=None, vertical_fill=None, output_filename=None, vertical_axis_name=None, vertical_positive=None):
    """Create the output file and write its station-level metadata.

    Any existing file at the target path is removed. The file is created
    with the caller's global attributes (minus the reserved keys that this
    method manages itself), the station identifier, scalar latitude and
    longitude variables, a ``crs`` variable and a ``platform`` variable. The
    file is then reopened in append mode as ``self._nc`` and
    ``setup_times_and_verticals`` is called.

    :param output_directory: directory for the file; created if missing.
    :param latitude: station latitude, written to the ``latitude`` variable
        and the ``geospatial_lat_*`` attributes.
    :param longitude: station longitude, written to the ``longitude``
        variable and the ``geospatial_lon_*`` attributes.
    :param station_name: identifier stored character by character in
        ``feature_type_instance``; also parsed with ``IoosUrn.from_string``.
    :param global_attributes: mapping of global attributes; ``None`` values
        are stored as the string ``"None"``.
    :param times: forwarded to ``setup_times_and_verticals``.
    :param verticals: forwarded to ``setup_times_and_verticals``.
    :param vertical_fill: fill value for verticals, default ``-9999.9``.
    :param output_filename: file name; defaults to
        ``<station_name>_<random integer>.nc``.
    :param vertical_axis_name: name of the vertical axis, default ``'z'``.
    :param vertical_positive: positive direction, default ``'down'``.
    """
    if output_filename is None:
        output_filename = '{}_{}.nc'.format(station_name, int(random.random() * 100000))
        logger.info("No output filename specified, saving as {}".format(output_filename))

    self.vertical_positive = vertical_positive or 'down'
    self.vertical_axis_name = vertical_axis_name or 'z'
    self.time_axis_name = 'time'

    # Make directory
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    self.time = None

    self.out_file = os.path.abspath(os.path.join(output_directory, output_filename))
    if os.path.isfile(self.out_file):
        os.remove(self.out_file)

    with EnhancedDataset(self.out_file, 'w') as nc:
        # Global attributes
        # These are set by this script, we don't want someone to be able to set them manually.
        # Every entry needs its trailing comma: adjacent string literals are
        # silently concatenated into one (wrong) key otherwise.
        global_skips = [
            "time_coverage_start",
            "time_coverage_end",
            "time_coverage_duration",
            "time_coverage_resolution",
            "featureType",
            "geospatial_vertical_positive",
            "geospatial_vertical_min",
            "geospatial_vertical_max",
            "geospatial_lat_min",
            "geospatial_lon_min",
            "geospatial_lat_max",
            "geospatial_lon_max",
            "geospatial_bounds",
            "geospatial_vertical_resolution",
            "geospatial_lat_resolution",
            "geospatial_lon_resolution",
            "Conventions",
            "date_created",
            "date_modified",
            "date_issued",
        ]
        for k, v in global_attributes.items():
            if v is None:
                v = "None"
            if k not in global_skips:
                nc.setncattr(k, v)

        now_date = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z")
        nc.setncattr("Conventions", "CF-1.6,ACDD-1.3")
        nc.setncattr("date_created", now_date)
        nc.setncattr("date_modified", now_date)
        nc.setncattr("date_issued", now_date)
        if not hasattr(nc, "date_metadata_modified"):
            nc.setncattr("date_metadata_modified", now_date)

        # Allow the customization of this attribute
        if 'cdm_data_type' not in global_attributes:
            nc.setncattr('cdm_data_type', 'Station')

        # Append to any history supplied through the global attributes
        old_history = getattr(nc, 'history', '')
        new_history = '{} - {} - {}'.format(now_date, 'pyaxiom', 'File created using pyaxiom')
        if old_history:
            nc.setncattr('history', '{}\n{}'.format(old_history, new_history))
        else:
            nc.setncattr('history', new_history)

        # Station name
        nc.createDimension("feature_type_instance", len(station_name))
        name = nc.createVariable("feature_type_instance", "S1", ("feature_type_instance",))
        name.cf_role = "timeseries_id"
        name.long_name = "Identifier for each feature type instance"
        name[:] = list(station_name)

        # Location
        lat = nc.createVariable("latitude", get_type(latitude))
        lat.units = "degrees_north"
        lat.standard_name = "latitude"
        lat.long_name = "sensor latitude"
        lat.axis = "Y"
        lat.valid_min = latitude
        lat.valid_max = latitude
        lat[:] = latitude
        nc.setncattr("geospatial_lat_min", latitude)
        nc.setncattr("geospatial_lat_max", latitude)
        nc.setncattr("geospatial_lat_resolution", 0)
        nc.setncattr("geospatial_lat_units", "degrees_north")

        lon = nc.createVariable("longitude", get_type(longitude))
        lon.units = "degrees_east"
        lon.standard_name = "longitude"
        lon.long_name = "sensor longitude"
        lon.axis = "X"
        lon.valid_min = longitude
        lon.valid_max = longitude
        lon[:] = longitude
        nc.setncattr("geospatial_lon_min", longitude)
        nc.setncattr("geospatial_lon_max", longitude)
        nc.setncattr("geospatial_lon_resolution", 0)
        nc.setncattr("geospatial_lon_units", "degrees_east")

        nc.setncattr("geospatial_bounds", "POINT({} {})".format(longitude, latitude))
        if not hasattr(nc, "geospatial_bounds_crs"):
            nc.setncattr("geospatial_bounds_crs", "EPSG:4326")

        # Metadata variables
        self.crs = nc.createVariable("crs", "i4")
        self.crs.long_name = "http://www.opengis.net/def/crs/EPSG/0/4326"
        self.crs.grid_mapping_name = "latitude_longitude"
        self.crs.epsg_code = "EPSG:4326"
        self.crs.semi_major_axis = float(6378137.0)
        self.crs.inverse_flattening = float(298.257223563)

        platform = nc.createVariable("platform", "i4")
        platform.definition = "http://mmisw.org/ont/ioos/definition/stationID"

        # Prefer the parsed URN for naming when the station name is a valid one
        urn = IoosUrn.from_string(station_name)
        if urn.valid() is True:
            platform.short_name = global_attributes.get("title", urn.label)
            platform.long_name = global_attributes.get('summary', 'Station {}'.format(urn.label))
            platform.ioos_code = urn.urn
        else:
            platform.short_name = global_attributes.get("title", station_name)
            platform.long_name = global_attributes.get("summary", station_name)
            platform.ioos_code = station_name

        if vertical_fill is None:
            vertical_fill = -9999.9
        self.vertical_fill = vertical_fill

    # Reopen in append mode; this handle is kept for later writes
    self._nc = EnhancedDataset(self.out_file, 'a')
    self.setup_times_and_verticals(times, verticals)
    logger.info("Created file at '{}'".format(self.out_file))
def add_variable(self, variable_name, values, times=None, verticals=None, sensor_vertical_datum=None, attributes=None, unlink_from_profile=None, fillvalue=None, raise_on_error=False, create_instrument_variable=False):
    """Create a data variable in the file and write ``values`` into it.

    ``values`` is first reshaped to ``(time,)`` or ``(time, vertical)`` and
    reordered with ``self.time_indexes`` / ``self.vertical_indexes``. When
    that reshape raises ``ValueError`` (and ``raise_on_error`` is not set),
    each value is placed individually by bisecting ``times`` (and
    ``verticals``) against the file's axes; unmatched cells keep the fill
    value.

    :param variable_name: name of the variable to create.
    :param values: data to store; lists and tuples are converted to arrays
        and int64 data is downcast to int32.
    :param times: time of each value, needed only for the manual matching.
    :param verticals: vertical position of each value, needed for the manual
        matching of 2-D data and for the ``sensor_depth`` variable.
    :param sensor_vertical_datum: datum recorded on the ``crs`` variable.
    :param attributes: mapping of variable attributes; it is not modified.
    :param unlink_from_profile: when ``True`` the data is stored along time
        only, even if the file has several vertical levels.
    :param fillvalue: fill value, default ``-9999.9``, cast to the dtype of
        ``values``.
    :param raise_on_error: re-raise the reshape ``ValueError`` instead of
        falling back to manual matching.
    :param create_instrument_variable: when ``True`` call
        ``self.add_instrument_variable(variable_name)`` afterwards.
    :returns: the created variable.
    :raises ValueError: if manual matching is needed but ``times`` (or
        ``verticals``) is missing, or if the data is not 1-D or 2-D.
    """
    if isinstance(values, (list, tuple,)) and values:
        values = np.asarray(values)
    if get_type(values) == np.int64:
        # Create values as int32 because DAP does not support int64 until DAP4.
        values = values.astype(np.int32)

    if isinstance(times, (list, tuple,)) and times:
        times = np.asarray(times)
    if get_type(times) == np.int64:
        # Create time as int32 because DAP does not support int64 until DAP4.
        times = times.astype(np.int32)

    if isinstance(verticals, (list, tuple,)) and verticals:
        verticals = np.asarray(verticals)
    if get_type(verticals) == np.int64:
        # Create verticals as int32 because DAP does not support int64 until DAP4.
        verticals = verticals.astype(np.int32)

    # Set vertical datum on the CRS variable
    if sensor_vertical_datum is not None:
        try:
            self.crs.geoid_name = sensor_vertical_datum
            self.crs.vertical_datum = sensor_vertical_datum
            self.crs.water_surface_reference_datum = sensor_vertical_datum
            if not hasattr(self._nc, "geospatial_bounds_vertical_crs"):
                self._nc.setncattr("geospatial_bounds_vertical_crs", sensor_vertical_datum)
        except AttributeError:
            pass

    # Set default fillvalue for new variables
    if fillvalue is None:
        fillvalue = -9999.9
    fillvalue = values.dtype.type(fillvalue)

    used_values = None
    vertical_axis = self._nc.variables.get(self.vertical_axis_name)
    try:
        if unlink_from_profile is True:
            used_values = np.ma.reshape(values, (self.time.size, ))
            used_values = used_values[self.time_indexes]
        # These next two cases should work for all but a few cases, which are caught below
        elif vertical_axis.size == 1:
            used_values = np.ma.reshape(values, (self.time.size, ))
            used_values = used_values[self.time_indexes]
        else:
            used_values = np.ma.reshape(values, (self.time.size, vertical_axis.size, ))
            used_values = used_values[self.time_indexes]
            try:
                used_values = used_values[:, self.vertical_indexes]
            except IndexError:
                # The vertical values most likely had duplicates. Ignore the
                # faulty index here and try to save the values as is.
                pass
    except ValueError:
        if raise_on_error is True:
            raise
        else:
            logger.warning("Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}".format(self.time.size, vertical_axis.size, values.size))
        if vertical_axis.size > 1:
            if times is not None and verticals is not None:
                # Hmmm, we have two actual height values for this station.
                # Not cool man, not cool.
                # Reindex the entire values array.  This is slow.
                indexed = ((bisect.bisect_left(self.time[:], times[i]), bisect.bisect_left(vertical_axis[:], verticals[i]), values[i]) for i in range(values.size))
                used_values = np.ndarray((self.time.size, vertical_axis.size, ), dtype=get_type(values))
                used_values.fill(fillvalue)
                for (tzi, zzi, vz) in indexed:
                    if zzi < vertical_axis.size and tzi < self.time.size:
                        used_values[tzi, zzi] = vz
                del indexed
            else:
                raise ValueError("You need to pass in both 'times' and 'verticals' parameters that matches the size of the 'values' parameter.")
        else:
            if times is not None:
                # Ugh, find the time indexes manually
                indexed = ((bisect.bisect_left(self.time[:], times[i]), values[i]) for i in range(values.size))
                used_values = np.ndarray((self.time.size, ), dtype=get_type(values))
                used_values.fill(fillvalue)
                for (tzi, vz) in indexed:
                    if tzi < self.time.size:
                        used_values[tzi] = vz
                del indexed
            else:
                raise ValueError("You need to pass in a 'times' parameter that matches the size of the 'values' parameter.")

    logger.info("Setting values for {}...".format(variable_name))
    if len(used_values.shape) == 1:
        var = self._nc.createVariable(variable_name, get_type(used_values), ("time",), fill_value=fillvalue, chunksizes=(self.time_chunk,), zlib=True)
        self._nc.setncattr('ncei_template_version', 'NCEI_NetCDF_TimeSeries_Orthogonal_Template_v2.0')
        if vertical_axis.size == 1:
            var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
        else:
            # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
            var.coordinates = "{} latitude longitude".format(self.time_axis_name)
            if unlink_from_profile is True:
                # Create metadata variable for the sensor_depth
                if verticals is not None and self._nc.variables.get('sensor_depth') is None:
                    logger.info("Setting the special case 'sensor_depth' metadata variable")
                    inst_depth = self._nc.createVariable('sensor_depth', get_type(verticals))
                    inst_depth.units = 'm'
                    inst_depth.standard_name = 'surface_altitude'
                    inst_depth.positive = self.vertical_positive
                    if self.vertical_positive.lower() == 'down':
                        inst_depth.long_name = 'sensor depth below datum'
                    elif self.vertical_positive.lower() == 'up':
                        inst_depth.long_name = 'sensor height above datum'
                    inst_depth.datum = sensor_vertical_datum or 'Unknown'
                    # Test the element count, not truthiness: bool() of an
                    # array is ambiguous for several elements and False for
                    # a single depth of 0.
                    if np.size(verticals) > 0:
                        inst_depth[:] = verticals[0]
                    else:
                        inst_depth[:] = self.vertical_fill

    elif len(used_values.shape) == 2:
        var = self._nc.createVariable(variable_name, get_type(used_values), ("time", "z",), fill_value=fillvalue, chunksizes=(self.time_chunk, vertical_axis.size,), zlib=True)
        var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
        self._nc.setncattr('ncei_template_version', 'NCEI_NetCDF_TimeSeriesProfile_Orthogonal_Template_v2.0')
    else:
        raise ValueError("Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}.".format(used_values.shape, len(used_values.shape)))

    # Set missing_value as well. Work on a copy so the caller's mapping is
    # left untouched.
    attributes = dict(attributes) if attributes else {}
    attributes['missing_value'] = fillvalue

    # Set the variable attributes as passed in
    for k, v in attributes.items():

        if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
            # Use this as the vertical datum if it is specified and we didn't already have one
            try:
                self.crs.geoid_name = v
                self.crs.vertical_datum = v
                self.crs.water_surface_reference_datum = v
                if not hasattr(self._nc, "geospatial_bounds_vertical_crs"):
                    self._nc.setncattr("geospatial_bounds_vertical_crs", v)
            except AttributeError:
                pass

        if k not in ['name', 'coordinates', '_FillValue'] and v is not None:
            try:
                var.setncattr(k, v)
            except Exception:
                # Best effort: an attribute that cannot be stored is skipped
                logger.info('Could not add attribute {}: {}, skipping.'.format(k, v))

    # Add a long name if it doesn't exist
    if not hasattr(var, 'long_name'):
        varunits = getattr(var, 'units', None)
        vartitle = getattr(var, 'standard_name', getattr(var, 'name'))
        vartitle = vartitle.title().replace('_', ' ')
        if varunits is not None:
            vartitle = '{} ({})'.format(vartitle, varunits)
        var.long_name = vartitle
    var.grid_mapping = 'crs'
    var.platform = 'platform'
    var.ancillary_variables = 'platform'
    var.coverage_content_type = 'physicalMeasurement'
    var[:] = used_values

    if create_instrument_variable is True:
        self.add_instrument_variable(variable_name)

    self._nc.sync()
    del used_values
    return var
def __init__(self, output_directory, latitude, longitude, station_name, global_attributes, times=None, verticals=None, vertical_fill=None, output_filename=None, vertical_axis_name=None, vertical_positive=None):
    """Create the output file and write its station-level metadata.

    Opens a new ``netCDF4.Dataset`` as ``self.nc``, copies the caller's
    global attributes (except the reserved keys managed here), then writes
    the station identifier, scalar latitude and longitude variables, a
    ``crs`` variable and a ``platform`` variable before calling
    ``setup_times_and_verticals``.

    :param output_directory: directory for the file; created if missing.
    :param latitude: station latitude, stored in the ``latitude`` variable
        and the ``geospatial_lat_min`` / ``geospatial_lat_max`` attributes.
    :param longitude: station longitude, stored in the ``longitude``
        variable and the ``geospatial_lon_min`` / ``geospatial_lon_max``
        attributes.
    :param station_name: identifier stored character by character in
        ``feature_type_instance`` and as the platform ``ioos_code``.
    :param global_attributes: mapping of global attributes; ``None`` values
        are stored as the string ``"None"``.
    :param times: forwarded to ``setup_times_and_verticals``.
    :param verticals: forwarded to ``setup_times_and_verticals``.
    :param vertical_fill: fill value for verticals, default ``-9999.9``.
    :param output_filename: file name; defaults to
        ``<station_name>_<random integer>.nc``.
    :param vertical_axis_name: name of the vertical axis, default ``'z'``.
    :param vertical_positive: positive direction, default ``'down'``.
    """
    if output_filename is None:
        output_filename = '{}_{}.nc'.format(station_name, int(random.random()*100000))
        logger.info("No output filename specified, saving as {}".format(output_filename))

    self.vertical_positive = vertical_positive or 'down'
    self.vertical_axis_name = vertical_axis_name or 'z'
    self.time_axis_name = 'time'

    # Make directory
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    out_file = os.path.abspath(os.path.join(output_directory, output_filename))
    self.nc = netCDF4.Dataset(out_file, 'w')
    self.time = None

    # Global attributes
    # These are set by this script, we don't want someone to be able to set them manually
    global_skips = ["time_coverage_start", "time_coverage_end", "time_coverage_duration", "time_coverage_resolution",
                    "featureType", "geospatial_vertical_positive", "geospatial_vertical_min", "geospatial_vertical_max",
                    "geospatial_lat_min", "geospatial_lon_min", "geospatial_lat_max", "geospatial_lon_max",
                    "geospatial_vertical_resolution", "Conventions", "date_created"]
    # ``items()`` works on Python 2 and 3, unlike ``iteritems()``
    for k, v in global_attributes.items():
        if v is None:
            v = "None"
        if k not in global_skips:
            self.nc.setncattr(k, v)
    self.nc.setncattr("Conventions", "CF-1.6")
    self.nc.setncattr("date_created", datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z"))

    # Station name
    self.nc.createDimension("feature_type_instance", len(station_name))
    name = self.nc.createVariable("feature_type_instance", "S1", ("feature_type_instance",))
    name.cf_role = "timeseries_id"
    name.long_name = "Identifier for each feature type instance"
    name[:] = list(station_name)

    # Location
    lat = self.nc.createVariable("latitude", "f8")
    lat.units = "degrees_north"
    lat.standard_name = "latitude"
    lat.long_name = "sensor latitude"
    lat[:] = latitude
    self.nc.setncattr("geospatial_lat_min", latitude)
    self.nc.setncattr("geospatial_lat_max", latitude)

    lon = self.nc.createVariable("longitude", "f8")
    lon.units = "degrees_east"
    lon.standard_name = "longitude"
    lon.long_name = "sensor longitude"
    lon[:] = longitude
    self.nc.setncattr("geospatial_lon_min", longitude)
    self.nc.setncattr("geospatial_lon_max", longitude)

    # Metadata variables
    self.crs = self.nc.createVariable("crs", "i4")
    self.crs.long_name = "http://www.opengis.net/def/crs/EPSG/0/4326"
    self.crs.grid_mapping_name = "latitude_longitude"
    self.crs.epsg_code = "EPSG:4326"
    self.crs.semi_major_axis = float(6378137.0)
    self.crs.inverse_flattening = float(298.257223563)

    platform = self.nc.createVariable("platform", "i4")
    platform.ioos_code = station_name
    platform.short_name = global_attributes.get("title", station_name)
    platform.long_name = global_attributes.get("description", station_name)
    platform.definition = "http://mmisw.org/ont/ioos/definition/stationID"

    if vertical_fill is None:
        vertical_fill = -9999.9
    self.vertical_fill = vertical_fill

    self.setup_times_and_verticals(times, verticals)
    logger.info("Created file at '{}'".format(out_file))
def add_variable(self, variable_name, values, times=None, verticals=None, sensor_vertical_datum=None, attributes=None, unlink_from_profile=None, fillvalue=None, raise_on_error=False):
    """Create an ``f8`` data variable in ``self.nc`` and write ``values``.

    ``values`` is reshaped to ``(time,)`` or ``(time, z)`` and reordered with
    ``self.time_indexes`` / ``self.vertical_indexes``. When the reshape
    raises ``ValueError`` (and ``raise_on_error`` is not set) each value is
    placed individually by bisecting ``times`` (and ``verticals``) against
    ``self.time`` and ``self.z``; unmatched cells keep the fill value.

    :param variable_name: name of the variable to create.
    :param values: data to store; lists and tuples are converted to arrays.
    :param times: time of each value, needed only for the manual matching.
    :param verticals: vertical position of each value, used for the manual
        matching of 2-D data and for the ``sensor_depth`` variable.
    :param sensor_vertical_datum: datum recorded on the ``crs`` variable.
    :param attributes: mapping of attributes set on the new variable.
    :param unlink_from_profile: when ``True`` the data is stored along time
        only, even if the file has several vertical levels.
    :param fillvalue: fill value, default ``-9999.9``.
    :param raise_on_error: close the file and re-raise the reshape
        ``ValueError`` instead of falling back to manual matching.
    :returns: the created variable.
    :raises ValueError: if manual matching is needed but ``times`` (or
        ``verticals``) is missing, or if the data is not 1-D or 2-D.
    """
    if isinstance(values, (list, tuple,)) and values:
        values = np.asarray(values)
    if isinstance(times, (list, tuple,)) and times:
        times = np.asarray(times)
    if isinstance(verticals, (list, tuple,)) and verticals:
        verticals = np.asarray(verticals)

    # Set vertical datum on the CRS variable
    if sensor_vertical_datum is not None:
        try:
            self.crs.geoid_name = sensor_vertical_datum
            self.crs.vertical_datum = sensor_vertical_datum
            self.crs.water_surface_reference_datum = sensor_vertical_datum
        except AttributeError:
            pass

    # Set default fillvalue for new variables
    if fillvalue is None:
        fillvalue = -9999.9

    used_values = None
    try:
        if unlink_from_profile is True:
            used_values = np.ma.reshape(values, (self.time.size, ))
            used_values = used_values[self.time_indexes]
        # These next two cases should work for all but a few cases, which are caught below
        elif self.z.size == 1:
            used_values = np.ma.reshape(values, (self.time.size, ))
            used_values = used_values[self.time_indexes]
        else:
            used_values = np.ma.reshape(values, (self.time.size, self.z.size, ))
            used_values = used_values[self.time_indexes]
            try:
                used_values = used_values[:, self.vertical_indexes]
            except IndexError:
                # The vertical values most likely had duplicates. Ignore the
                # faulty index here and try to save the values as is.
                pass
    except ValueError:
        if raise_on_error is True:
            self.close()
            raise
        else:
            logger.exception("Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}".format(self.time.size, self.z.size, values.size))
        if self.z.size > 1:
            if times is not None and verticals is not None:
                # Hmmm, we have two actual height values for this station.
                # Not cool man, not cool.
                # Reindex the entire values array.  This is slow.
                # ``range`` (not ``xrange``) so this also runs on Python 3
                indexed = ((bisect.bisect_left(self.time[:], times[i]), bisect.bisect_left(self.z[:], verticals[i]), values[i]) for i in range(values.size))
                used_values = np.ndarray((self.time.size, self.z.size, ), dtype=np.float64)
                used_values.fill(float(fillvalue))
                for (tzi, zzi, vz) in indexed:
                    if zzi < self.z.size and tzi < self.time.size:
                        used_values[tzi, zzi] = vz
            else:
                self.close()
                raise ValueError("You need to pass in both 'times' and 'verticals' parameters that matches the size of the 'values' parameter.")
        else:
            if times is not None:
                # Ugh, find the time indexes manually
                indexed = ((bisect.bisect_left(self.time[:], times[i]), values[i]) for i in range(values.size))
                used_values = np.ndarray((self.time.size, ), dtype=np.float64)
                used_values.fill(float(fillvalue))
                for (tzi, vz) in indexed:
                    if tzi < self.time.size:
                        used_values[tzi] = vz
            else:
                self.close()
                raise ValueError("You need to pass in a 'times' parameter that matches the size of the 'values' parameter.")

    logger.info("Setting values for {}...".format(variable_name))
    if len(used_values.shape) == 1:
        var = self.nc.createVariable(variable_name, "f8", ("time",), fill_value=fillvalue, chunksizes=(1000,), zlib=True)
        if self.z.size == 1:
            var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
        else:
            # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
            var.coordinates = "time latitude longitude"
            if unlink_from_profile is True:
                # Create metadata variable for the sensor_depth
                if self.nc.variables.get('sensor_depth') is None:
                    logger.info("Setting the special case 'sensor_depth' metadata variable")
                    inst_depth = self.nc.createVariable('sensor_depth', 'f4')
                    inst_depth.units = 'm'
                    inst_depth.standard_name = 'surface_altitude'
                    inst_depth.long_name = 'sensor depth below datum'
                    inst_depth.positive = self.vertical_positive
                    inst_depth.datum = sensor_vertical_datum or 'Unknown'
                    # ``verticals`` is optional; fall back to the fill value
                    # instead of failing on ``None[0]`` or an empty sequence.
                    if verticals is not None and np.size(verticals) > 0:
                        inst_depth[:] = verticals[0] * -1
                    else:
                        inst_depth[:] = self.vertical_fill

    elif len(used_values.shape) == 2:
        var = self.nc.createVariable(variable_name, "f8", ("time", "z",), fill_value=fillvalue, chunksizes=(1000, self.z.size,), zlib=True)
        var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
    else:
        raise ValueError("Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}.".format(used_values.shape, len(used_values.shape)))

    # Set the variable attributes as passed in
    if attributes:
        # ``items()`` works on Python 2 and 3, unlike ``iteritems()``
        for k, v in attributes.items():

            if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                # Use this as the vertical datum if it is specified and we didn't already have one
                try:
                    self.crs.geoid_name = v
                    self.crs.vertical_datum = v
                    self.crs.water_surface_reference_datum = v
                except AttributeError:
                    pass

            if k != '_FillValue' and v is not None:
                try:
                    setattr(var, k, v)
                except Exception:
                    # Best effort: an attribute that cannot be stored is skipped
                    logger.info('Could not add attribute {}: {}, skipping.'.format(k, v))

    var.grid_mapping = 'crs'
    var[:] = used_values

    return var
def add_variable(self, variable_name, values, times=None, verticals=None, sensor_vertical_datum=None, attributes=None, unlink_from_profile=None, fillvalue=None, raise_on_error=False):
    """Create a data variable in the output file and write ``values``.

    ``values`` is reshaped to ``(time,)`` or ``(time, z)`` and reordered with
    ``self.time_indexes`` / ``self.vertical_indexes``. When the reshape
    raises ``ValueError`` (and ``raise_on_error`` is not set) each value is
    placed individually by bisecting ``times`` (and ``verticals``) against
    ``self.time`` and ``self.z``; unmatched cells keep the fill value. The
    file at ``self.out_file`` is opened in append mode for the write.

    :param variable_name: name of the variable to create.
    :param values: data to store; lists and tuples are converted to arrays.
    :param times: time of each value, needed only for the manual matching.
    :param verticals: vertical position of each value, used for the manual
        matching of 2-D data and for the ``sensor_depth`` variable.
    :param sensor_vertical_datum: datum recorded on the ``crs`` variable.
    :param attributes: mapping of attributes set on the new variable.
    :param unlink_from_profile: when ``True`` the data is stored along time
        only, even if the file has several vertical levels.
    :param fillvalue: fill value, default ``-9999.9``.
    :param raise_on_error: re-raise the reshape ``ValueError`` instead of
        falling back to manual matching.
    :returns: the created variable.
    :raises ValueError: if manual matching is needed but ``times`` (or
        ``verticals``) is missing, or if the data is not 1-D or 2-D.
    """
    if isinstance(values, (list, tuple, )) and values:
        values = np.asarray(values)
    if isinstance(times, (list, tuple, )) and times:
        times = np.asarray(times)
    if isinstance(verticals, (list, tuple, )) and verticals:
        verticals = np.asarray(verticals)

    # Set vertical datum on the CRS variable
    if sensor_vertical_datum is not None:
        try:
            self.crs.geoid_name = sensor_vertical_datum
            self.crs.vertical_datum = sensor_vertical_datum
            self.crs.water_surface_reference_datum = sensor_vertical_datum
        except AttributeError:
            pass

    # Set default fillvalue for new variables
    if fillvalue is None:
        fillvalue = -9999.9

    used_values = None
    try:
        if unlink_from_profile is True:
            used_values = np.ma.reshape(values, (self.time.size, ))
            used_values = used_values[self.time_indexes]
        # These next two cases should work for all but a few cases, which are caught below
        elif self.z.size == 1:
            used_values = np.ma.reshape(values, (self.time.size, ))
            used_values = used_values[self.time_indexes]
        else:
            used_values = np.ma.reshape(values, (self.time.size, self.z.size, ))
            used_values = used_values[self.time_indexes]
            try:
                used_values = used_values[:, self.vertical_indexes]
            except IndexError:
                # The vertical values most likely had duplicates. Ignore the
                # faulty index here and try to save the values as is.
                pass
    except ValueError:
        if raise_on_error is True:
            raise
        else:
            logger.warning(
                "Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}"
                .format(self.time.size, self.z.size, values.size))
        if self.z.size > 1:
            if times is not None and verticals is not None:
                # Hmmm, we have two actual height values for this station.
                # Not cool man, not cool.
                # Reindex the entire values array.  This is slow.
                indexed = ((bisect.bisect_left(self.time[:], times[i]),
                            bisect.bisect_left(self.z[:], verticals[i]),
                            values[i]) for i in range(values.size))
                used_values = np.ndarray((self.time.size, self.z.size, ), dtype=values.dtype)
                used_values.fill(float(fillvalue))
                for (tzi, zzi, vz) in indexed:
                    if zzi < self.z.size and tzi < self.time.size:
                        used_values[tzi, zzi] = vz
            else:
                raise ValueError(
                    "You need to pass in both 'times' and 'verticals' parameters that matches the size of the 'values' parameter."
                )
        else:
            if times is not None:
                # Ugh, find the time indexes manually
                indexed = ((bisect.bisect_left(self.time[:], times[i]),
                            values[i]) for i in range(values.size))
                used_values = np.ndarray((self.time.size, ), dtype=values.dtype)
                used_values.fill(float(fillvalue))
                for (tzi, vz) in indexed:
                    if tzi < self.time.size:
                        used_values[tzi] = vz
            else:
                raise ValueError(
                    "You need to pass in a 'times' parameter that matches the size of the 'values' parameter."
                )

    with EnhancedDataset(self.out_file, 'a') as nc:
        logger.info("Setting values for {}...".format(variable_name))
        if len(used_values.shape) == 1:
            var = nc.createVariable(variable_name,
                                    used_values.dtype, ("time", ),
                                    fill_value=fillvalue,
                                    chunksizes=(1000, ),
                                    zlib=True)
            if self.z.size == 1:
                var.coordinates = "{} {} latitude longitude".format(
                    self.time_axis_name, self.vertical_axis_name)
            else:
                # This is probably a bottom sensor on an ADCP or something, don't add the height coordinate
                var.coordinates = "{} latitude longitude".format(
                    self.time_axis_name)
                if unlink_from_profile is True:
                    # Create metadata variable for the sensor_depth
                    if nc.variables.get('sensor_depth') is None:
                        logger.info(
                            "Setting the special case 'sensor_depth' metadata variable"
                        )
                        inst_depth = nc.createVariable('sensor_depth', 'f4')
                        inst_depth.units = 'm'
                        inst_depth.standard_name = 'surface_altitude'
                        inst_depth.positive = self.vertical_positive
                        if self.vertical_positive.lower() == 'down':
                            inst_depth.long_name = 'sensor depth below datum'
                        elif self.vertical_positive.lower() == 'up':
                            inst_depth.long_name = 'sensor height above datum'
                        inst_depth.datum = sensor_vertical_datum or 'Unknown'
                        # Test the element count, not truthiness: bool() of
                        # an array is ambiguous for several elements and
                        # False for a single depth of 0.
                        if verticals is not None and np.size(verticals) > 0:
                            inst_depth[:] = verticals[0]
                        else:
                            inst_depth[:] = self.vertical_fill

        elif len(used_values.shape) == 2:
            var = nc.createVariable(variable_name,
                                    used_values.dtype, ("time", "z", ),
                                    fill_value=fillvalue,
                                    chunksizes=(1000, self.z.size, ),
                                    zlib=True)
            var.coordinates = "{} {} latitude longitude".format(
                self.time_axis_name, self.vertical_axis_name)
        else:
            raise ValueError(
                "Could not create variable.  Shape of data is {!s}.  Expected a dimension of 1 or 2, not {!s}."
                .format(used_values.shape, len(used_values.shape)))

        # Set the variable attributes as passed in
        if attributes:
            for k, v in attributes.items():

                if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                    # Use this as the vertical datum if it is specified and we didn't already have one
                    try:
                        self.crs.geoid_name = v
                        self.crs.vertical_datum = v
                        self.crs.water_surface_reference_datum = v
                    except AttributeError:
                        pass

                if k not in ['name', 'coordinates', '_FillValue'] and v is not None:
                    try:
                        var.setncattr(k, v)
                    except Exception:
                        # Best effort: an attribute that cannot be stored is skipped
                        logger.info(
                            'Could not add attribute {}: {}, skipping.'.
                            format(k, v))

        var.grid_mapping = 'crs'
        var[:] = used_values

        return var
def __init__(self, output_directory, latitude, longitude, station_name,
             global_attributes, times=None, verticals=None,
             vertical_fill=None, output_filename=None,
             vertical_axis_name=None, vertical_positive=None):
    """Create a new station netCDF file and write its static metadata.

    Any existing file at the target path is removed first. The file is
    opened in write mode, populated with global attributes, the station
    identifier, the location variables and the ``crs``/``platform``
    metadata variables, and then ``setup_times_and_verticals`` is called
    with ``times`` and ``verticals``.

    :param output_directory: directory to write into; created if missing.
    :param latitude: value stored in the scalar ``latitude`` variable and
        in the ``geospatial_lat_min``/``geospatial_lat_max`` attributes.
    :param longitude: value stored in the scalar ``longitude`` variable and
        in the ``geospatial_lon_min``/``geospatial_lon_max`` attributes.
    :param station_name: string written character by character to the
        ``feature_type_instance`` variable and used as
        ``platform.ioos_code``.
    :param global_attributes: mapping of global attribute names to values.
        Keys reserved by this class are ignored; ``None`` values are stored
        as the string ``"None"``. ``title`` and ``description`` also feed
        ``platform.short_name`` and ``platform.long_name``.
    :param times: forwarded to ``setup_times_and_verticals``.
    :param verticals: forwarded to ``setup_times_and_verticals``.
    :param vertical_fill: value kept on ``self.vertical_fill``; defaults to
        ``-9999.9``.
    :param output_filename: file name inside ``output_directory``; defaults
        to ``<station_name>_<random integer>.nc``.
    :param vertical_axis_name: defaults to ``'z'``.
    :param vertical_positive: defaults to ``'down'``.
    :raises OSError: if ``output_directory`` cannot be created and is not
        already a directory.
    """
    if output_filename is None:
        output_filename = '{}_{}.nc'.format(
            station_name, int(random.random() * 100000))
        logger.info("No output filename specified, saving as {}".format(
            output_filename))

    self.vertical_positive = vertical_positive or 'down'
    self.vertical_axis_name = vertical_axis_name or 'z'
    self.time_axis_name = 'time'

    # Make directory. Try first and inspect the failure afterwards so that a
    # concurrent creator of the same directory does not make this fail.
    try:
        os.makedirs(output_directory)
    except OSError:
        if not os.path.isdir(output_directory):
            raise

    self.time = None

    self.out_file = os.path.abspath(
        os.path.join(output_directory, output_filename))
    # Always start from a fresh file.
    if os.path.isfile(self.out_file):
        os.remove(self.out_file)

    # These are set by this class, we don't want someone to be able to set
    # them manually through global_attributes.
    global_skips = frozenset([
        "time_coverage_start",
        "time_coverage_end",
        "time_coverage_duration",
        "time_coverage_resolution",
        "featureType",
        "geospatial_vertical_positive",
        "geospatial_vertical_min",
        "geospatial_vertical_max",
        "geospatial_lat_min",
        "geospatial_lon_min",
        "geospatial_lat_max",
        "geospatial_lon_max",
        "geospatial_vertical_resolution",
        "Conventions",
        "date_created",
    ])

    with EnhancedDataset(self.out_file, 'w') as nc:
        # Global attributes
        for k, v in global_attributes.items():
            if v is None:
                v = "None"
            if k not in global_skips:
                nc.setncattr(k, v)

        # Take a single timestamp so that date_created and date_issued can
        # never disagree when the clock rolls over between two calls.
        created_at = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:00Z")
        nc.setncattr("Conventions", "CF-1.6")
        nc.setncattr("date_created", created_at)
        nc.setncattr("date_issued", created_at)
        nc.setncattr('cdm_data_type', 'Station')

        # Station name, stored as one character per element
        nc.createDimension("feature_type_instance", len(station_name))
        name = nc.createVariable("feature_type_instance", "S1",
                                 ("feature_type_instance", ))
        name.cf_role = "timeseries_id"
        name.long_name = "Identifier for each feature type instance"
        name[:] = list(station_name)

        # Location
        lat = nc.createVariable("latitude", "f8")
        lat.units = "degrees_north"
        lat.standard_name = "latitude"
        lat.long_name = "sensor latitude"
        lat[:] = latitude
        nc.setncattr("geospatial_lat_min", latitude)
        nc.setncattr("geospatial_lat_max", latitude)
        nc.setncattr("geospatial_lat_units", "degrees_north")

        lon = nc.createVariable("longitude", "f8")
        lon.units = "degrees_east"
        lon.standard_name = "longitude"
        lon.long_name = "sensor longitude"
        lon[:] = longitude
        nc.setncattr("geospatial_lon_min", longitude)
        nc.setncattr("geospatial_lon_max", longitude)
        nc.setncattr("geospatial_lon_units", "degrees_east")

        # Metadata variables
        # NOTE(review): self.crs keeps pointing at a variable of a dataset
        # that is closed when this with-block exits; whether the handle is
        # still usable afterwards depends on EnhancedDataset - confirm.
        self.crs = nc.createVariable("crs", "i4")
        self.crs.long_name = "http://www.opengis.net/def/crs/EPSG/0/4326"
        self.crs.grid_mapping_name = "latitude_longitude"
        self.crs.epsg_code = "EPSG:4326"
        self.crs.semi_major_axis = 6378137.0
        self.crs.inverse_flattening = 298.257223563

        # A key that is present but None must not reach the attribute
        # assignment; fall back to the station name just like a missing key.
        title = global_attributes.get("title")
        description = global_attributes.get("description")

        platform = nc.createVariable("platform", "i4")
        platform.ioos_code = station_name
        platform.short_name = station_name if title is None else title
        platform.long_name = (
            station_name if description is None else description)
        platform.definition = "http://mmisw.org/ont/ioos/definition/stationID"
        nc.setncattr('platform', 'platform')

        if vertical_fill is None:
            vertical_fill = -9999.9
        self.vertical_fill = vertical_fill

    # NOTE(review): called after the dataset above has been closed; a sibling
    # method in this file reopens out_file in append mode on its own -
    # confirm setup_times_and_verticals does the same.
    self.setup_times_and_verticals(times, verticals)
    logger.info("Created file at '{}'".format(self.out_file))