def from_string(urn_string):
    complete = urn_string.split('#')
    extras = ''
    if len(complete) > 1:
        extras = '#{0}'.format(complete[1])
    parts = complete[0].split(':')

    if len(parts) < 5:
        return IoosUrn()

    urn = IoosUrn()
    urn.asset_type = parts[2]
    urn.authority = parts[3]
    urn.label = parts[4]
    if len(parts) > 5:
        if urn.asset_type == 'station':
            urn.version = parts[5]
        elif len(parts) > 6:
            # A version is also specified, so this has to be the component
            urn.component = parts[5] + extras
        else:
            logger.debug("Assuming that {0} is the 'component' piece of the URN (not the 'version')".format(parts[5] + extras))
            urn.component = parts[5] + extras
    if len(parts) > 6:
        urn.version = parts[6]
    if len(parts) > 7:
        logger.warning("The URN is too long, stripping off '{}'".format(':'.join(parts[7:])))
    return urn
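# Usage sketch for from_string (the URN below is illustrative; in the library
# this may be exposed as a staticmethod on IoosUrn rather than a bare function):
parsed = from_string('urn:ioos:sensor:us.example:station1:sea_water_temperature#interval=pt1h')
# parsed.asset_type == 'sensor'
# parsed.authority  == 'us.example'
# parsed.label      == 'station1'
# parsed.component  == 'sea_water_temperature#interval=pt1h'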
def get_dataframe_from_variable(nc, data_var):
    """ Returns a Pandas DataFrame of the data.
        This always returns positive down depths.
    """
    time_var = nc.get_variables_by_attributes(standard_name='time')[0]

    depth_vars = nc.get_variables_by_attributes(axis=lambda v: v is not None and v.lower() == 'z')
    depth_vars += nc.get_variables_by_attributes(standard_name=lambda v: v in ['height', 'depth', 'surface_altitude'], positive=lambda x: x is not None)

    # Find the correct depth variable
    depth_var = None
    for d in depth_vars:
        try:
            if d._name in data_var.coordinates.split(" ") or d._name in data_var.dimensions:
                depth_var = d
                break
        except AttributeError:
            continue

    times = netCDF4.num2date(time_var[:], units=time_var.units, calendar=getattr(time_var, 'calendar', 'standard'))
    original_times_size = times.size

    if depth_var is None and hasattr(data_var, 'sensor_depth'):
        depth_type = get_type(data_var.sensor_depth)
        depths = np.asarray([data_var.sensor_depth] * len(times)).flatten()
        values = data_var[:].flatten()
    elif depth_var is None:
        depths = np.asarray([np.nan] * len(times)).flatten()
        depth_type = get_type(depths)
        values = data_var[:].flatten()
    else:
        depths = depth_var[:]
        depth_type = get_type(depths)
        if len(data_var.shape) > 1:
            times = np.repeat(times, depths.size)
            depths = np.tile(depths, original_times_size)
            values = data_var[:, :].flatten()
        else:
            values = data_var[:].flatten()

        if getattr(depth_var, 'positive', 'down').lower() == 'up':
            logger.warning("Converting depths to positive down before returning the DataFrame")
            depths = depths * -1

    # https://github.com/numpy/numpy/issues/4595
    # We can't call astype on a MaskedConstant
    if (
        isinstance(depths, np.ma.core.MaskedConstant) or
        (hasattr(depths, 'mask') and depths.mask.all())
    ):
        depths = np.asarray([np.nan] * len(times)).flatten()

    df = pd.DataFrame({
        'time': times,
        'value': values.astype(data_var.dtype),
        'unit': data_var.units if hasattr(data_var, 'units') else np.nan,
        'depth': depths.astype(depth_type)
    })

    df.set_index([pd.DatetimeIndex(df['time']), pd.Float64Index(df['depth'])], inplace=True)
    return df
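# Usage sketch for get_dataframe_from_variable (assumes 'sensor.nc' is a
# CF-style file containing a 'sea_water_temperature' data variable; both
# names are illustrative):
with netCDF4.Dataset('sensor.nc') as nc:
    df = get_dataframe_from_variable(nc, nc.variables['sea_water_temperature'])
    print(df.head())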
def global_attributes(self, gas):
    # These are set by this script; we don't want someone to be able to set them manually
    global_skips = ["time_coverage_start", "time_coverage_end", "time_coverage_duration", "time_coverage_resolution",
                    "featureType", "geospatial_vertical_positive", "geospatial_vertical_min", "geospatial_vertical_max",
                    "geospatial_lat_min", "geospatial_lon_min", "geospatial_lat_max", "geospatial_lon_max",
                    "Conventions", "date_created", "cdm_data_type"]
    for i in set(global_skips) & gas.keys():
        logger.warning("Ignoring global attribute {} because it is calculated or set automatically".format(i))
    self._global_attributes = {k: v for k, v in gas.items() if k not in global_skips}
def export(self, output_file):
    super(IncompleteProfile, self).export(output_file)

    with netCDF4.Dataset(output_file, 'w', clobber=True) as nc:
        gas = self.global_attributes
        nc.setncatts(gas)

        profiles = self.df.profile.unique().size
        profile_group = self.df.groupby('profile')
        max_z = profile_group.size().max()

        nc.createDimension('profile', profiles)
        nc.createDimension('z', max_z)

        profile = nc.createVariable('profile', self.df.profile.dtype, ('profile',))
        _, unique_profile_rows = np.unique(self.df.profile.values, return_index=True)
        profile[:] = list(range(profiles))

        time = nc.createVariable('time', int, ('profile',))
        time[:] = netCDF4.date2num([datetime.utcfromtimestamp(t) for t in self.df.time.unique().astype('<M8[s]').astype(int)], units=self.base_time)

        latitude = nc.createVariable('latitude', self.df.latitude.dtype, ('profile',))
        latitude[:] = self.df.latitude.values[unique_profile_rows]

        longitude = nc.createVariable('longitude', self.df.longitude.dtype, ('profile',))
        longitude[:] = self.df.longitude.values[unique_profile_rows]

        # Metadata variables
        nc.createVariable("crs", 'i4')
        nc.createVariable("platform", "i4")
        nc.setncattr('platform', 'platform')

        # Data vars
        reserved_columns = ['profile', 'time', 'latitude', 'longitude']
        for i, (name, p) in enumerate(profile_group):
            for c in [d for d in self.df.columns if d not in reserved_columns]:
                var_name = c.split(' ')[0].lower()
                fill = p[c].dtype.type(self.fill_value)
                if var_name not in nc.variables:
                    v = nc.createVariable(var_name, self.df[c].dtype, ('profile', 'z'), fill_value=fill)
                else:
                    v = nc.variables[var_name]
                assignable_values = p[c].fillna(fill).values
                v[i, :len(assignable_values)] = assignable_values

        for k, v in self.variable_attributes.items():
            if k in nc.variables:
                for n, z in v.items():
                    try:
                        nc.variables[k].setncattr(n, z)
                    except BaseException:
                        logger.warning('Could not set attribute {} on {}'.format(n, k))
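# Sketch of the DataFrame this export expects, inferred from the reserved
# columns above (values are illustrative): one row per sample, grouped into
# profiles; any non-reserved column becomes a ('profile', 'z') data variable.
df = pd.DataFrame({
    'profile':   [0, 0, 1, 1],
    'time':      pd.to_datetime(['2016-01-01T00:00:00'] * 2 + ['2016-01-02T00:00:00'] * 2),
    'latitude':  [32.7, 32.7, 32.8, 32.8],
    'longitude': [-117.2, -117.2, -117.3, -117.3],
    'temperature': [14.1, 13.8, 14.3, 13.9],
})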
def from_dataframe(df, output_directory, output_filename, latitude, longitude, station_name, global_attributes,
                   variable_name, variable_attributes, sensor_vertical_datum=None, fillvalue=None, data_column=None,
                   vertical_axis_name=None, vertical_positive=None, create_instrument_variable=False, attempts=None):
    # 'attempts' is how many strategies to try when building a NetCDF file
    # from a dataframe. For backwards compatibility purposes, we always try
    # everything (even manual matching, which takes forever and is a memory
    # hog).
    attempts = attempts or 5

    if fillvalue is None:
        fillvalue = -9999.9
    if data_column is None:
        data_column = 'value'

    data_fillvalue = df[data_column].values.dtype.type(fillvalue)
    vertical_fillvalue = df['depth'].values.dtype.type(fillvalue)

    df[data_column] = df[data_column].fillna(data_fillvalue)
    times = np.asarray([calendar.timegm(x.utctimetuple()) for x in df['time']])
    df['depth'] = df['depth'].fillna(vertical_fillvalue)
    depths = df['depth'].values

    try:
        ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes,
                        times=times, verticals=depths, output_filename=output_filename,
                        vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name,
                        vertical_positive=vertical_positive)
        ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes,
                        sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True,
                        fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
    except ValueError:
        if attempts < 2:
            raise
        logger.warning("Attempt 2: using unique times")
        try:
            # Try uniquing time
            newtimes = np.unique(times)
            ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes,
                            times=newtimes, verticals=depths, output_filename=output_filename,
                            vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name,
                            vertical_positive=vertical_positive)
            ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes,
                            sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True,
                            fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
        except ValueError:
            if attempts < 3:
                raise
            logger.warning("Attempt 3: using unique depths")
            try:
                # Try uniquing depths
                newdepths = np.unique(df['depth'].values)
                ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes,
                                times=times, verticals=newdepths, output_filename=output_filename,
                                vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name,
                                vertical_positive=vertical_positive)
                ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes,
                                sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True,
                                fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
            except ValueError:
                if attempts < 4:
                    raise
                logger.warning("Attempt 4: using unique time and depth")
                try:
                    # Unique both time and depth
                    newdepths = np.unique(df['depth'].values)
                    ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes,
                                    times=newtimes, verticals=newdepths, output_filename=output_filename,
                                    vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name,
                                    vertical_positive=vertical_positive)
                    ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes,
                                    sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True,
                                    fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
                except ValueError:
                    if attempts < 5:
                        raise
                    logger.warning("Attempt 5: manually matching (this is SLOW)")
                    # Manually match
                    ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes,
                                    times=times, verticals=depths, output_filename=output_filename,
                                    vertical_fill=vertical_fillvalue, vertical_axis_name=vertical_axis_name,
                                    vertical_positive=vertical_positive)
                    ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes,
                                    times=times, verticals=depths,
                                    sensor_vertical_datum=sensor_vertical_datum, raise_on_error=False,
                                    fillvalue=data_fillvalue, create_instrument_variable=create_instrument_variable)
    return ts
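# Usage sketch for from_dataframe (assumes a DataFrame with 'time', 'depth'
# and 'value' columns; the station metadata and attribute dicts below are
# illustrative, not from the source):
ts = from_dataframe(df, '/tmp/output', 'station1.nc',
                    latitude=32.7, longitude=-117.2,
                    station_name='urn:ioos:station:us.example:station1',
                    global_attributes={'title': 'Example station'},
                    variable_name='sea_water_temperature',
                    variable_attributes={'units': 'degree_Celsius'})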
def update_attributes(self, attributes):
    for k, v in attributes.pop('global', {}).items():
        try:
            self.setncattr(k, v)
        except BaseException:
            logger.warning('Could not set global attribute {}: {}'.format(k, v))

    for k, v in attributes.items():
        if k in self.variables:
            for n, z in v.items():
                try:
                    self.variables[k].setncattr(n, z)
                except BaseException:
                    logger.warning('Could not set attribute {} on {}'.format(n, k))
    self.sync()
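# The 'attributes' argument is a dict keyed by variable name, with a special
# 'global' key for file-level attributes. A minimal sketch (names are
# illustrative; assumes 'nc' is an open dataset object exposing this method):
nc.update_attributes({
    'global': {'title': 'Example deployment'},
    'sea_water_temperature': {'units': 'degree_Celsius'},
})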
def from_dataframe(df, output_directory, output_filename, latitude, longitude, station_name, global_attributes,
                   variable_name, variable_attributes, sensor_vertical_datum=None, fillvalue=None, data_column=None,
                   vertical_axis_name=None, vertical_positive=None):
    if fillvalue is None:
        fillvalue = -9999.9
    if data_column is None:
        data_column = 'value'

    df[data_column] = df[data_column].fillna(fillvalue)
    times = np.asarray([calendar.timegm(x.utctimetuple()) for x in df['time']])
    df['depth'] = df['depth'].fillna(fillvalue)
    depths = df['depth'].values

    try:
        ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes,
                        times=times, verticals=depths, output_filename=output_filename,
                        vertical_fill=fillvalue, vertical_axis_name=vertical_axis_name,
                        vertical_positive=vertical_positive)
        ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes,
                        sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True)
    except ValueError:
        logger.warning("Failed first attempt, trying again with unique times.")
        try:
            # Try uniquing time
            newtimes = np.unique(times)
            ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes,
                            times=newtimes, verticals=depths, output_filename=output_filename,
                            vertical_fill=fillvalue, vertical_axis_name=vertical_axis_name,
                            vertical_positive=vertical_positive)
            ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes,
                            sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True)
        except ValueError:
            logger.warning("Failed second attempt, trying again with unique depths.")
            try:
                # Try uniquing depths
                newdepths = np.unique(df['depth'].values)
                ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes,
                                times=times, verticals=newdepths, output_filename=output_filename,
                                vertical_fill=fillvalue, vertical_axis_name=vertical_axis_name,
                                vertical_positive=vertical_positive)
                ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes,
                                sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True)
            except ValueError:
                logger.warning("Failed third attempt, uniquing time and depth.")
                try:
                    # Unique both time and depth
                    newdepths = np.unique(df['depth'].values)
                    ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes,
                                    times=newtimes, verticals=newdepths, output_filename=output_filename,
                                    vertical_fill=fillvalue, vertical_axis_name=vertical_axis_name,
                                    vertical_positive=vertical_positive)
                    ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes,
                                    sensor_vertical_datum=sensor_vertical_datum, raise_on_error=True)
                except ValueError:
                    logger.warning("Failed fourth attempt, manually matching indexes (this is slow).")
                    # Manually match
                    ts = TimeSeries(output_directory, latitude, longitude, station_name, global_attributes,
                                    times=times, verticals=depths, output_filename=output_filename,
                                    vertical_fill=fillvalue, vertical_axis_name=vertical_axis_name,
                                    vertical_positive=vertical_positive)
                    ts.add_variable(variable_name, df[data_column].values, attributes=variable_attributes,
                                    times=times, verticals=depths,
                                    sensor_vertical_datum=sensor_vertical_datum, raise_on_error=False)
    return ts
def to_dataframe(self, clean_cols=True, clean_rows=True):
    # The index variable (trajectory_index) is identified by having an
    # attribute with name of instance_dimension whose value is the instance
    # dimension name (trajectory in this example). The index variable must
    # have the profile dimension as its sole dimension, and must be type
    # integer. Each value in the index variable is the zero-based trajectory
    # index that the profile belongs to i.e. profile p belongs to trajectory
    # i=trajectory_index(p), as in section H.2.5.
    r_index_var = self.get_variables_by_attributes(instance_dimension=lambda x: x is not None)[0]
    p_dim = self.dimensions[r_index_var.dimensions[0]]       # Profile dimension
    r_dim = self.dimensions[r_index_var.instance_dimension]  # Trajectory dimension

    # The count variable (row_size) contains the number of elements for
    # each profile, which must be written contiguously. The count variable
    # is identified by having an attribute with name sample_dimension whose
    # value is the sample dimension (obs in this example) being counted. It
    # must have the profile dimension as its sole dimension, and must be
    # type integer
    o_index_var = self.get_variables_by_attributes(sample_dimension=lambda x: x is not None)[0]
    o_dim = self.dimensions[o_index_var.sample_dimension]  # Sample dimension

    try:
        rvar = self.get_variables_by_attributes(cf_role='trajectory_id')[0]
        traj_indexes = normalize_array(rvar)
        assert traj_indexes.size == r_dim.size
    except BaseException:
        logger.warning('Could not pull trajectory values from a variable with "cf_role=trajectory_id", using a computed range.')
        traj_indexes = np.arange(r_dim.size)

    try:
        pvar = self.get_variables_by_attributes(cf_role='profile_id')[0]
        profile_indexes = normalize_array(pvar)
        assert profile_indexes.size == p_dim.size
    except BaseException:
        logger.warning('Could not pull profile values from a variable with "cf_role=profile_id", using a computed range.')
        profile_indexes = np.arange(p_dim.size)  # Profile dimension

    tvars = self.t_axes()
    if len(tvars) > 1:
        tvar = [v for v in self.t_axes() if v.dimensions == (p_dim.name,) and getattr(v, 'axis', '').lower() == 't'][0]
    else:
        tvar = tvars[0]

    xvars = self.x_axes()
    if len(xvars) > 1:
        xvar = [v for v in self.x_axes() if v.dimensions == (p_dim.name,) and getattr(v, 'axis', '').lower() == 'x'][0]
    else:
        xvar = xvars[0]

    yvars = self.y_axes()
    if len(yvars) > 1:
        yvar = [v for v in self.y_axes() if v.dimensions == (p_dim.name,) and getattr(v, 'axis', '').lower() == 'y'][0]
    else:
        yvar = yvars[0]

    zvars = self.z_axes()
    if len(zvars) > 1:
        zvar = [v for v in self.z_axes() if v.dimensions == (o_dim.name,) and getattr(v, 'axis', '').lower() == 'z'][0]
    else:
        zvar = zvars[0]

    p = np.ma.masked_all(o_dim.size, dtype=profile_indexes.dtype)
    r = np.ma.masked_all(o_dim.size, dtype=traj_indexes.dtype)
    t = np.ma.masked_all(o_dim.size, dtype=tvar.dtype)
    x = np.ma.masked_all(o_dim.size, dtype=xvar.dtype)
    y = np.ma.masked_all(o_dim.size, dtype=yvar.dtype)

    si = 0
    for i in np.arange(profile_indexes.size):
        ei = si + o_index_var[i]
        p[si:ei] = profile_indexes[i]
        r[si:ei] = traj_indexes[r_index_var[i]]
        t[si:ei] = tvar[i]
        x[si:ei] = xvar[i]
        y[si:ei] = yvar[i]
        si = ei

    t_mask = False
    tfill = get_fill_value(tvar)
    if tfill is not None:
        t_mask = np.copy(np.ma.getmaskarray(t))
        t[t_mask] = 1

    t = np.ma.MaskedArray(nc4.num2date(t, tvar.units, getattr(tvar, 'calendar', 'standard')))
    # Patch the time variable back to its original mask, since num2date
    # breaks any missing/fill values
    t[t_mask] = np.ma.masked

    # X and Y
    x = generic_masked(x, minv=-180, maxv=180).round(5)
    y = generic_masked(y, minv=-90, maxv=90).round(5)

    # Distance
    d = np.ma.zeros(o_dim.size, dtype=np.float64)
    d[1:] = great_distance(start_latitude=y[0:-1], end_latitude=y[1:], start_longitude=x[0:-1], end_longitude=x[1:])['distance']
    d = generic_masked(np.cumsum(d), minv=0).round(2)

    # Sample dimension
    z = generic_masked(zvar[:].flatten(), attrs=self.vatts(zvar.name)).round(5)

    df_data = {
        't': t,
        'x': x,
        'y': y,
        'z': z,
        'trajectory': r,
        'profile': p,
        'distance': d
    }

    building_index_to_drop = np.ones(o_dim.size, dtype=bool)
    extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
    for i, dvar in enumerate(extract_vars):
        # Profile dimensions
        if dvar.dimensions == (p_dim.name,):
            vdata = np.ma.masked_all(o_dim.size, dtype=dvar.dtype)
            si = 0
            for j in np.arange(profile_indexes.size):
                ei = si + o_index_var[j]
                vdata[si:ei] = dvar[j]
                si = ei
        # Sample dimensions
        elif dvar.dimensions == (o_dim.name,):
            vdata = generic_masked(dvar[:].flatten(), attrs=self.vatts(dvar.name)).round(3)
        else:
            logger.warning("Skipping variable {}... it didn't seem like a data variable".format(dvar))
            continue

        building_index_to_drop = (building_index_to_drop == True) & (vdata.mask == True)  # noqa
        df_data[dvar.name] = vdata

    df = pd.DataFrame(df_data)

    # Drop all data columns with no data
    if clean_cols:
        df = df.dropna(axis=1, how='all')

    # Drop all data rows with no data variable data
    if clean_rows:
        df = df.iloc[~building_index_to_drop]

    return df
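# Usage sketch for to_dataframe (assumes this method lives on a class wrapping
# an opened CF contiguous-ragged trajectory-profile dataset; 'TrajectoryProfile'
# and the filename are hypothetical stand-ins):
with TrajectoryProfile('trajectories.nc') as ncd:
    df = ncd.to_dataframe(clean_cols=True, clean_rows=True)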
def urnify_from_dict(naming_authority, station_identifier, data_dict):

    def clean_value(v):
        return v.replace('(', '').replace(')', '').strip().replace(' ', '_')

    extras = []
    intervals = []  # Because it can be part of cell_methods and its own dict key

    if 'cell_methods' in data_dict and data_dict['cell_methods']:
        cm = data_dict['cell_methods']
        keys = []
        values = []
        sofar = ''
        for i, c in enumerate(cm):
            if c == ":":
                if len(keys) == len(values):
                    keys.append(clean_value(sofar))
                else:
                    for j in reversed(range(0, i)):
                        if cm[j] == " ":
                            key = clean_value(cm[j+1:i])
                            values.append(clean_value(sofar.replace(key, '')))
                            keys.append(key)
                            break
                sofar = ''
            else:
                sofar += c
        # The last value needs appending
        values.append(clean_value(sofar))

        pairs = zip(keys, values)

        mems = []
        cell_intervals = []
        pairs = sorted(pairs)
        for group, members in itertools.groupby(pairs, lambda x: x[0]):
            if group == 'interval':
                cell_intervals = [m[1] for m in members]
            elif group in ['time', 'area']:  # Ignore 'comments'. May need to add more things here...
                member_strings = []
                for m in members:
                    member_strings.append('{}:{}'.format(group, m[1]))
                mems.append(','.join(member_strings))
        if mems:
            extras.append('cell_methods={}'.format(','.join(mems)))
        if cell_intervals:
            intervals += cell_intervals

    if 'bounds' in data_dict and data_dict['bounds']:
        extras.append('bounds={0}'.format(data_dict['bounds']))

    if 'vertical_datum' in data_dict and data_dict['vertical_datum']:
        extras.append('vertical_datum={0}'.format(data_dict['vertical_datum']))

    if 'interval' in data_dict and data_dict['interval']:
        if isinstance(data_dict['interval'], (list, tuple,)):
            intervals += data_dict['interval']
        elif isinstance(data_dict['interval'], str):
            intervals += [data_dict['interval']]

    if 'standard_name' in data_dict and data_dict['standard_name']:
        variable_name = data_dict['standard_name']
    elif 'name' in data_dict and data_dict['name']:
        variable_name = data_dict['name']
    else:
        variable_name = ''.join(random.choice(string.ascii_uppercase) for _ in range(8)).lower()
        logger.warning("Had to randomly generate a variable name: {0}".format(variable_name))

    if 'discriminant' in data_dict and data_dict['discriminant']:
        variable_name = '{}-{}'.format(variable_name, data_dict['discriminant'])

    if intervals:
        intervals = list(set(intervals))  # Unique them
        extras.append('interval={}'.format(','.join(intervals)))

    if extras:
        variable_name = '{0}#{1}'.format(variable_name, ';'.join(extras))

    u = IoosUrn(asset_type='sensor', authority=naming_authority, label=station_identifier, component=variable_name, version=None)

    return u.urn
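# Usage sketch for urnify_from_dict (the authority, station and attribute
# values are illustrative):
urn = urnify_from_dict('us.example', 'station1', {
    'standard_name': 'sea_water_temperature',
    'vertical_datum': 'NAVD88',
    'interval': 'PT1H',
})
# Builds a sensor URN whose component encodes the extras, along the lines of
# 'urn:ioos:sensor:us.example:station1:sea_water_temperature#vertical_datum=navd88;interval=pt1h'
# (exact casing depends on how the IoosUrn.urn property normalizes the parts).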
def safe_attribute_typing(zdtype, value):
    try:
        return zdtype.type(value)
    except ValueError:
        logger.warning("Could not convert {} to type {}".format(value, zdtype))
        return None
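# Usage sketch for safe_attribute_typing:
import numpy as np
safe_attribute_typing(np.dtype('float32'), '4.5')  # -> np.float32(4.5)
safe_attribute_typing(np.dtype('int32'), 'abc')    # -> None, with a warning logged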
def add_variable(self, variable_name, values, times=None, verticals=None, sensor_vertical_datum=None, attributes=None, unlink_from_profile=None, fillvalue=None, raise_on_error=False):

    if isinstance(values, (list, tuple,)) and values:
        values = np.asarray(values)
    if isinstance(times, (list, tuple,)) and times:
        times = np.asarray(times)
    if isinstance(verticals, (list, tuple,)) and verticals:
        verticals = np.asarray(verticals)

    # Set vertical datum on the CRS variable
    if sensor_vertical_datum is not None:
        try:
            self.crs.geoid_name = sensor_vertical_datum
            self.crs.vertical_datum = sensor_vertical_datum
            self.crs.water_surface_reference_datum = sensor_vertical_datum
        except AttributeError:
            pass

    # Set default fillvalue for new variables
    if fillvalue is None:
        fillvalue = -9999.9

    used_values = None
    try:
        if unlink_from_profile is True:
            used_values = np.ma.reshape(values, (self.time.size, ))
            used_values = used_values[self.time_indexes]
        # These next two cases should work for all but a few cases, which are caught below
        elif self.z.size == 1:
            used_values = np.ma.reshape(values, (self.time.size, ))
            used_values = used_values[self.time_indexes]
        else:
            used_values = np.ma.reshape(values, (self.time.size, self.z.size, ))
            used_values = used_values[self.time_indexes]
            try:
                used_values = used_values[:, self.vertical_indexes]
            except IndexError:
                # The vertical values most likely had duplicates. Ignore the
                # faulty index here and try to save the values as is.
                pass
    except ValueError:
        if raise_on_error is True:
            raise
        else:
            logger.warning("Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}".format(self.time.size, self.z.size, values.size))

        if self.z.size > 1:
            if times is not None and verticals is not None:
                # Hmmm, we have two actual height values for this station.
                # Not cool man, not cool.
                # Reindex the entire values array. This is slow.
                indexed = ((bisect.bisect_left(self.time[:], times[i]), bisect.bisect_left(self.z[:], verticals[i]), values[i]) for i in range(values.size))
                used_values = np.ndarray((self.time.size, self.z.size, ), dtype=values.dtype)
                used_values.fill(float(fillvalue))
                for (tzi, zzi, vz) in indexed:
                    if zzi < self.z.size and tzi < self.time.size:
                        used_values[tzi, zzi] = vz
            else:
                raise ValueError("You need to pass in both 'times' and 'verticals' parameters that match the size of the 'values' parameter.")
        else:
            if times is not None:
                # Ugh, find the time indexes manually
                indexed = ((bisect.bisect_left(self.time[:], times[i]), values[i]) for i in range(values.size))
                used_values = np.ndarray((self.time.size, ), dtype=values.dtype)
                used_values.fill(float(fillvalue))
                for (tzi, vz) in indexed:
                    if tzi < self.time.size:
                        used_values[tzi] = vz
            else:
                raise ValueError("You need to pass in a 'times' parameter that matches the size of the 'values' parameter.")

    with EnhancedDataset(self.out_file, 'a') as nc:
        logger.info("Setting values for {}...".format(variable_name))
        if len(used_values.shape) == 1:
            var = nc.createVariable(variable_name, used_values.dtype, ("time",), fill_value=fillvalue, chunksizes=(1000,), zlib=True)
            if self.z.size == 1:
                var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
            else:
                # This is probably a bottom sensor on an ADCP or something; don't add the height coordinate
                var.coordinates = "{} latitude longitude".format(self.time_axis_name)
                if unlink_from_profile is True:
                    # Create metadata variable for the sensor_depth
                    if nc.variables.get('sensor_depth') is None:
                        logger.info("Setting the special case 'sensor_depth' metadata variable")
                        inst_depth = nc.createVariable('sensor_depth', 'f4')
                        inst_depth.units = 'm'
                        inst_depth.standard_name = 'surface_altitude'
                        inst_depth.positive = self.vertical_positive
                        if self.vertical_positive.lower() == 'down':
                            inst_depth.long_name = 'sensor depth below datum'
                        elif self.vertical_positive.lower() == 'up':
                            inst_depth.long_name = 'sensor height above datum'
                        inst_depth.datum = sensor_vertical_datum or 'Unknown'
                        if verticals is not None and verticals.size > 0:
                            inst_depth[:] = verticals[0]
                        else:
                            inst_depth[:] = self.vertical_fill
        elif len(used_values.shape) == 2:
            var = nc.createVariable(variable_name, used_values.dtype, ("time", "z",), fill_value=fillvalue, chunksizes=(1000, self.z.size,), zlib=True)
            var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
        else:
            raise ValueError("Could not create variable. Shape of data is {!s}. Expected a dimension of 1 or 2, not {!s}.".format(used_values.shape, len(used_values.shape)))

        # Set the variable attributes as passed in
        if attributes:
            for k, v in attributes.items():
                if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                    # Use this as the vertical datum if it is specified and we didn't already have one
                    try:
                        self.crs.geoid_name = v
                        self.crs.vertical_datum = v
                        self.crs.water_surface_reference_datum = v
                    except AttributeError:
                        pass
                if k not in ['name', 'coordinates', '_FillValue'] and v is not None:
                    try:
                        var.setncattr(k, v)
                    except BaseException:
                        logger.info('Could not add attribute {}: {}, skipping.'.format(k, v))

        var.grid_mapping = 'crs'
        var[:] = used_values
        return var
import pytz

try:
    import pyncml
except ImportError:
    raise ImportError("You must install the 'pyncml' library to use this functionality.")

import netCDF4
import numpy as np

from pyaxiom.utils import DotDict
from pyaxiom import logger

try:
    from nco import Nco
except ImportError:
    logger.warning("NCO not found. The NCO python bindings are required to use 'Collection.combine'.")


class Collection(object):

    @classmethod
    def from_ncml_file(cls, ncml_path, apply_to_members=None):
        try:
            with open(ncml_path) as f:
                return cls(pyncml.scan(f.read(), apply_to_members=apply_to_members))
        except BaseException:
            logger.exception("Could not load Collection from NcML. Please check the NcML.")

    @classmethod
    def from_directory(cls, directory, suffix=".nc", subdirs=True, dimName='time', apply_to_members=None):
def add_variable(self, variable_name, values, times=None, verticals=None, sensor_vertical_datum=None, attributes=None, unlink_from_profile=None, fillvalue=None, raise_on_error=False, create_instrument_variable=False):

    if isinstance(values, (list, tuple,)) and values:
        values = np.asarray(values)
    if get_type(values) == np.int64:
        # Create values as int32 because DAP does not support int64 until DAP4.
        values = values.astype(np.int32)

    if isinstance(times, (list, tuple,)) and times:
        times = np.asarray(times)
    if get_type(times) == np.int64:
        # Create times as int32 because DAP does not support int64 until DAP4.
        times = times.astype(np.int32)

    if isinstance(verticals, (list, tuple,)) and verticals:
        verticals = np.asarray(verticals)
    if get_type(verticals) == np.int64:
        # Create verticals as int32 because DAP does not support int64 until DAP4.
        verticals = verticals.astype(np.int32)

    # Set vertical datum on the CRS variable
    if sensor_vertical_datum is not None:
        try:
            self.crs.geoid_name = sensor_vertical_datum
            self.crs.vertical_datum = sensor_vertical_datum
            self.crs.water_surface_reference_datum = sensor_vertical_datum
            if not hasattr(self._nc, "geospatial_bounds_vertical_crs"):
                self._nc.setncattr("geospatial_bounds_vertical_crs", sensor_vertical_datum)
        except AttributeError:
            pass

    # Set default fillvalue for new variables
    if fillvalue is None:
        fillvalue = -9999.9
    fillvalue = values.dtype.type(fillvalue)

    used_values = None

    vertical_axis = self._nc.variables.get(self.vertical_axis_name)
    try:
        if unlink_from_profile is True:
            used_values = np.ma.reshape(values, (self.time.size, ))
            used_values = used_values[self.time_indexes]
        # These next two cases should work for all but a few cases, which are caught below
        elif vertical_axis.size == 1:
            used_values = np.ma.reshape(values, (self.time.size, ))
            used_values = used_values[self.time_indexes]
        else:
            used_values = np.ma.reshape(values, (self.time.size, vertical_axis.size, ))
            used_values = used_values[self.time_indexes]
            try:
                used_values = used_values[:, self.vertical_indexes]
            except IndexError:
                # The vertical values most likely had duplicates. Ignore the
                # faulty index here and try to save the values as is.
                pass
    except ValueError:
        if raise_on_error is True:
            raise
        else:
            logger.warning("Could not do a simple reshape of data, trying to match manually! Time:{!s}, Heights:{!s}, Values:{!s}".format(self.time.size, vertical_axis.size, values.size))

        if vertical_axis.size > 1:
            if times is not None and verticals is not None:
                # Hmmm, we have two actual height values for this station.
                # Not cool man, not cool.
                # Reindex the entire values array. This is slow.
                indexed = ((bisect.bisect_left(self.time[:], times[i]), bisect.bisect_left(vertical_axis[:], verticals[i]), values[i]) for i in range(values.size))
                used_values = np.ndarray((self.time.size, vertical_axis.size, ), dtype=get_type(values))
                used_values.fill(fillvalue)
                for (tzi, zzi, vz) in indexed:
                    if zzi < vertical_axis.size and tzi < self.time.size:
                        used_values[tzi, zzi] = vz
                del indexed
            else:
                raise ValueError("You need to pass in both 'times' and 'verticals' parameters that match the size of the 'values' parameter.")
        else:
            if times is not None:
                # Ugh, find the time indexes manually
                indexed = ((bisect.bisect_left(self.time[:], times[i]), values[i]) for i in range(values.size))
                used_values = np.ndarray((self.time.size, ), dtype=get_type(values))
                used_values.fill(fillvalue)
                for (tzi, vz) in indexed:
                    if tzi < self.time.size:
                        used_values[tzi] = vz
                del indexed
            else:
                raise ValueError("You need to pass in a 'times' parameter that matches the size of the 'values' parameter.")

    logger.info("Setting values for {}...".format(variable_name))
    if len(used_values.shape) == 1:
        var = self._nc.createVariable(variable_name, get_type(used_values), ("time",), fill_value=fillvalue, chunksizes=(self.time_chunk,), zlib=True)
        self._nc.setncattr('ncei_template_version', 'NCEI_NetCDF_TimeSeries_Orthogonal_Template_v2.0')
        if vertical_axis.size == 1:
            var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
        else:
            # This is probably a bottom sensor on an ADCP or something; don't add the height coordinate
            var.coordinates = "{} latitude longitude".format(self.time_axis_name)
            if unlink_from_profile is True:
                # Create metadata variable for the sensor_depth
                if verticals is not None and self._nc.variables.get('sensor_depth') is None:
                    logger.info("Setting the special case 'sensor_depth' metadata variable")
                    inst_depth = self._nc.createVariable('sensor_depth', get_type(verticals))
                    inst_depth.units = 'm'
                    inst_depth.standard_name = 'surface_altitude'
                    inst_depth.positive = self.vertical_positive
                    if self.vertical_positive.lower() == 'down':
                        inst_depth.long_name = 'sensor depth below datum'
                    elif self.vertical_positive.lower() == 'up':
                        inst_depth.long_name = 'sensor height above datum'
                    inst_depth.datum = sensor_vertical_datum or 'Unknown'
                    if verticals.size > 0:
                        inst_depth[:] = verticals[0]
                    else:
                        inst_depth[:] = self.vertical_fill
    elif len(used_values.shape) == 2:
        var = self._nc.createVariable(variable_name, get_type(used_values), ("time", "z",), fill_value=fillvalue, chunksizes=(self.time_chunk, vertical_axis.size,), zlib=True)
        var.coordinates = "{} {} latitude longitude".format(self.time_axis_name, self.vertical_axis_name)
        self._nc.setncattr('ncei_template_version', 'NCEI_NetCDF_TimeSeriesProfile_Orthogonal_Template_v2.0')
    else:
        raise ValueError("Could not create variable. Shape of data is {!s}. Expected a dimension of 1 or 2, not {!s}.".format(used_values.shape, len(used_values.shape)))

    # Set missing_value as well
    attributes = attributes or {}
    attributes['missing_value'] = fillvalue

    # Set the variable attributes as passed in
    if attributes:
        for k, v in attributes.items():
            if k == 'vertical_datum' and sensor_vertical_datum is None and v is not None:
                # Use this as the vertical datum if it is specified and we didn't already have one
                try:
                    self.crs.geoid_name = v
                    self.crs.vertical_datum = v
                    self.crs.water_surface_reference_datum = v
                    if not hasattr(self._nc, "geospatial_bounds_vertical_crs"):
                        self._nc.setncattr("geospatial_bounds_vertical_crs", v)
                except AttributeError:
                    pass
            if k not in ['name', 'coordinates', '_FillValue'] and v is not None:
                try:
                    var.setncattr(k, v)
                except BaseException:
                    logger.info('Could not add attribute {}: {}, skipping.'.format(k, v))

    # Add a long name if it doesn't exist
    if not hasattr(var, 'long_name'):
        varunits = getattr(var, 'units', None)
        vartitle = getattr(var, 'standard_name', getattr(var, 'name'))
        vartitle = vartitle.title().replace('_', ' ')
        if varunits is not None:
            vartitle = '{} ({})'.format(vartitle, varunits)
        var.long_name = vartitle

    var.grid_mapping = 'crs'
    var.platform = 'platform'
    var.ancillary_variables = 'platform'
    var.coverage_content_type = 'physicalMeasurement'
    var[:] = used_values

    if create_instrument_variable is True:
        self.add_instrument_variable(variable_name)

    self._nc.sync()
    del used_values
    return var
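# Usage sketch for add_variable (assumes 'ts' is an existing TimeSeries with a
# 1-D time axis; the variable name and attributes are illustrative):
import numpy as np
values = np.random.random(ts.time.size).astype('f8')
ts.add_variable('sea_water_temperature', values,
                attributes={'units': 'degree_Celsius',
                            'standard_name': 'sea_water_temperature'},
                fillvalue=-9999.9)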