def flort_wfp(ds, grid=False):
    """
    Takes FLORT data recorded by the Wire-Following Profilers (used by CGSN/EA
    as part of the coastal and global arrays) and cleans up the data set to
    make it more user-friendly. Primary task is renaming parameters and
    dropping some that are of limited use. Additionally, re-organize some of
    the variables to permit better assessments of the data.

    :param ds: initial FLORT data set downloaded from OOI via the M2M system
    :param grid: boolean flag for whether the data should be gridded
    :return ds: cleaned up data set
    """
    # drop some of the variables:
    #   internal_timestamp == superseded by time, redundant so can remove
    #   suspect_timestamp == not used
    #   measurement_wavelength_* == metadata, move into variable attributes.
    #   seawater_scattering_coefficient == not used
    #   raw_internal_temp == not available, NaN filled
    ds = ds.reset_coords()
    ds = ds.drop(['internal_timestamp', 'suspect_timestamp', 'measurement_wavelength_beta',
                  'measurement_wavelength_cdom', 'measurement_wavelength_chl', 'raw_internal_temp'])

    # lots of renaming here to get a better defined data set with cleaner attributes
    rename = {
        'int_ctd_pressure': 'seawater_pressure',
        'ctdpf_ckl_seawater_temperature': 'seawater_temperature',
        'raw_signal_chl': 'raw_chlorophyll',
        'fluorometric_chlorophyll_a': 'estimated_chlorophyll',
        'fluorometric_chlorophyll_a_qc_executed': 'estimated_chlorophyll_qc_executed',
        'fluorometric_chlorophyll_a_qc_results': 'estimated_chlorophyll_qc_results',
        'raw_signal_cdom': 'raw_cdom',
        'raw_signal_beta': 'raw_backscatter',
        'total_volume_scattering_coefficient': 'beta_700',
        'total_volume_scattering_coefficient_qc_executed': 'beta_700_qc_executed',
        'total_volume_scattering_coefficient_qc_results': 'beta_700_qc_results',
        'optical_backscatter': 'bback',
        'optical_backscatter_qc_executed': 'bback_qc_executed',
        'optical_backscatter_qc_results': 'bback_qc_results',
    }
    ds = ds.rename(rename)

    # reset some attributes
    for key, value in ATTRS.items():
        for atk, atv in value.items():
            if key in ds.variables:
                ds[key].attrs[atk] = atv

    # add the original variable name as an attribute, if renamed
    for key, value in rename.items():
        ds[value].attrs['ooinet_variable_name'] = key

    # parse the OOI QC variables and add QARTOD style QC summary flags to the data, converting the
    # bitmap represented flags into an integer value representing pass == 1, suspect or of high
    # interest == 3, and fail == 4.
    ds = parse_qc(ds)

    # create qc flags for the data and add them to the OOI qc flags
    beta_flag, cdom_flag, chl_flag = quality_checks(ds)
    ds['beta_700_qc_summary_flag'] = ('time', (np.array([ds.beta_700_qc_summary_flag,
                                                         beta_flag])).max(axis=0, initial=1))
    ds['fluorometric_cdom_qc_summary_flag'] = ('time', (np.array([ds.fluorometric_cdom_qc_summary_flag,
                                                                  cdom_flag])).max(axis=0, initial=1))
    ds['estimated_chlorophyll_qc_summary_flag'] = ('time', (np.array([ds.estimated_chlorophyll_qc_summary_flag,
                                                                      chl_flag])).max(axis=0, initial=1))

    if grid:
        # clear out any duplicate time stamps
        _, index = np.unique(ds['time'], return_index=True)
        ds = ds.isel(time=index)

        # since the scipy griddata function cannot use the time values as is (get converted to nanoseconds, which
        # is too large of a value), we need to temporarily convert them to a floating point number in days since
        # the start of the data record; we can then use that temporary date/time array for the gridding.
        base_time = ds['time'].min().values
        dt = (ds['time'] - base_time).astype(float) / 1e9 / 60 / 60 / 24

        # construct the new grid, using 1 m depth bins from 30 to 510 m, and daily intervals from the start of
        # the record to the end (centered on noon UTC).
        depth_range = np.arange(30, 511, 1)
        time_range = np.arange(0.5, np.ceil(dt.max()) + 0.5, 1)
        gridded_time = base_time.astype('M8[D]') + pd.to_timedelta(time_range, unit='D')

        # grid the data, adding the results to a list of data arrays
        gridded = []
        for v in ds.variables:
            if v not in ['time', 'depth']:
                # grid the data for each variable
                gdata = griddata((dt.values, ds['depth'].values), ds[v].values,
                                 (time_range[None, :], depth_range[:, None]),
                                 method='linear')

                # add the data to a data array
                da = xr.DataArray(name=v, data=gdata, coords=[("depth", depth_range), ("time", gridded_time)])
                da.attrs = ds[v].attrs

                # reset the data types and fill values for floats and ints
                if ds[v].dtype == np.dtype(int):
                    da = da.where(np.isfinite(da), FILL_INT)  # replace NaNs with the integer fill value
                    da.attrs['_FillValue'] = FILL_INT
                    da = da.astype(int)
                else:
                    da.attrs['_FillValue'] = np.nan
                    da = da.astype(float)

                # add to the list
                gridded.append(da)

        # recombine the gridded data arrays into a single dataset
        gridded = xr.merge(gridded)
        gridded.attrs = ds.attrs
        ds = gridded

    return ds
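
# A minimal usage sketch (not part of the original module), assuming the FLORT WFP data have
# already been downloaded from OOI via the M2M system and saved locally as a netCDF file; the
# function name and file name below are hypothetical illustrations only.
def _example_flort_wfp_usage():
    import xarray as xr
    raw = xr.open_dataset('flort_wfp_download.nc')  # hypothetical local copy of an M2M download
    # clean up the data set and grid it to 1 m depth bins and daily time steps
    return flort_wfp(raw, grid=True)
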
def flort_instrument(ds):
    """
    Takes FLORT data recorded by the Sea-Bird Electronics SBE16Plus used in
    the CGSN/EA moorings and cleans up the data set to make it more
    user-friendly. Primary task is renaming parameters and dropping some that
    are of limited use. Additionally, re-organize some of the variables to
    permit better assessments of the data.

    :param ds: initial FLORT data set downloaded from OOI via the M2M system
    :return ds: cleaned up data set
    """
    # drop some of the variables:
    #   internal_timestamp == superseded by time, redundant so can remove
    #   suspect_timestamp == not used
    #   measurement_wavelength_* == metadata, move into variable attributes.
    #   pressure_depth == variable assigned if this was a FLORT on a CSPP, not with moorings
    ds = ds.reset_coords()
    ds = ds.drop(['internal_timestamp', 'suspect_timestamp', 'measurement_wavelength_beta',
                  'measurement_wavelength_cdom', 'measurement_wavelength_chl'])

    # lots of renaming here to get a better defined data set with cleaner attributes
    rename = {
        'temp': 'seawater_temperature',
        'raw_signal_chl': 'raw_chlorophyll',
        'fluorometric_chlorophyll_a': 'estimated_chlorophyll',
        'fluorometric_chlorophyll_a_qc_executed': 'estimated_chlorophyll_qc_executed',
        'fluorometric_chlorophyll_a_qc_results': 'estimated_chlorophyll_qc_results',
        'raw_signal_cdom': 'raw_cdom',
        'raw_signal_beta': 'raw_backscatter',
        'total_volume_scattering_coefficient': 'beta_700',
        'total_volume_scattering_coefficient_qc_executed': 'beta_700_qc_executed',
        'total_volume_scattering_coefficient_qc_results': 'beta_700_qc_results',
        'optical_backscatter': 'bback',
        'optical_backscatter_qc_executed': 'bback_qc_executed',
        'optical_backscatter_qc_results': 'bback_qc_results',
    }
    ds = ds.rename(rename)

    # reset some attributes
    for key, value in ATTRS.items():
        for atk, atv in value.items():
            if key in ds.variables:
                ds[key].attrs[atk] = atv

    # add the original variable name as an attribute, if renamed
    for key, value in rename.items():
        ds[value].attrs['ooinet_variable_name'] = key

    # check if the raw data for all three channels is 0, if so the FLORT wasn't talking to the CTD and these are
    # all just fill values that can be removed.
    ds = ds.where(ds['raw_backscatter'] + ds['raw_cdom'] + ds['raw_chlorophyll'] > 0, drop=True)
    if len(ds.time) == 0:
        # this was one of those deployments where the FLORT was never able to communicate with the CTD.
        warnings.warn('Communication failure between the FLORT and the CTDBP. No data was recorded.')
        return None

    # parse the OOI QC variables and add QARTOD style QC summary flags to the data, converting the
    # bitmap represented flags into an integer value representing pass == 1, suspect or of high
    # interest == 3, and fail == 4.
    ds = parse_qc(ds)

    # create qc flags for the data and add them to the OOI qc flags
    beta_flag, cdom_flag, chl_flag = quality_checks(ds)
    ds['beta_700_qc_summary_flag'] = ('time', (np.array([ds.beta_700_qc_summary_flag,
                                                         beta_flag])).max(axis=0, initial=1))
    ds['fluorometric_cdom_qc_summary_flag'] = ('time', (np.array([ds.fluorometric_cdom_qc_summary_flag,
                                                                  cdom_flag])).max(axis=0, initial=1))
    ds['estimated_chlorophyll_qc_summary_flag'] = ('time', (np.array([ds.estimated_chlorophyll_qc_summary_flag,
                                                                      chl_flag])).max(axis=0, initial=1))

    return ds
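
# A minimal usage sketch (not part of the original module), assuming a mooring FLORT data set
# downloaded via M2M and saved locally; the function name and file name are hypothetical
# illustrations. Note that flort_instrument returns None if the FLORT never communicated with
# the CTDBP, so the result should be checked before further use.
def _example_flort_instrument_usage():
    import xarray as xr
    raw = xr.open_dataset('flort_instrument_download.nc')  # hypothetical local copy
    cleaned = flort_instrument(raw)
    if cleaned is None:
        # communication failure between the FLORT and the CTDBP, nothing to work with
        return None
    return cleaned
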
def flort_cspp(ds):
    """
    Takes FLORT data recorded by the CSPP loggers used by the Endurance Array
    and cleans up the data set to make it more user-friendly. Primary task is
    renaming parameters and dropping some that are of limited use.
    Additionally, re-organize some of the variables to permit better
    assessments of the data.

    :param ds: initial FLORT data set downloaded from OOI via the M2M system
    :return ds: cleaned up data set
    """
    # drop some of the variables:
    #   internal_timestamp == superseded by time, redundant so can remove
    #   suspect_timestamp == not used
    #   measurement_wavelength_* == metadata, move into variable attributes.
    #   seawater_scattering_coefficient == not used
    ds = ds.reset_coords()
    ds = ds.drop(['internal_timestamp', 'suspect_timestamp', 'measurement_wavelength_beta',
                  'measurement_wavelength_cdom', 'measurement_wavelength_chl'])

    # lots of renaming here to get a better defined data set with cleaner attributes
    rename = {
        'pressure': 'seawater_pressure',
        'pressure_qc_executed': 'seawater_pressure_qc_executed',
        'pressure_qc_results': 'seawater_pressure_qc_results',
        'temperature': 'seawater_temperature',
        'salinity': 'practical_salinity',
        'raw_signal_chl': 'raw_chlorophyll',
        'fluorometric_chlorophyll_a': 'estimated_chlorophyll',
        'fluorometric_chlorophyll_a_qc_executed': 'estimated_chlorophyll_qc_executed',
        'fluorometric_chlorophyll_a_qc_results': 'estimated_chlorophyll_qc_results',
        'raw_signal_cdom': 'raw_cdom',
        'raw_signal_beta': 'raw_backscatter',
        'total_volume_scattering_coefficient': 'beta_700',
        'total_volume_scattering_coefficient_qc_executed': 'beta_700_qc_executed',
        'total_volume_scattering_coefficient_qc_results': 'beta_700_qc_results',
        'optical_backscatter': 'bback',
        'optical_backscatter_qc_executed': 'bback_qc_executed',
        'optical_backscatter_qc_results': 'bback_qc_results',
    }
    ds = ds.rename(rename)

    # reset some attributes
    for key, value in ATTRS.items():
        for atk, atv in value.items():
            if key in ds.variables:
                ds[key].attrs[atk] = atv

    # add the original variable name as an attribute, if renamed
    for key, value in rename.items():
        ds[value].attrs['ooinet_variable_name'] = key

    # parse the OOI QC variables and add QARTOD style QC summary flags to the data, converting the
    # bitmap represented flags into an integer value representing pass == 1, suspect or of high
    # interest == 3, and fail == 4.
    ds = parse_qc(ds)

    # create qc flags for the data and add them to the OOI qc flags
    beta_flag, cdom_flag, chl_flag = quality_checks(ds)
    ds['beta_700_qc_summary_flag'] = ('time', (np.array([ds.beta_700_qc_summary_flag,
                                                         beta_flag])).max(axis=0, initial=1))
    ds['fluorometric_cdom_qc_summary_flag'] = ('time', (np.array([ds.fluorometric_cdom_qc_summary_flag,
                                                                  cdom_flag])).max(axis=0, initial=1))
    ds['estimated_chlorophyll_qc_summary_flag'] = ('time', (np.array([ds.estimated_chlorophyll_qc_summary_flag,
                                                                      chl_flag])).max(axis=0, initial=1))

    return ds
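
# A minimal usage sketch (not part of the original module), assuming a CSPP FLORT data set
# downloaded via M2M and saved locally; the function name and file name are hypothetical
# illustrations. It shows one way the QARTOD-style summary flags added above (pass == 1,
# suspect == 3, fail == 4) might be used to screen a science variable.
def _example_flort_cspp_usage():
    import xarray as xr
    raw = xr.open_dataset('flort_cspp_download.nc')  # hypothetical local copy
    cleaned = flort_cspp(raw)
    # keep chlorophyll values flagged as pass (1) or suspect (3), masking out fails (4)
    good_chl = cleaned['estimated_chlorophyll'].where(cleaned['estimated_chlorophyll_qc_summary_flag'] < 4)
    return good_chl
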
def flort_datalogger(ds, burst=False):
    """
    Takes FLORT data recorded by the data loggers used in the CGSN/EA moorings
    and cleans up the data set to make it more user-friendly. Primary task is
    renaming parameters and dropping some that are of limited use.
    Additionally, re-organize some of the variables to permit better
    assessments of the data.

    :param ds: initial FLORT data set downloaded from OOI via the M2M system
    :param burst: boolean flag to resample the data to the defined time interval
    :return ds: cleaned up data set
    """
    # drop some of the variables:
    #   internal_timestamp == superseded by time, redundant so can remove
    #   suspect_timestamp == not used
    #   measurement_wavelength_* == metadata, move into variable attributes.
    #   pressure_depth == variable assigned if this was a FLORT on a CSPP, not with moorings
    ds = ds.drop(['internal_timestamp', 'suspect_timestamp', 'measurement_wavelength_beta',
                  'measurement_wavelength_cdom', 'measurement_wavelength_chl'])

    # check for data from a co-located CTD, if not present add it and reset the fill value for the optical
    # backscatter derived values
    if 'temp' not in ds.variables:
        ds['temp'] = ('time', ds['deployment'] * np.nan)
        ds['practical_salinity'] = ('time', ds['deployment'] * np.nan)
        ds['optical_backscatter'] = ds['optical_backscatter'] * np.nan
        ds['seawater_scattering_coefficient'] = ds['seawater_scattering_coefficient'] * np.nan

    # lots of renaming here to get a better defined data set with cleaner attributes
    rename = {
        'temp': 'seawater_temperature',
        'raw_signal_chl': 'raw_chlorophyll',
        'fluorometric_chlorophyll_a': 'estimated_chlorophyll',
        'fluorometric_chlorophyll_a_qc_executed': 'estimated_chlorophyll_qc_executed',
        'fluorometric_chlorophyll_a_qc_results': 'estimated_chlorophyll_qc_results',
        'raw_signal_cdom': 'raw_cdom',
        'raw_signal_beta': 'raw_backscatter',
        'total_volume_scattering_coefficient': 'beta_700',
        'total_volume_scattering_coefficient_qc_executed': 'beta_700_qc_executed',
        'total_volume_scattering_coefficient_qc_results': 'beta_700_qc_results',
        'optical_backscatter': 'bback',
        'optical_backscatter_qc_executed': 'bback_qc_executed',
        'optical_backscatter_qc_results': 'bback_qc_results',
    }
    ds = ds.rename(rename)

    # reset some attributes
    for key, value in ATTRS.items():
        for atk, atv in value.items():
            if key in ds.variables:
                ds[key].attrs[atk] = atv

    # add the original variable name as an attribute, if renamed
    for key, value in rename.items():
        ds[value].attrs['ooinet_variable_name'] = key

    # parse the OOI QC variables and add QARTOD style QC summary flags to the data, converting the
    # bitmap represented flags into an integer value representing pass == 1, suspect or of high
    # interest == 3, and fail == 4.
    ds = parse_qc(ds)

    # create QC flags for the data and add them to the OOI QC summary flags
    beta_flag, cdom_flag, chl_flag = quality_checks(ds)
    ds['beta_700_qc_summary_flag'] = ('time', (np.array([ds.beta_700_qc_summary_flag,
                                                         beta_flag])).max(axis=0, initial=1))
    ds['fluorometric_cdom_qc_summary_flag'] = ('time', (np.array([ds.fluorometric_cdom_qc_summary_flag,
                                                                  cdom_flag])).max(axis=0, initial=1))
    ds['estimated_chlorophyll_qc_summary_flag'] = ('time', (np.array([ds.estimated_chlorophyll_qc_summary_flag,
                                                                      chl_flag])).max(axis=0, initial=1))

    if burst:
        # re-sample the data collected in burst mode using a 15-minute median average
        burst = ds.resample(time='900s', skipna=True).median(dim='time', keep_attrs=True)

        # for each of the three FLORT measurements, calculate stats (min, max, and the standard deviation)
        # for each of the bursts
        cdom = ds['fluorometric_cdom'].resample(time='900s', skipna=True)
        cdom = np.array([cdom.min('time').values, cdom.max('time').values, cdom.std('time').values])
        chl = ds['estimated_chlorophyll'].resample(time='900s', skipna=True)
        chl = np.array([chl.min('time').values, chl.max('time').values, chl.std('time').values])
        beta = ds['beta_700'].resample(time='900s', skipna=True)
        beta = np.array([beta.min('time').values, beta.max('time').values, beta.std('time').values])

        # create a data set with the burst statistics for the variables
        stats = xr.Dataset({
            'fluorometric_cdom_burst_stats': (['time', 'stats'], cdom.T),
            'estimated_chlorophyll_burst_stats': (['time', 'stats'], chl.T),
            'beta_700_burst_stats': (['time', 'stats'], beta.T)
        }, coords={'time': burst['time'], 'stats': np.arange(0, 3).astype('int32')})

        # add the stats into the burst averaged data set, and then remove the missing rows
        burst = burst.merge(stats)
        burst = burst.where(~np.isnan(burst.deployment), drop=True)

        # save the newly averaged data
        ds = burst

    return ds
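
# A minimal usage sketch (not part of the original module), assuming a telemetered FLORT data
# set downloaded via M2M and saved locally; the function name and file name are hypothetical
# illustrations. With burst=True the data are median-averaged into 15-minute bins and the
# *_burst_stats variables hold the per-burst minimum, maximum and standard deviation
# (indexed 0, 1 and 2 along the stats dimension).
def _example_flort_datalogger_usage():
    import xarray as xr
    raw = xr.open_dataset('flort_datalogger_download.nc')  # hypothetical local copy
    averaged = flort_datalogger(raw, burst=True)
    # per-burst standard deviation of the chlorophyll estimate
    chl_std = averaged['estimated_chlorophyll_burst_stats'].sel(stats=2)
    return averaged, chl_std
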