def flort_wfp(ds, grid=False):
    """
    Takes FLORT data recorded by the Wire-Following Profilers (used by CGSN/EA
    as part of the coastal and global arrays) and cleans up the data set to
    make it more user-friendly. Primary task is renaming parameters and
    dropping some that are of limited use. Additionally, re-organize some of
    the variables to permit better assessments of the data.

    :param ds: initial FLORT data set downloaded from OOI via the M2M system
    :param grid: boolean flag for whether the data should be gridded
    :return ds: cleaned up data set
    """
    # drop some of the variables:
    #   internal_timestamp == superseded by time, redundant so can remove
    #   suspect_timestamp == not used
    #   measurement_wavelength_* == metadata, move into variable attributes.
    #   seawater_scattering_coefficient == not used
    #   raw_internal_temp == not available, NaN filled
    ds = ds.reset_coords()
    ds = ds.drop(['internal_timestamp', 'suspect_timestamp', 'measurement_wavelength_beta',
                  'measurement_wavelength_cdom', 'measurement_wavelength_chl', 'raw_internal_temp'])

    # lots of renaming here to get a better defined data set with cleaner attributes
    rename = {
        'int_ctd_pressure': 'seawater_pressure',
        'ctdpf_ckl_seawater_temperature': 'seawater_temperature',
        'raw_signal_chl': 'raw_chlorophyll',
        'fluorometric_chlorophyll_a': 'estimated_chlorophyll',
        'fluorometric_chlorophyll_a_qc_executed': 'estimated_chlorophyll_qc_executed',
        'fluorometric_chlorophyll_a_qc_results': 'estimated_chlorophyll_qc_results',
        'raw_signal_cdom': 'raw_cdom',
        'raw_signal_beta': 'raw_backscatter',
        'total_volume_scattering_coefficient': 'beta_700',
        'total_volume_scattering_coefficient_qc_executed': 'beta_700_qc_executed',
        'total_volume_scattering_coefficient_qc_results': 'beta_700_qc_results',
        'optical_backscatter': 'bback',
        'optical_backscatter_qc_executed': 'bback_qc_executed',
        'optical_backscatter_qc_results': 'bback_qc_results',
    }
    ds = ds.rename(rename)

    # reset some attributes
    for key, value in ATTRS.items():
        for atk, atv in value.items():
            if key in ds.variables:
                ds[key].attrs[atk] = atv

    # add the original variable name as an attribute, if renamed
    for key, value in rename.items():
        ds[value].attrs['ooinet_variable_name'] = key

    # parse the OOI QC variables and add QARTOD style QC summary flags to the data, converting the
    # bitmap represented flags into an integer value representing pass == 1, suspect or of high
    # interest == 3, and fail == 4.
    ds = parse_qc(ds)

    # create qc flags for the data and add them to the OOI qc flags
    beta_flag, cdom_flag, chl_flag = quality_checks(ds)
    ds['beta_700_qc_summary_flag'] = ('time', (np.array([ds.beta_700_qc_summary_flag,
                                                         beta_flag])).max(axis=0, initial=1))
    ds['fluorometric_cdom_qc_summary_flag'] = ('time', (np.array([ds.fluorometric_cdom_qc_summary_flag,
                                                                  cdom_flag])).max(axis=0, initial=1))
    ds['estimated_chlorophyll_qc_summary_flag'] = ('time', (np.array([ds.estimated_chlorophyll_qc_summary_flag,
                                                                      chl_flag])).max(axis=0, initial=1))

    if grid:
        # clear out any duplicate time stamps
        _, index = np.unique(ds['time'], return_index=True)
        ds = ds.isel(time=index)

        # since the scipy griddata function cannot use the time values as is (get converted to nanoseconds, which
        # is too large of a value), we need to temporarily convert them to a floating point number in days since
        # the start of the data record; we can then use that temporary date/time array for the gridding.
        base_time = ds['time'].min().values
        dt = (ds['time'] - base_time).astype(float) / 1e9 / 60 / 60 / 24

        # construct the new grid, using 1 m depth bins from 30 to 510 m, and daily intervals from the start of
        # the record to the end (centered on noon UTC).
        depth_range = np.arange(30, 511, 1)
        time_range = np.arange(0.5, np.ceil(dt.max()) + 0.5, 1)
        gridded_time = base_time.astype('M8[D]') + pd.to_timedelta(time_range, unit='D')

        # grid the data, adding the results to a list of data arrays
        gridded = []
        for v in ds.variables:
            if v not in ['time', 'depth']:
                # grid the data for each variable
                gdata = griddata((dt.values, ds['depth'].values), ds[v].values,
                                 (time_range[None, :], depth_range[:, None]),
                                 method='linear')

                # add the data to a data array
                da = xr.DataArray(name=v, data=gdata, coords=[("depth", depth_range), ("time", gridded_time)])
                da.attrs = ds[v].attrs

                # reset the data types and fill values for floats and ints
                if ds[v].dtype == np.dtype(int):
                    da = da.where(np.isfinite(da), FILL_INT)  # replace NaNs with the integer fill value
                    da.attrs['_FillValue'] = FILL_INT
                    da = da.astype(int)
                else:
                    da.attrs['_FillValue'] = np.nan
                    da = da.astype(float)

                # add to the list
                gridded.append(da)

        # recombine the gridded data arrays into a single dataset
        gridded = xr.merge(gridded)
        gridded.attrs = ds.attrs
        ds = gridded

    return ds
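
# A minimal usage sketch (not part of the original module), assuming the FLORT WFP data have
# already been downloaded from OOI via the M2M system and saved locally as a netCDF file; the
# function name and file name below are hypothetical illustrations only.
def _example_flort_wfp_usage():
    import xarray as xr
    raw = xr.open_dataset('flort_wfp_download.nc')  # hypothetical local copy of an M2M download
    # clean up the data set and grid it to 1 m depth bins and daily time steps
    return flort_wfp(raw, grid=True)
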
def flort_instrument(ds):
    """
    Takes FLORT data recorded by the Sea-Bird Electronics SBE16Plus used in
    the CGSN/EA moorings and cleans up the data set to make it more
    user-friendly. Primary task is renaming parameters and dropping some that
    are of limited use. Additionally, re-organize some of the variables to
    permit better assessments of the data.

    :param ds: initial FLORT data set downloaded from OOI via the M2M system
    :return ds: cleaned up data set
    """
    # drop some of the variables:
    #   internal_timestamp == superseded by time, redundant so can remove
    #   suspect_timestamp == not used
    #   measurement_wavelength_* == metadata, move into variable attributes.
    #   pressure_depth == variable assigned if this was a FLORT on a CSPP, not with moorings
    ds = ds.reset_coords()
    ds = ds.drop(['internal_timestamp', 'suspect_timestamp', 'measurement_wavelength_beta',
                  'measurement_wavelength_cdom', 'measurement_wavelength_chl'])

    # lots of renaming here to get a better defined data set with cleaner attributes
    rename = {
        'temp': 'seawater_temperature',
        'raw_signal_chl': 'raw_chlorophyll',
        'fluorometric_chlorophyll_a': 'estimated_chlorophyll',
        'fluorometric_chlorophyll_a_qc_executed': 'estimated_chlorophyll_qc_executed',
        'fluorometric_chlorophyll_a_qc_results': 'estimated_chlorophyll_qc_results',
        'raw_signal_cdom': 'raw_cdom',
        'raw_signal_beta': 'raw_backscatter',
        'total_volume_scattering_coefficient': 'beta_700',
        'total_volume_scattering_coefficient_qc_executed': 'beta_700_qc_executed',
        'total_volume_scattering_coefficient_qc_results': 'beta_700_qc_results',
        'optical_backscatter': 'bback',
        'optical_backscatter_qc_executed': 'bback_qc_executed',
        'optical_backscatter_qc_results': 'bback_qc_results',
    }
    ds = ds.rename(rename)

    # reset some attributes
    for key, value in ATTRS.items():
        for atk, atv in value.items():
            if key in ds.variables:
                ds[key].attrs[atk] = atv

    # add the original variable name as an attribute, if renamed
    for key, value in rename.items():
        ds[value].attrs['ooinet_variable_name'] = key

    # check if the raw data for all three channels is 0, if so the FLORT wasn't talking to the CTD and these are
    # all just fill values that can be removed.
    ds = ds.where(ds['raw_backscatter'] + ds['raw_cdom'] + ds['raw_chlorophyll'] > 0, drop=True)
    if len(ds.time) == 0:
        # this was one of those deployments where the FLORT was never able to communicate with the CTD.
        warnings.warn('Communication failure between the FLORT and the CTDBP. No data was recorded.')
        return None

    # parse the OOI QC variables and add QARTOD style QC summary flags to the data, converting the
    # bitmap represented flags into an integer value representing pass == 1, suspect or of high
    # interest == 3, and fail == 4.
    ds = parse_qc(ds)

    # create qc flags for the data and add them to the OOI qc flags
    beta_flag, cdom_flag, chl_flag = quality_checks(ds)
    ds['beta_700_qc_summary_flag'] = ('time', (np.array([ds.beta_700_qc_summary_flag,
                                                         beta_flag])).max(axis=0, initial=1))
    ds['fluorometric_cdom_qc_summary_flag'] = ('time', (np.array([ds.fluorometric_cdom_qc_summary_flag,
                                                                  cdom_flag])).max(axis=0, initial=1))
    ds['estimated_chlorophyll_qc_summary_flag'] = ('time', (np.array([ds.estimated_chlorophyll_qc_summary_flag,
                                                                      chl_flag])).max(axis=0, initial=1))

    return ds
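
# A minimal usage sketch (not part of the original module), assuming a mooring FLORT data set
# downloaded via M2M and saved locally; the function name and file name are hypothetical
# illustrations. Note that flort_instrument returns None if the FLORT never communicated with
# the CTDBP, so the result should be checked before further use.
def _example_flort_instrument_usage():
    import xarray as xr
    raw = xr.open_dataset('flort_instrument_download.nc')  # hypothetical local copy
    cleaned = flort_instrument(raw)
    if cleaned is None:
        # communication failure between the FLORT and the CTDBP, nothing to work with
        return None
    return cleaned
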
def flort_cspp(ds):
    """
    Takes FLORT data recorded by the CSPP loggers used by the Endurance Array
    and cleans up the data set to make it more user-friendly. Primary task is
    renaming parameters and dropping some that are of limited use.
    Additionally, re-organize some of the variables to permit better
    assessments of the data.

    :param ds: initial FLORT data set downloaded from OOI via the M2M system
    :return ds: cleaned up data set
    """
    # drop some of the variables:
    #   internal_timestamp == superseded by time, redundant so can remove
    #   suspect_timestamp == not used
    #   measurement_wavelength_* == metadata, move into variable attributes.
    #   seawater_scattering_coefficient == not used
    ds = ds.reset_coords()
    ds = ds.drop(['internal_timestamp', 'suspect_timestamp', 'measurement_wavelength_beta',
                  'measurement_wavelength_cdom', 'measurement_wavelength_chl'])

    # lots of renaming here to get a better defined data set with cleaner attributes
    rename = {
        'pressure': 'seawater_pressure',
        'pressure_qc_executed': 'seawater_pressure_qc_executed',
        'pressure_qc_results': 'seawater_pressure_qc_results',
        'temperature': 'seawater_temperature',
        'salinity': 'practical_salinity',
        'raw_signal_chl': 'raw_chlorophyll',
        'fluorometric_chlorophyll_a': 'estimated_chlorophyll',
        'fluorometric_chlorophyll_a_qc_executed': 'estimated_chlorophyll_qc_executed',
        'fluorometric_chlorophyll_a_qc_results': 'estimated_chlorophyll_qc_results',
        'raw_signal_cdom': 'raw_cdom',
        'raw_signal_beta': 'raw_backscatter',
        'total_volume_scattering_coefficient': 'beta_700',
        'total_volume_scattering_coefficient_qc_executed': 'beta_700_qc_executed',
        'total_volume_scattering_coefficient_qc_results': 'beta_700_qc_results',
        'optical_backscatter': 'bback',
        'optical_backscatter_qc_executed': 'bback_qc_executed',
        'optical_backscatter_qc_results': 'bback_qc_results',
    }
    ds = ds.rename(rename)

    # reset some attributes
    for key, value in ATTRS.items():
        for atk, atv in value.items():
            if key in ds.variables:
                ds[key].attrs[atk] = atv

    # add the original variable name as an attribute, if renamed
    for key, value in rename.items():
        ds[value].attrs['ooinet_variable_name'] = key

    # parse the OOI QC variables and add QARTOD style QC summary flags to the data, converting the
    # bitmap represented flags into an integer value representing pass == 1, suspect or of high
    # interest == 3, and fail == 4.
    ds = parse_qc(ds)

    # create qc flags for the data and add them to the OOI qc flags
    beta_flag, cdom_flag, chl_flag = quality_checks(ds)
    ds['beta_700_qc_summary_flag'] = ('time', (np.array([ds.beta_700_qc_summary_flag,
                                                         beta_flag])).max(axis=0, initial=1))
    ds['fluorometric_cdom_qc_summary_flag'] = ('time', (np.array([ds.fluorometric_cdom_qc_summary_flag,
                                                                  cdom_flag])).max(axis=0, initial=1))
    ds['estimated_chlorophyll_qc_summary_flag'] = ('time', (np.array([ds.estimated_chlorophyll_qc_summary_flag,
                                                                      chl_flag])).max(axis=0, initial=1))

    return ds
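
# A minimal usage sketch (not part of the original module), assuming a CSPP FLORT data set
# downloaded via M2M and saved locally; the function name and file name are hypothetical
# illustrations. It shows one way the QARTOD-style summary flags added above (pass == 1,
# suspect == 3, fail == 4) might be used to screen a science variable.
def _example_flort_cspp_usage():
    import xarray as xr
    raw = xr.open_dataset('flort_cspp_download.nc')  # hypothetical local copy
    cleaned = flort_cspp(raw)
    # keep chlorophyll values flagged as pass (1) or suspect (3), masking out fails (4)
    good_chl = cleaned['estimated_chlorophyll'].where(cleaned['estimated_chlorophyll_qc_summary_flag'] < 4)
    return good_chl
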
def flort_datalogger(ds, burst=False):
    """
    Takes FLORT data recorded by the data loggers used in the CGSN/EA moorings
    and cleans up the data set to make it more user-friendly. Primary task is
    renaming parameters and dropping some that are of limited use.
    Additionally, re-organize some of the variables to permit better
    assessments of the data.

    :param ds: initial FLORT data set downloaded from OOI via the M2M system
    :param burst: boolean flag to resample the data to the defined time interval
    :return ds: cleaned up data set
    """
    # drop some of the variables:
    #   internal_timestamp == superseded by time, redundant so can remove
    #   suspect_timestamp == not used
    #   measurement_wavelength_* == metadata, move into variable attributes.
    #   pressure_depth == variable assigned if this was a FLORT on a CSPP, not with moorings
    ds = ds.drop(['internal_timestamp', 'suspect_timestamp', 'measurement_wavelength_beta',
                  'measurement_wavelength_cdom', 'measurement_wavelength_chl'])

    # check for data from a co-located CTD, if not present add it and reset the fill value for the optical
    # backscatter derived values
    if 'temp' not in ds.variables:
        ds['temp'] = ('time', ds['deployment'] * np.nan)
        ds['practical_salinity'] = ('time', ds['deployment'] * np.nan)
        ds['optical_backscatter'] = ds['optical_backscatter'] * np.nan
        ds['seawater_scattering_coefficient'] = ds['seawater_scattering_coefficient'] * np.nan

    # lots of renaming here to get a better defined data set with cleaner attributes
    rename = {
        'temp': 'seawater_temperature',
        'raw_signal_chl': 'raw_chlorophyll',
        'fluorometric_chlorophyll_a': 'estimated_chlorophyll',
        'fluorometric_chlorophyll_a_qc_executed': 'estimated_chlorophyll_qc_executed',
        'fluorometric_chlorophyll_a_qc_results': 'estimated_chlorophyll_qc_results',
        'raw_signal_cdom': 'raw_cdom',
        'raw_signal_beta': 'raw_backscatter',
        'total_volume_scattering_coefficient': 'beta_700',
        'total_volume_scattering_coefficient_qc_executed': 'beta_700_qc_executed',
        'total_volume_scattering_coefficient_qc_results': 'beta_700_qc_results',
        'optical_backscatter': 'bback',
        'optical_backscatter_qc_executed': 'bback_qc_executed',
        'optical_backscatter_qc_results': 'bback_qc_results',
    }
    ds = ds.rename(rename)

    # reset some attributes
    for key, value in ATTRS.items():
        for atk, atv in value.items():
            if key in ds.variables:
                ds[key].attrs[atk] = atv

    # add the original variable name as an attribute, if renamed
    for key, value in rename.items():
        ds[value].attrs['ooinet_variable_name'] = key

    # parse the OOI QC variables and add QARTOD style QC summary flags to the data, converting the
    # bitmap represented flags into an integer value representing pass == 1, suspect or of high
    # interest == 3, and fail == 4.
    ds = parse_qc(ds)

    # create QC flags for the data and add them to the OOI QC summary flags
    beta_flag, cdom_flag, chl_flag = quality_checks(ds)
    ds['beta_700_qc_summary_flag'] = ('time', (np.array([ds.beta_700_qc_summary_flag,
                                                         beta_flag])).max(axis=0, initial=1))
    ds['fluorometric_cdom_qc_summary_flag'] = ('time', (np.array([ds.fluorometric_cdom_qc_summary_flag,
                                                                  cdom_flag])).max(axis=0, initial=1))
    ds['estimated_chlorophyll_qc_summary_flag'] = ('time', (np.array([ds.estimated_chlorophyll_qc_summary_flag,
                                                                      chl_flag])).max(axis=0, initial=1))

    if burst:
        # re-sample the data collected in burst mode using a 15-minute median average
        burst = ds.resample(time='900s', skipna=True).median(dim='time', keep_attrs=True)

        # for each of the three FLORT measurements, calculate stats (min, max, and the standard deviation)
        # for each of the bursts
        cdom = ds['fluorometric_cdom'].resample(time='900s', skipna=True)
        cdom = np.array([cdom.min('time').values, cdom.max('time').values, cdom.std('time').values])
        chl = ds['estimated_chlorophyll'].resample(time='900s', skipna=True)
        chl = np.array([chl.min('time').values, chl.max('time').values, chl.std('time').values])
        beta = ds['beta_700'].resample(time='900s', skipna=True)
        beta = np.array([beta.min('time').values, beta.max('time').values, beta.std('time').values])

        # create a data set with the burst statistics for the variables
        stats = xr.Dataset({
            'fluorometric_cdom_burst_stats': (['time', 'stats'], cdom.T),
            'estimated_chlorophyll_burst_stats': (['time', 'stats'], chl.T),
            'beta_700_burst_stats': (['time', 'stats'], beta.T)
        }, coords={'time': burst['time'], 'stats': np.arange(0, 3).astype('int32')})

        # add the stats into the burst averaged data set, and then remove the missing rows
        burst = burst.merge(stats)
        burst = burst.where(~np.isnan(burst.deployment), drop=True)

        # save the newly averaged data
        ds = burst

    return ds
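
# A minimal usage sketch (not part of the original module), assuming a telemetered FLORT data
# set downloaded via M2M and saved locally; the function name and file name are hypothetical
# illustrations. With burst=True the data are median-averaged into 15-minute bins and the
# *_burst_stats variables hold the per-burst minimum, maximum and standard deviation
# (indexed 0, 1 and 2 along the stats dimension).
def _example_flort_datalogger_usage():
    import xarray as xr
    raw = xr.open_dataset('flort_datalogger_download.nc')  # hypothetical local copy
    averaged = flort_datalogger(raw, burst=True)
    # per-burst standard deviation of the chlorophyll estimate
    chl_std = averaged['estimated_chlorophyll_burst_stats'].sel(stats=2)
    return averaged, chl_std
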