Example #1
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all of the pH data for a defined reference designator (using the site,
    node and sensor names to construct the reference designator) collected via
    the three data delivery methods of telemetered, recovered host and
    recovered instrument and combine them into a single data set from which
    QARTOD test limits for the gross range and climatology tests can be
    calculated.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add
        to QARTOD test sets
    :return annotations: Initial list of auto-generated HITL annotations as
        a pandas dataframe
    :return gr_lookup: CSV formatted strings to save to a csv file for the
        QARTOD gross range lookup tables.
    :return clm_lookup: CSV formatted strings to save to a csv file for the
        QARTOD climatology lookup tables.
    :return clm_table: CSV formatted strings to save to a csv file for the
        QARTOD climatology range tables.
    """
    # load and combine all of the data sources for the pH sensor
    data = combine_delivery_methods(site, node, sensor)

    # create a boolean array of the data marked as "fail" by the pH quality checks and generate initial
    # HITL annotations that can be combined with system annotations and pH quality checks to create
    # a cleaned up data set prior to calculating the QARTOD test values
    fail = data.seawater_ph_quality_flag.where(
        data.seawater_ph_quality_flag == 4).notnull()
    blocks = identify_blocks(fail, [24, 24])
    hitl = create_annotations(site, node, sensor, blocks)

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # append the fail annotations to the existing annotations
    annotations = annotations.append(pd.DataFrame(hitl),
                                     ignore_index=True,
                                     sort=False)
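    # note (not part of the original code): DataFrame.append was removed in
    # pandas 2.0; on newer pandas the equivalent call would be
    #   annotations = pd.concat([annotations, pd.DataFrame(hitl)],
    #                           ignore_index=True, sort=False)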

    # create a roll-up annotation flag
    data = add_annotation_qc_flags(data, annotations)

    # clean up the data, removing values that fail the pH quality checks or were marked as fail in the annotations
    data = data.where((data.seawater_ph_quality_flag != 4)
                      & (data.rollup_annotations_qc_results != 4))

    # if a cut_off date was provided, limit the data to everything collected up to that date.
    # otherwise, use the full time range of the downloaded data.
    if cut_off:
        cut = parser.parse(cut_off)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')
    else:
        cut = parser.parse(data.time_coverage_end)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice("2014-01-01T00:00:00", end_date))

    # create the initial gross range entry
    gr = process_gross_range(data, ['seawater_ph'], [6.9, 9.0],
                             site=site,
                             node=node,
                             sensor=sensor)

    # re-work the gross range entry for the different streams and parameter names
    gr_lookup = pd.DataFrame()
    gr_lookup = gr_lookup.append([gr, gr, gr], ignore_index=True)
    gr_lookup['parameter'][0] = {'inp': 'phsen_abcdef_ph_seawater'}
    gr_lookup['stream'][0] = 'phsen_abcdef_dcl_instrument'
    gr_lookup['parameter'][1] = {'inp': 'phsen_abcdef_ph_seawater'}
    gr_lookup['stream'][1] = 'phsen_abcdef_dcl_instrument_recovered'
    gr_lookup['parameter'][2] = {'inp': 'phsen_abcdef_ph_seawater'}
    gr_lookup['stream'][2] = 'phsen_abcdef_instrument'
    gr_lookup['source'] = (
        'Sensor min/max based on the vendor standard calibration range. '
        'The user min/max is the historical mean of all data collected '
        'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology entry and table
    cll, clm_table = process_climatology(data, ['seawater_ph'], [6.9, 9.0],
                                         site=site,
                                         node=node,
                                         sensor=sensor)

    # re-work climatology entry for the different streams and parameter names
    clm_lookup = pd.DataFrame()
    clm_lookup = clm_lookup.append([cll, cll, cll])
    clm_lookup['parameters'][0] = {
        'inp': 'phsen_abcdef_ph_seawater',
        'tinp': 'time',
        'zinp': 'None'
    }
    clm_lookup['stream'][0] = 'phsen_abcdef_dcl_instrument'
    clm_lookup['parameters'][1] = {
        'inp': 'phsen_abcdef_ph_seawater',
        'tinp': 'time',
        'zinp': 'None'
    }
    clm_lookup['stream'][1] = 'phsen_abcdef_dcl_instrument_recovered'
    clm_lookup['parameters'][2] = {
        'inp': 'phsen_abcdef_ph_seawater',
        'tinp': 'time',
        'zinp': 'None'
    }
    clm_lookup['stream'][2] = 'phsen_abcdef_instrument'

    return annotations, gr_lookup, clm_lookup, clm_table
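
A minimal usage sketch (not part of the original module): the reference designator pieces, cut-off date and output file names below are illustrative placeholders, and the lookups are written out on the assumption that they are the pandas DataFrames constructed above.

annotations, gr_lookup, clm_lookup, clm_table = generate_qartod(
    'CE01ISSM', 'RID16', '06-PHSEND000', '2021-01-01')

# gr_lookup and clm_lookup are built as pandas DataFrames above, so they can
# be saved directly to CSV; the file names are hypothetical
gr_lookup.to_csv('phsen_gross_range_lookup.csv', index=False)
clm_lookup.to_csv('phsen_climatology_lookup.csv', index=False)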
Example #2
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all of the FLORT data for a defined reference designator (using the
    site, node and sensor names to construct the reference designator),
    collected via the different data delivery methods, and combine them into
    a single data set from which QARTOD test limits for the gross range and
    climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add
        to QARTOD test sets
    :return annotations: Initial list of auto-generated HITL annotations as
        a pandas dataframe
    :return gr_lookup: CSV formatted strings to save to a csv file for the
        QARTOD gross range lookup tables.
    :return clm_lookup: CSV formatted strings to save to a csv file for the
        QARTOD climatology lookup tables.
    :return clm_table: CSV formatted strings to save to a csv file for the
        QARTOD climatology range tables.
    """
    # load the combined data for the different sources of FLORT data
    data = combine_delivery_methods(site, node, sensor)

    # create boolean arrays of the data marked as "fail" by the quality checks and generate initial
    # HITL annotations that can be combined with system annotations to create a cleaned up data set
    # prior to calculating the QARTOD test values
    if node == 'WFP01':
        index = 10  # decimate the WFP data so we can process it
    else:
        index = 1
    chl_fail = data.estimated_chlorophyll_qc_summary_flag.where(
        data.estimated_chlorophyll_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(chl_fail[::index], [18, 72])
    chl_hitl = create_annotations(site, node, sensor, blocks)
    chl_hitl['parameters'] = [[22, 1141] for i in chl_hitl['parameters']]

    cdom_fail = data.fluorometric_cdom_qc_summary_flag.where(
        data.fluorometric_cdom_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(cdom_fail[::index], [18, 72])
    cdom_hitl = create_annotations(site, node, sensor, blocks)
    cdom_hitl['parameters'] = [[23, 1143] for i in cdom_hitl['parameters']]

    beta_fail = data.beta_700_qc_summary_flag.where(
        data.beta_700_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(beta_fail[::index], [18, 72], 24)
    beta_hitl = create_annotations(site, node, sensor, blocks)
    beta_hitl['parameters'] = [[24, 25, 1139] for i in beta_hitl['parameters']]

    # combine the different dictionaries into a single HITL annotation dictionary for later use
    hitl = chl_hitl.copy()
    for d in (cdom_hitl, beta_hitl):
        for key, value in d.items():
            hitl[key] = hitl[key] + d[key]

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # append the fail annotations to the existing annotations
    annotations = annotations.append(pd.DataFrame(hitl),
                                     ignore_index=True,
                                     sort=False)

    # create an annotation-based quality flag
    data = add_annotation_qc_flags(data, annotations)

    # clean up the data, NaN-ing values that were marked as fail in the QC checks and/or identified as a block
    # of failed data, and then removing all records where the rollup annotation (every parameter fails) was
    # set to fail.
    data['estimated_chlorophyll'][chl_fail] = np.nan
    if 'fluorometric_chl_a_annotations_qc_results' in data.variables:
        m = data.fluorometric_chl_a_annotations_qc_results == 4
        data['estimated_chlorophyll'][m] = np.nan

    data['fluorometric_cdom'][cdom_fail] = np.nan
    if 'fluorometric_cdom_annotations_qc_results' in data.variables:
        m = data.fluorometric_cdom_annotations_qc_results == 4
        data['fluorometric_cdom'][m] = np.nan

    data['beta_700'][beta_fail] = np.nan
    if 'total_volume_scattering_coefficient_annotations_qc_results' in data.variables:
        m = data.total_volume_scattering_coefficient_annotations_qc_results == 4
        data['beta_700'][m] = np.nan
        data['bback'][m] = np.nan

    if 'rollup_annotations_qc_results' in data.variables:
        data = data.where(data.rollup_annotations_qc_results < 4)

    # if a cut_off date was provided, limit the data to everything collected up to that date.
    # otherwise, use the full time range of the downloaded data.
    if cut_off:
        cut = parser.parse(cut_off)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')
    else:
        cut = parser.parse(data.time_coverage_end)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # set the parameters and the gross range limits
    parameters = ['bback', 'estimated_chlorophyll', 'fluorometric_cdom']
    limits = [[0, 3], [0, 30], [0, 375]]

    # create the initial gross range entry
    gr_lookup = process_gross_range(data,
                                    parameters,
                                    limits,
                                    site=site,
                                    node=node,
                                    sensor=sensor,
                                    stream='flort_sample')

    # add a note documenting the data used to establish the user range
    gr_lookup['notes'] = (
        'User range based on data collected through {}.'.format(src_date))

    # based on the site and node, determine if we need a depth based climatology
    depth_bins = np.array([])
    if node in ['SP001', 'WFP01']:
        if site in [
                'CE01ISSP', 'CE02SHSP', 'CE06ISSP', 'CE07SHSP', 'CE09OSPM'
        ]:
            vocab = get_vocabulary(site, node, sensor)[0]
            max_depth = vocab['maxdepth']
            depth_bins = woa_standard_bins()
            m = depth_bins[:, 1] <= max_depth
            depth_bins = depth_bins[m, :]

    # create and format the climatology lookups and tables for the data
    clm_lookup, clm_table = process_climatology(data,
                                                parameters,
                                                limits,
                                                depth_bins=depth_bins,
                                                site=site,
                                                node=node,
                                                sensor=sensor,
                                                stream='flort_sample')

    # add the stream name
    clm_lookup['stream'] = 'flort_sample'

    return annotations, gr_lookup, clm_lookup, clm_table
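
Because cut_off is treated as optional in the code above (an empty value falls back to the dataset's time_coverage_end attribute), a hedged usage sketch for this profiler version might pass None; the site, node and sensor values below are illustrative placeholders only.

annotations, gr_lookup, clm_lookup, clm_table = generate_qartod(
    'CE09OSPM', 'WFP01', '04-FLORTK000', None)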
Example #3
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all of the FLORT data for a defined reference designator (using the
    site, node and sensor names to construct the reference designator),
    collected via the different data delivery methods, and combine them into
    a single data set from which QARTOD test limits for the gross range and
    climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add
        to QARTOD test sets
    :return annotations: Initial list of auto-generated HITL annotations as
        a pandas dataframe
    :return gr_lookup: CSV formatted strings to save to a csv file for the
        QARTOD gross range lookup tables.
    :return clm_lookup: CSV formatted strings to save to a csv file for the
        QARTOD climatology lookup tables.
    :return clm_table: CSV formatted strings to save to a csv file for the
        QARTOD climatology range tables.
    """
    # load the combined data for the different sources of FLORT data
    data = combine_delivery_methods(site, node, sensor)

    # create boolean arrays of the data marked as "fail" by the quality checks and generate initial
    # HITL annotations that can be combined with system annotations to create a cleaned up data set
    # prior to calculating the QARTOD test values
    chl_fail = data.estimated_chlorophyll_qc_summary_flag.where(data.estimated_chlorophyll_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(chl_fail, [18, 72])
    chl_hitl = create_annotations(site, node, sensor, blocks)
    chl_hitl['parameters'] = ['chl' for i in chl_hitl['parameters']]

    cdom_fail = data.fluorometric_cdom_qc_summary_flag.where(data.fluorometric_cdom_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(cdom_fail, [18, 72])
    cdom_hitl = create_annotations(site, node, sensor, blocks)
    cdom_hitl['parameters'] = ['cdom' for i in cdom_hitl['parameters']]

    beta_fail = data.beta_700_qc_summary_flag.where(data.beta_700_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(beta_fail, [18, 72])
    beta_hitl = create_annotations(site, node, sensor, blocks)
    beta_hitl['parameters'] = ['beta' for i in beta_hitl['parameters']]

    bback_fail = data.bback_qc_summary_flag.where(data.bback_qc_summary_flag > 3).notnull()
    blocks = identify_blocks(bback_fail, [18, 72])
    bback_hitl = create_annotations(site, node, sensor, blocks)
    bback_hitl['parameters'] = ['bback' for i in bback_hitl['parameters']]

    # combine the different dictionaries into a single HITL annotation dictionary for later use
    hitl = chl_hitl.copy()
    for d in (cdom_hitl, beta_hitl, bback_hitl):
        for key, value in d.items():
            hitl[key] = hitl[key] + d[key]

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # append the fail annotations to the existing annotations
    annotations = annotations.append(pd.DataFrame(hitl), ignore_index=True, sort=False)

    # create an annotation-based quality flag
    data = add_annotation_qc_flags(data, annotations)

    # clean up the data, NaN-ing values that were marked as fail in the QC checks, and then removing
    # all records where the rollup annotation was set to fail
    data['estimated_chlorophyll'][chl_fail] = np.nan
    data['fluorometric_cdom'][cdom_fail] = np.nan
    data['beta_700'][beta_fail] = np.nan
    data['bback'][beta_fail] = np.nan
    data['bback'][bback_fail] = np.nan
    data = data.where(data.rollup_annotations_qc_results < 4)

    # if a cut_off date was provided, limit the data to everything collected up to that date.
    # otherwise, use the full time range of the downloaded data.
    if cut_off:
        cut = parser.parse(cut_off)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')
    else:
        cut = parser.parse(data.time_coverage_end)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # set the parameters and the gross range limits
    parameters = ['bback', 'estimated_chlorophyll', 'fluorometric_cdom']
    limits = [[0, 5], [0, 30], [0, 375]]

    # create the initial gross range entry
    gr_lookup = process_gross_range(data, parameters, limits, site=site, node=node, sensor=sensor)

    # add the stream name and the source comment
    gr_lookup['stream'] = 'flort_sample'
    gr_lookup['source'] = ('Sensor min/max based on the vendor sensor specifications. '
                           'The user min/max is the historical mean of all data collected '
                           'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology lookups and tables for the data
    clm_lookup, clm_table = process_climatology(data, parameters, limits, site=site, node=node, sensor=sensor)

    # add the stream name
    clm_lookup['stream'] = 'flort_sample'

    return annotations, gr_lookup, clm_lookup, clm_table
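
The climatology tables are described in the docstring as CSV-formatted strings, so a hedged sketch for saving the outputs of this version might look like the following; it assumes clm_table is an iterable of those strings (one per parameter) and uses illustrative designator values and file names.

annotations, gr_lookup, clm_lookup, clm_table = generate_qartod(
    'CE02SHSM', 'RID27', '02-FLORTD000', '2021-01-01')

# save the gross range lookup and write each climatology table to its own file
gr_lookup.to_csv('flort_gross_range_lookup.csv', index=False)
for i, table in enumerate(clm_table):
    with open('flort_climatology_table_{}.csv'.format(i), 'w') as fid:
        fid.write(table)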