Example no. 1
def combine_delivery_methods(site, node, sensor):
    """
    Takes the downloaded data from each of the three data delivery methods and
    combines them into a single, merged xarray data set.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :return merged: the merged PHSEN data stream resampled to a 3-hour time
        record
    """
    # download the telemetered data and re-process it to create a more useful and coherent data set
    tag = '.*PHSEN.*\\.nc$'
    telem = load_gc_thredds(site, node, sensor, 'telemetered',
                            'phsen_abcdef_dcl_instrument', tag)
    telem = phsen_datalogger(telem)

    # download the recovered host data and re-process it to create a more useful and coherent data set
    rhost = load_gc_thredds(site, node, sensor, 'recovered_host',
                            'phsen_abcdef_dcl_instrument_recovered', tag)
    rhost = phsen_datalogger(rhost)

    # download the recovered instrument data and re-process it to create a more useful and coherent data set
    rinst = load_gc_thredds(site, node, sensor, 'recovered_inst',
                            'phsen_abcdef_instrument', tag)
    rinst = phsen_instrument(rinst)

    # combine the three datasets into a single, merged time series resampled to a 3-hour interval
    merged = combine_datasets(telem, rhost, rinst, 180)

    # re-run the quality checks, since averaging will change the flag values
    merged['seawater_ph_quality_flag'] = quality_checks(merged)
    return merged
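
A minimal usage sketch, assuming an illustrative reference designator: the designator is split into the site, node and sensor parts described in the docstring, the three delivery methods are merged, and the result is saved to disk.

refdes = 'CE01ISSM-RID16-06-PHSEND000'  # illustrative reference designator
site, node, sensor = refdes.split('-', 2)  # -> 'CE01ISSM', 'RID16', '06-PHSEND000'
merged = combine_delivery_methods(site, node, sensor)
merged.to_netcdf('phsen_merged.nc', mode='w', format='NETCDF4', engine='h5netcdf')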
Example no. 2
def combine_delivery_methods(site, node, sensor):
    """
    Takes the downloaded data from each of the three data delivery methods for
    the uncabled CTD (CTDBP) and combines them into a single, merged xarray
    data set.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :return merged: the merged CTDBP data stream resampled to a 3-hour time
        record
    """
    # download the telemetered data and re-process it to create a more useful and coherent data set
    tag = '.*CTDBP.*\\.nc$'
    telem = load_gc_thredds(site, node, sensor, 'telemetered', 'ctdbp_cdef_dcl_instrument', tag)
    telem = ctdbp_datalogger(telem)

    # download the recovered host data and re-process it to create a more useful and coherent data set
    rhost = load_gc_thredds(site, node, sensor, 'recovered_host', 'ctdbp_cdef_dcl_instrument_recovered', tag)
    rhost = ctdbp_datalogger(rhost)

    # download the recovered instrument data and re-process it to create a more useful and coherent data set
    rinst = load_gc_thredds(site, node, sensor, 'recovered_inst', 'ctdbp_cdef_instrument_recovered', tag)
    rinst = ctdbp_instrument(rinst)

    # combine the three datasets into a single, merged time series resampled to a 3-hour interval
    merged = combine_datasets(telem, rhost, rinst, 180)

    return merged
Example no. 3
def combine_delivery_methods(site, node, sensor):
    """
    Takes the downloaded data from each of the three data delivery methods for
    the seafloor pressure sensor (PRESF) and combines them into a single,
    merged xarray data set.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :return merged: the merged PRESF data stream resampled to an hourly time
        record
    """
    # download the telemetered, recovered_host and recovered_inst data sets
    tag = '.*PRESF.*\\.nc$'
    telem = load_gc_thredds(site, node, sensor, 'telemetered',
                            'presf_abc_dcl_tide_measurement', tag)
    rhost = load_gc_thredds(site, node, sensor, 'recovered_host',
                            'presf_abc_dcl_tide_measurement_recovered', tag)
    rinst = load_gc_thredds(site, node, sensor, 'recovered_inst',
                            'presf_abc_tide_measurement_recovered', tag)

    # combine the three datasets into a single, merged time series resampled to an hourly interval
    merged = combine_datasets(telem, rhost, rinst, 60)

    return merged
Example no. 4
def combine_delivery_methods(site, node, sensor, source):
    """
    Takes the downloaded data from each of the two data delivery methods for
    the atmospheric pCO2 data stream (which also contains the surface seawater
    pCO2) and combines them into a single, merged xarray data set.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param source: specify whether this is the air or water stream
    :return merged: the atmospheric or surface seawater pCO2 data stream
        resampled to a 3 hour time record
    """
    # set the regex tag and stream names based on whether this is the air or water data
    if source == 'air':
        tag = '.*PCO2A.*air.*\\.nc$'
        tstream = 'pco2a_a_dcl_instrument_air'
        rstream = 'pco2a_a_dcl_instrument_air_recovered'
    else:
        tag = '.*PCO2A.*water.*\\.nc$'
        tstream = 'pco2a_a_dcl_instrument_water'
        rstream = 'pco2a_a_dcl_instrument_water_recovered'

    # download the telemetered data and re-process it to create a more useful and coherent data set
    telem = load_gc_thredds(site, node, sensor, 'telemetered', tstream, tag)
    telem = pco2a_datalogger(telem)

    # download the recovered host data and re-process it to create a more useful and coherent data set
    rhost = load_gc_thredds(site, node, sensor, 'recovered_host', rstream, tag)
    rhost = pco2a_datalogger(rhost)

    # combine the two datasets into a single, merged time series resampled to a 3-hour interval
    merged = combine_datasets(telem, rhost, None, 180)

    return merged
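
A short usage sketch, with illustrative site, node and sensor designators: the function is called once per source, yielding separate merged data sets for the atmospheric and surface seawater pCO2 streams.

site, node, sensor = 'CE02SHSM', 'SBD12', '04-PCO2AA000'  # illustrative designators
air = combine_delivery_methods(site, node, sensor, 'air')
water = combine_delivery_methods(site, node, sensor, 'water')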
Example no. 5
def main():
    # Set up the parameters needed for the request. The user would need to vary these to suit their own needs and
    # sites/instruments of interest. Site, node, sensor, stream and delivery method names can be obtained from the
    # Ocean Observatories Initiative website. The last parameter (instrmt) sets the path and naming conventions
    # used to save the data to the local disk.
    site = 'CE02SHSP'  # OOI Net site designator
    node = 'SP001'  # OOI Net node designator
    sensor = '07-FLORTJ000'  # OOI Net sensor designator
    stream = 'flort_sample'  # OOI Net stream name
    method = 'recovered_cspp'  # OOI Net data delivery method
    instrmt = 'flort'  # local directory name, instrument below site

    # We are after the recovered data. Determine the list of deployments and use data from one of the earlier deployments
    vocab = get_vocabulary(site, node, sensor)[0]
    deployments = list_deployments(site, node, sensor)
    deploy = deployments[-4]

    # download the data from the Gold Copy THREDDS server
    flort = load_gc_thredds(site, node, sensor, method, stream,
                            ('.*deployment%04d.*FLORT.*\\.nc$' % deploy))

    # clean-up and reorganize
    flort = flort_cspp(flort)
    flort = update_dataset(flort, vocab['maxdepth'])

    # save the data
    out_path = os.path.join(CONFIG['base_dir']['m2m_base'], site.lower(),
                            instrmt)
    out_path = os.path.abspath(out_path)
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    out_file = ('%s.%s.deploy%02d.%s.%s.nc' %
                (site.lower(), instrmt, deploy, method, stream))
    nc_out = os.path.join(out_path, out_file)

    flort.to_netcdf(nc_out,
                    mode='w',
                    format='NETCDF4',
                    engine='h5netcdf',
                    encoding=ENCODINGS)
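
As a quick sanity check after a run of main(), the saved file can be reopened with xarray. The file name below is illustrative (it follows the naming convention used above), and the h5netcdf engine matches the one used for writing.

import xarray as xr

with xr.open_dataset('ce02shsp.flort.deploy05.recovered_cspp.flort_sample.nc', engine='h5netcdf') as check:
    print(check.time.min().values, check.time.max().values)  # confirm the time coverage
    print(list(check.data_vars))  # confirm the expected variables are present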
Example no. 6
def combine_delivery_methods(site, node, sensor):
    """
    Takes the downloaded data from the different data delivery methods for the
    three-channel fluorometer (FLORT), and combines them, where appropriate,
    into a single, merged xarray data set.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :return merged: the merged and resampled (if appropriate) FLORT dataset
    """
    # set the stream and tag constants
    tag = '.*FLORT.*\\.nc$'
    stream = 'flort_sample'

    if node in ['SP001', 'WFP01']:
        # this FLORT is part of a CSPP or WFP and includes telemetered and recovered data
        if node == 'SP001':
            telem = None  # don't use the telemetered CSPP data
            print(
                '##### Downloading the recovered_cspp FLORT data for %s #####'
                % site)
            rhost = load_gc_thredds(site, node, sensor, 'recovered_cspp',
                                    stream, tag)
            deployments = []
            print('# -- Group the data by deployment and process the data')
            grps = list(rhost.groupby('deployment'))
            for grp in grps:
                print('# -- Processing recovered_cspp deployment %s' % grp[0])
                deployments.append(flort_cspp(grp[1]))
            deployments = [i for i in deployments if i]
            rhost = xr.concat(deployments, 'time')
        else:
            print('##### Downloading the telemetered FLORT data for %s #####' %
                  site)
            telem = load_gc_thredds(site, node, sensor, 'telemetered', stream,
                                    tag)
            deployments = []
            print('# -- Group the data by deployment and process the data')
            grps = list(telem.groupby('deployment'))
            for grp in grps:
                print('# -- Processing telemetered deployment %s' % grp[0])
                deployments.append(flort_wfp(grp[1]))
            deployments = [i for i in deployments if i]
            telem = xr.concat(deployments, 'time')

            print(
                '##### Downloading the recovered_wfp FLORT data for %s #####' %
                site)
            rhost = load_gc_thredds(site, node, sensor, 'recovered_wfp',
                                    stream, tag)
            deployments = []
            print('# -- Group the data by deployment and process the data')
            grps = list(rhost.groupby('deployment'))
            for grp in grps:
                print('# -- Processing recovered_wfp deployment %s' % grp[0])
                deployments.append(flort_wfp(grp[1]))
            deployments = [i for i in deployments if i]
            rhost = xr.concat(deployments, 'time')

        # merge, but do not resample the time records.
        merged = combine_datasets(telem, rhost, None, None)
    elif node == 'SBD17':
        # this FLORT is mounted on the buoy of the Inshore moorings and includes all three types of data
        print('##### Downloading the telemetered FLORT data for %s #####' %
              site)
        telem = load_gc_thredds(site, node, sensor, 'telemetered', stream, tag)
        deployments = []
        print('# -- Group the data by deployment and process the data')
        grps = list(telem.groupby('deployment'))
        for grp in grps:
            print('# -- Processing telemetered deployment %s' % grp[0])
            deployments.append(flort_instrument(grp[1]))
        deployments = [i for i in deployments if i]
        telem = xr.concat(deployments, 'time')

        print('##### Downloading the recovered_host FLORT data for %s #####' %
              site)
        rhost = load_gc_thredds(site, node, sensor, 'recovered_host', stream,
                                tag)
        deployments = []
        print('# -- Group the data by deployment and process the data')
        grps = list(rhost.groupby('deployment'))
        for grp in grps:
            print('# -- Processing recovered_host deployment %s' % grp[0])
            deployments.append(flort_instrument(grp[1]))
        deployments = [i for i in deployments if i]
        rhost = xr.concat(deployments, 'time')

        print('##### Downloading the recovered_inst FLORT data for %s #####' %
              site)
        rinst = load_gc_thredds(site, node, sensor, 'recovered_inst', stream,
                                tag)
        deployments = []
        print('# -- Group the data by deployment and process the data')
        grps = list(rinst.groupby('deployment'))
        for grp in grps:
            print('# -- Processing recovered_inst deployment %s' % grp[0])
            deployments.append(flort_instrument(grp[1]))
        deployments = [i for i in deployments if i]
        rinst = xr.concat(deployments, 'time')

        # merge and resample to a 2 hour data record
        merged = combine_datasets(telem, rhost, rinst, 120)
    else:
        # this FLORT is standalone on one of the NSIFs and includes the telemetered and recovered_host data
        # data is collected in bursts (3 minutes at 1 Hz). process each data set per-deployment
        print('##### Downloading the telemetered FLORT data for %s #####' %
              site)
        telem = load_gc_thredds(site, node, sensor, 'telemetered', stream, tag)
        deployments = []
        print('# -- Group the data by deployment and process the data')
        grps = list(telem.groupby('deployment'))
        for grp in grps:
            print('# -- Processing telemetered deployment %s' % grp[0])
            deployments.append(flort_datalogger(grp[1], True))
        deployments = [i for i in deployments if i]
        telem = xr.concat(deployments, 'time')

        print('##### Downloading the recovered_host FLORT data for %s #####' %
              site)
        rhost = load_gc_thredds(site, node, sensor, 'recovered_host', stream,
                                tag)
        deployments = []
        print('# -- Group the data by deployment and process the data')
        grps = list(rhost.groupby('deployment'))
        for grp in grps:
            print('# -- Processing recovered_host deployment %s' % grp[0])
            deployments.append(flort_datalogger(grp[1], True))
        deployments = [i for i in deployments if i]
        rhost = xr.concat(deployments, 'time')

        # combine the datasets, leaving them as 15-minute median averaged datasets
        merged = combine_datasets(telem, rhost, None, None)

    return merged
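
A brief usage sketch; the node portion of the reference designator determines which branch above is used (profiler, inshore buoy, or NSIF). The first set of designators is illustrative; the second reuses those from Example no. 5.

nsif = combine_delivery_methods('CE01ISSM', 'RID16', '02-FLORTD000')   # illustrative NSIF designators
cspp = combine_delivery_methods('CE02SHSP', 'SP001', '07-FLORTJ000')   # CSPP designators from Example no. 5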
Example no. 7
def main(argv=None):
    args = inputs(argv)
    site = args.site
    node = args.node
    sensor = args.sensor
    method = args.method
    stream = args.stream
    deploy = args.deploy
    start = args.start
    stop = args.stop
    burst = args.burst

    # check if we are specifying a deployment or a specific date and time range
    if not deploy and not (start and stop):
        raise SyntaxError(
            'You must specify either a deployment number or beginning and end dates of interest.'
        )

    # if we are specifying a deployment number, then get the data from the Gold Copy THREDDS server
    if deploy:
        # download the data for the deployment
        flort = load_gc_thredds(site, node, sensor, method, stream,
                                ('.*deployment%04d.*FLORT.*\\.nc$' % deploy))

        # check to see if we downloaded any data
        if not flort:
            exit_text = (
                'Data unavailable for %s-%s-%s, %s, %s, deployment %d.' %
                (site, node, sensor, method, stream, deploy))
            raise SystemExit(exit_text)
    else:
        # otherwise, request the data for download from OOINet via the M2M API using the specified dates
        r = m2m_request(site, node, sensor, method, stream, start, stop)
        if not r:
            exit_text = (
                'Request failed for %s-%s-%s, %s, %s, from %s to %s.' %
                (site, node, sensor, method, stream, start, stop))
            raise SystemExit(exit_text)

        # Valid M2M request, start downloading the data
        flort = m2m_collect(r, '.*FLORT.*\\.nc$')

        # check to see if we downloaded any data
        if not flort:
            exit_text = (
                'Data unavailable for %s-%s-%s, %s, %s, from %s to %s.' %
                (site, node, sensor, method, stream, start, stop))
            raise SystemExit(exit_text)

    # clean-up and reorganize the data
    if node == 'SP001':
        # this FLORT is part of a CSPP
        flort = flort_cspp(flort)
    elif node == 'WFP01':
        # this FLORT is part of a Wire-Following Profiler
        flort = flort_wfp(flort)
    elif node == 'SBD17':
        # this FLORT is connected to the CTDBP on an EA Inshore Surface Mooring
        flort = flort_instrument(flort)
        if not flort:
            # there was no data after removing all the 0's
            sys.exit()
    else:
        # this FLORT is stand-alone on one of the moorings
        flort = flort_datalogger(flort, burst)

    vocab = get_vocabulary(site, node, sensor)[0]
    flort = update_dataset(flort, vocab['maxdepth'])

    # save the data to disk
    out_file = os.path.abspath(args.outfile)
    if not os.path.exists(os.path.dirname(out_file)):
        os.makedirs(os.path.dirname(out_file))

    flort.to_netcdf(out_file,
                    mode='w',
                    format='NETCDF4',
                    engine='h5netcdf',
                    encoding=ENCODINGS)
def generate_qartod(site, node, sensor, cut_off):
    """
    Load all of the pCO2 data for a defined reference designator (using the
    site, node and sensor names to construct the reference designator)
    collected via the recovered instrument method and combine them into a
    single data set from which QARTOD test limits for the gross range and
    climatology tests can be calculated.

    :param site: Site designator, extracted from the first part of the
        reference designator
    :param node: Node designator, extracted from the second part of the
        reference designator
    :param sensor: Sensor designator, extracted from the third and fourth part
        of the reference designator
    :param cut_off: string formatted date to use as cut-off for data to add
        to QARTOD test sets
    :return annotations: Initial list of auto-generated HITL annotations as
        a pandas dataframe
    :return gr_lookup: CSV formatted strings to save to a csv file for the
        QARTOD gross range lookup tables.
    :return clm_lookup: CSV formatted strings to save to a csv file for the
        QARTOD climatology lookup tables.
    :return clm_table: CSV formatted strings to save to a csv file for the
        QARTOD climatology range tables.
    """
    # load the recovered instrument data
    data = load_gc_thredds(site, node, sensor, 'recovered_inst',
                           'pco2w_abc_instrument', '^(?!.*blank).*PCO2W.*\\.nc$')
    data = pco2w_instrument(data)

    # resample the data into a 3 hour, median averaged time series
    data = combine_datasets(data, None, None, 180)

    # recalculate the quality flags as averaging will alter them
    data['pco2_seawater_quality_flag'] = quality_checks(data)

    # create a boolean array of the data marked as "fail" by the pCO2 quality checks and generate initial
    # HITL annotations that can be combined with system annotations and pCO2 quality checks to create
    # a cleaned up data set prior to calculating the QARTOD test values
    fail = data.pco2_seawater_quality_flag.where(
        data.pco2_seawater_quality_flag == 4).notnull()
    blocks = identify_blocks(fail, [24, 96])
    hitl = create_annotations(site, node, sensor, blocks)

    # get the current system annotations for the sensor
    annotations = get_annotations(site, node, sensor)
    annotations = pd.DataFrame(annotations)
    if not annotations.empty:
        annotations = annotations.drop(columns=['@class'])
        annotations['beginDate'] = pd.to_datetime(
            annotations.beginDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')
        annotations['endDate'] = pd.to_datetime(
            annotations.endDT, unit='ms').dt.strftime('%Y-%m-%dT%H:%M:%S')

    # append the fail annotations to the existing annotations
    annotations = pd.concat([annotations, pd.DataFrame(hitl)],
                            ignore_index=True, sort=False)

    # create a roll-up annotation flag
    data = add_annotation_qc_flags(data, annotations)

    # clean-up the data, removing values that fail the pCO2 quality checks or were marked as fail in the annotations
    data = data.where((data.pco2_seawater_quality_flag != 4)
                      & (data.rollup_annotations_qc_results != 4))

    # if a cut_off date was used, limit data to all data collected up to the cut_off date.
    # otherwise, set the limit to the range of the downloaded data.
    if cut_off:
        cut = parser.parse(cut_off)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')
    else:
        cut = parser.parse(data.time_coverage_end)
        cut = cut.astimezone(pytz.utc)
        end_date = cut.strftime('%Y-%m-%dT%H:%M:%S')
        src_date = cut.strftime('%Y-%m-%d')

    data = data.sel(time=slice('2014-01-01T00:00:00', end_date))

    # create the initial gross range entry
    gr = process_gross_range(data, ['pco2_seawater'], [200, 2000],
                             site=site,
                             node=node,
                             sensor=sensor)

    # re-work gross entry for the different streams and parameter names
    gr_lookup = pd.concat([gr, gr, gr], ignore_index=True)
    gr_lookup['parameter'] = [{'inp': 'pco2_seawater'}] * 3
    gr_lookup['stream'] = ['pco2w_abc_dcl_instrument',
                           'pco2w_abc_dcl_instrument_recovered',
                           'pco2w_abc_instrument']
    gr_lookup['source'] = (
        'Sensor min/max based on the vendor standard calibration range. '
        'The user min/max is the historical mean of all data collected '
        'up to {} +/- 3 standard deviations.'.format(src_date))

    # create and format the climatology entry and table
    cll, clm_table = process_climatology(data, ['pco2_seawater'], [200, 2000],
                                         site=site,
                                         node=node,
                                         sensor=sensor)

    # re-work climatology entry for the different streams and parameter names
    clm_lookup = pd.concat([cll, cll, cll], ignore_index=True)
    clm_lookup['parameters'] = [{'inp': 'pco2_seawater', 'tinp': 'time',
                                 'zinp': 'None'}] * 3
    clm_lookup['stream'] = ['pco2w_abc_dcl_instrument',
                            'pco2w_abc_dcl_instrument_recovered',
                            'pco2w_abc_instrument']

    return annotations, gr_lookup, clm_lookup, clm_table
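
A sketch of how a caller might use generate_qartod and save its outputs. The designators, cut-off date, output directory and file names are illustrative; handling of clm_table is omitted since its exact format (CSV-formatted strings, per the docstring) may vary.

import os

annotations, gr_lookup, clm_lookup, clm_table = generate_qartod('CE01ISSM', 'RID16', '05-PCO2WB000', '2022-01-01')
out_dir = os.path.abspath('./qartod/pco2w')  # illustrative output directory
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
annotations.to_csv(os.path.join(out_dir, 'annotations.csv'), index=False)
gr_lookup.to_csv(os.path.join(out_dir, 'gross_range_lookup.csv'), index=False)
clm_lookup.to_csv(os.path.join(out_dir, 'climatology_lookup.csv'), index=False)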
def main(argv=None):
    # setup the input arguments
    args = inputs(argv)
    site = args.site
    node = args.node
    sensor = args.sensor
    method = args.method
    stream = args.stream
    deploy = args.deploy
    start = args.start
    stop = args.stop

    # check if we are specifying a deployment or a specific date and time range
    if not deploy and not (start and stop):
        raise SyntaxError(
            'You must specify either a deployment number or beginning and end dates of interest.'
        )

    # if we are specifying a deployment number, then get the data from the Gold Copy THREDDS server
    if deploy:
        # download the data for the deployment
        pco2w = load_gc_thredds(
            site, node, sensor, method, stream,
            ('^(?!.*blank).*deployment%04d.*PCO2W.*\\.nc$' % deploy))

        # check to see if we downloaded any data
        if not pco2w:
            exit_text = (
                'Data unavailable for %s-%s-%s, %s, %s, deployment %d.' %
                (site, node, sensor, method, stream, deploy))
            raise SystemExit(exit_text)
    else:
        # otherwise, request the data for download from OOINet via the M2M API using the specified dates
        r = m2m_request(site, node, sensor, method, stream, start, stop)
        if not r:
            exit_text = (
                'Request failed for %s-%s-%s, %s, %s, from %s to %s.' %
                (site, node, sensor, method, stream, start, stop))
            raise SystemExit(exit_text)

        # Valid M2M request, start downloading the data
        pco2w = m2m_collect(r, '^(?!.*blank).*PCO2W.*\\.nc$')

        # check to see if we downloaded any data
        if not pco2w:
            exit_text = (
                'Data unavailable for %s-%s-%s, %s, %s, from %s to %s.' %
                (site, node, sensor, method, stream, start, stop))
            raise SystemExit(exit_text)

    # clean-up and reorganize
    if method in ['telemetered', 'recovered_host']:
        pco2w = pco2w_datalogger(pco2w)
    else:
        pco2w = pco2w_instrument(pco2w)

    vocab = get_vocabulary(site, node, sensor)[0]
    pco2w = update_dataset(pco2w, vocab['maxdepth'])

    # save the data to disk
    out_file = os.path.abspath(args.outfile)
    if not os.path.exists(os.path.dirname(out_file)):
        os.makedirs(os.path.dirname(out_file))

    pco2w.to_netcdf(out_file,
                    mode='w',
                    format='NETCDF4',
                    engine='h5netcdf',
                    encoding=ENCODINGS)