Beispiel #1
0
def timeseries_get_profiles(inname, profile_filt_time=100,
                            profile_min_time=400):
    """
    Run ``utils.get_profiles_new`` on an existing timeseries netcdf file and
    append the result to that same file.

    Parameters
    ----------
    inname : string
        name of the netcdf file to read; the result is written back to it
        in append mode.

    profile_filt_time : float
        how long a filter to apply to the pressure data in seconds

    profile_min_time : float
        how long a profile must last to be considered a proper profile (seconds)

    Returns
    -------
    inname : string
        the same file name that was passed in.
    """
    profile_kwargs = dict(filt_time=profile_filt_time,
                          profile_min_time=profile_min_time)
    with xr.open_dataset(inname) as opened:
        with_profiles = utils.get_profiles_new(opened, **profile_kwargs)
    # written after the input handle has been released by the ``with`` block
    with_profiles.to_netcdf(inname, mode='a')
    return inname
Beispiel #2
0
def raw_to_L1timeseries(indir,
                        outdir,
                        deploymentyaml,
                        profile_filt_len=7,
                        profile_min_nsamples=14):
    """
    Build per-mission L1 timeseries netcdf files from raw ebd/dbd netcdf
    files, then merge them into a single L1 timeseries file.

    Parameters
    ----------
    indir : string
        Directory holding the raw files, named
        ``<glider_name><glider_serial>-NNNN-rawebd.nc`` and
        ``<glider_name><glider_serial>-NNNN-rawdbd.nc`` for mission numbers
        0000 to 0499.  A mission is only processed if both files exist and
        the ebd file has more than two time samples.
    outdir : string
        Directory the per-mission and merged files are written to.  It is
        created if it does not exist.
    deploymentyaml : string
        YAML file providing the ``metadata`` and ``netcdf_variables``
        sections.
    profile_filt_len
        Passed to ``utils.get_profiles_new`` as ``filt_length``.
    profile_min_nsamples
        Passed to ``utils.get_profiles_new`` as ``min_nsamples``.

    Returns
    -------
    outname : string
        Name of the merged netcdf file.

    Raises
    ------
    FileNotFoundError
        If no mission produced a timeseries file, so there is nothing to
        merge.
    """

    with open(deploymentyaml) as fin:
        deployment = yaml.safe_load(fin)
    metadata = deployment['metadata']
    ncvar = deployment['netcdf_variables']
    thenames = list(ncvar.keys())
    thenames.remove('time')

    glider_id = metadata['glider_name'] + metadata['glider_serial']

    id0 = None
    prev_profile = 0
    for mnum in range(0, 500):
        ebdn = f'{indir}/{glider_id}-{mnum:04d}-rawebd.nc'
        dbdn = f'{indir}/{glider_id}-{mnum:04d}-rawdbd.nc'
        if not (os.path.exists(ebdn) and os.path.exists(dbdn)):
            continue
        _log.info('Opening: %s %s', ebdn, dbdn)
        # The raw files stay open until the mission file has been written, so
        # any lazily loaded values are still readable; they are closed on the
        # way out of the ``with`` block, including on ``continue``.
        with xr.open_dataset(ebdn, decode_times=False) as ebd, \
                xr.open_dataset(dbdn, decode_times=False) as dbd:
            if len(ebd.time) <= 2:
                continue
            # build a new data set based on info in `deployment.`
            # The ebd time is the time base; variables found in dbd are
            # passed through `_dbd2ebd` before being stored.
            ds = xr.Dataset()
            attr = {key: value for key, value in ncvar['time'].items()
                    if key != 'coordinates'}
            ds['time'] = (('time'), ebd['time'].values, attr)

            for name in thenames:
                _log.info('working on %s', name)
                if 'method' in ncvar[name].keys():
                    # variables carrying a 'method' entry are not read here
                    continue
                if 'conversion' in ncvar[name].keys():
                    convert = getattr(utils, ncvar[name]['conversion'])
                else:
                    convert = utils._passthrough
                sensorname = ncvar[name]['source']
                _log.info('names: %s %s', name, sensorname)
                if sensorname in dbd.keys():
                    _log.debug('sensorname %s', sensorname)
                    val = convert(dbd[sensorname])
                    val = _dbd2ebd(dbd, ds, val)
                    # NOTE(review): this sets a top-level 'method' key on
                    # ncvar, not on ncvar[name]; kept as in the original --
                    # confirm intent.
                    ncvar['method'] = 'linear fill'
                else:
                    val = ebd[sensorname]
                    val = utils._zero_screen(val)
                    val = convert(val)
                # make the attributes:
                ncvar[name].pop('coordinates', None)
                attrs = ncvar[name]
                attrs = utils.fill_required_attrs(attrs)
                ds[name] = (('time'), val, attrs)

            # some derived variables:
            # trim bad times...
            ds = ds.sel(time=slice(1e8, None))

            ds = utils.get_glider_depth(ds)
            ds = utils.get_distance_over_ground(ds)
            ds = utils.get_profiles_new(
                ds,
                filt_length=profile_filt_len,
                min_nsamples=profile_min_nsamples)
            # offset the profile numbers by the last one of the previous
            # mission file
            ds.profile_index.values = ds.profile_index.values + prev_profile
            ind = np.where(np.isfinite(ds.profile_index))[0]
            if len(ind) > 0:
                # without a finite index the previous offset is kept
                prev_profile = ds.profile_index.values[ind][-1]
            ds = utils.get_derived_eos_raw(ds)
            ds = ds.assign_coords(longitude=ds.longitude)
            ds = ds.assign_coords(latitude=ds.latitude)
            ds = ds.assign_coords(depth=ds.depth)

            ds = utils.fill_metadata(ds, deployment['metadata'])
            if outdir:
                os.makedirs(outdir, exist_ok=True)
            outname = os.path.join(
                outdir, ds.attrs['deployment_name'] + f'-M{mnum:04d}_L1.nc')
            _log.info('writing %s', outname)
            ds.to_netcdf(outname, 'w')
            if id0 is None:
                id0 = ds.attrs['deployment_name']

    if id0 is None:
        raise FileNotFoundError(
            f'No usable rawebd/rawdbd file pairs for {glider_id} in {indir}')

    # now merge:
    with xr.open_mfdataset(os.path.join(outdir, glider_id + '*-M*_L1.nc'),
                           decode_times=False) as ds:
        _log.debug('merged attributes: %s', ds.attrs)
        # put the real start and end times:
        start = ((ds['time'].values[0]).astype('timedelta64[s]') +
                 np.datetime64('1970-01-01T00:00:00'))
        end = ((ds['time'].values[-1]).astype('timedelta64[s]') +
               np.datetime64('1970-01-01T00:00:00'))

        ds.attrs['deployment_start'] = str(start)
        ds.attrs['deployment_end'] = str(end)

        outname = os.path.join(outdir, id0 + '_L1.nc')
        ds.to_netcdf(outname)

    return outname
Beispiel #3
0
def raw_to_timeseries(indir,
                      outdir,
                      deploymentyaml,
                      kind='raw',
                      profile_filt_time=100,
                      profile_min_time=300):
    """
    Merge the combined nav (``gli``) and payload (``pld``) netcdf files into
    a single timeseries netcdf file on one time base.

    Parameters
    ----------
    indir : string
        Directory holding ``<glider_name>-rawgli.nc`` and
        ``<glider_name>-<kind>pld.nc``.
    outdir : string
        Directory the timeseries file is written to.  It is created if it
        does not exist.
    deploymentyaml : string
        YAML file providing the ``metadata``, ``netcdf_variables`` and
        ``glider_devices`` sections.
    kind : string
        Inserted into the payload file name (``<glider_name>-<kind>pld.nc``).
    profile_filt_time : float
        Passed to ``utils.get_profiles_new`` as ``filt_time``.
    profile_min_time : float
        Passed to ``utils.get_profiles_new`` as ``profile_min_time``.

    Returns
    -------
    outname : string
        Name of the netcdf file that was written.
    """

    with open(deploymentyaml) as fin:
        deployment = yaml.safe_load(fin)
    metadata = deployment['metadata']
    ncvar = deployment['netcdf_variables']
    device_data = deployment['glider_devices']
    glider_id = metadata['glider_name']
    gli_name = f'{indir}/{glider_id}-rawgli.nc'
    pld_name = f'{indir}/{glider_id}-{kind}pld.nc'
    _log.info('Opening combined nav file %s', gli_name)
    _log.info('Opening combined payload file %s', pld_name)

    # Both inputs stay open until the output has been written and are closed
    # on the way out, including when an exception is raised.
    with xr.open_dataset(gli_name) as gli, \
            xr.open_dataset(pld_name) as sensor:
        # build a new data set based on info in `deploymentyaml.`
        ds = xr.Dataset()
        attr = {key: value for key, value in ncvar['time'].items()
                if key != 'coordinates'}

        # If present, use the timebase specified in ncvar: timebase in the
        # deployment yaml. Otherwise fall back to the first of
        # GPCTD_TEMPERATURE, LEGATO_TEMPERATURE, NAV_DEPTH that applies.
        # Only samples where the timebase variable is not NaN are kept.
        if 'timebase' in ncvar:
            timebase_source = ncvar['timebase']['source']
        elif 'GPCTD_TEMPERATURE' in sensor.variables:
            _log.warning('No timebase specified. Using GPCTD_TEMPERATURE as '
                         'timebase')
            timebase_source = 'GPCTD_TEMPERATURE'
        elif 'LEGATO_TEMPERATURE' in sensor.variables:
            _log.warning('No timebase specified. Using LEGATO_TEMPERATURE as '
                         'timebase')
            timebase_source = 'LEGATO_TEMPERATURE'
        else:
            _log.warning('No gpctd or legato data found. Using NAV_DEPTH as '
                         'timebase')
            timebase_source = 'NAV_DEPTH'
        indctd = np.where(~np.isnan(sensor[timebase_source]))[0]
        ds['time'] = (('time'), sensor['time'].values[indctd], attr)

        # entries of ncvar that are not output variables
        not_variables = ('time', 'timebase', 'keep_variables')
        thenames = [key for key in ncvar.keys() if key not in not_variables]
        for name in thenames:
            _log.info('interpolating %s', name)
            if 'method' in ncvar[name].keys():
                # variables carrying a 'method' entry are not read here
                continue
            if 'conversion' in ncvar[name].keys():
                convert = getattr(utils, ncvar[name]['conversion'])
            else:
                convert = utils._passthrough
            sensorname = ncvar[name]['source']
            if sensorname in sensor.variables:
                _log.debug('sensorname %s', sensorname)
                val = convert(sensor[sensorname])
                if 'coarsen' in ncvar[name]:
                    # smooth oxygen data as originally prescribed
                    # NOTE(review): the coarsened values replace `val`
                    # without going through `convert` -- confirm intent.
                    coarsen_time = ncvar[name]['coarsen']
                    sensor_sub = sensor.coarsen(time=coarsen_time,
                                                boundary='trim').mean()
                    val2 = sensor_sub[sensorname]
                    val = _interp_gli_to_pld(sensor_sub, sensor, val2, indctd)
                val = val[indctd]

                # NOTE(review): this sets a top-level 'method' key on ncvar,
                # not on ncvar[name]; kept as in the original -- confirm
                # intent.
                ncvar['method'] = 'linear fill'
            else:
                val = gli[sensorname]
                val = convert(val)
                # Values from the glider netcdf must be interpolated to match
                # the sensor netcdf
                val = _interp_gli_to_pld(gli, ds, val, indctd)

            # make the attributes:
            ncvar[name].pop('coordinates', None)
            attrs = ncvar[name]
            attrs = utils.fill_required_attrs(attrs)
            ds[name] = (('time'), val.data, attrs)

        # fix lon and lat to be linearly interpolated between fixes
        good = np.where(
            np.abs(np.diff(ds.longitude)) +
            np.abs(np.diff(ds.latitude)) > 0)[0] + 1
        ds['longitude'].values = np.interp(ds.time, ds.time[good],
                                           ds.longitude[good])
        ds['latitude'].values = np.interp(ds.time, ds.time[good],
                                          ds.latitude[good])

        # keep only timestamps with data from one of a set of variables
        if 'keep_variables' in ncvar:
            keeps = np.full(len(ds.longitude), np.nan)
            for keep_var in ncvar['keep_variables']:
                keeps[~np.isnan(ds[keep_var].values)] = 1
            ds = ds.where(~np.isnan(keeps))
            ds = ds.dropna(dim='time', how='all')

        # some derived variables:
        ds = utils.get_glider_depth(ds)
        ds = utils.get_distance_over_ground(ds)
        ds = utils.get_profiles_new(ds,
                                    filt_time=profile_filt_time,
                                    profile_min_time=profile_min_time)
        ds = utils.get_derived_eos_raw(ds)

        # Correct oxygen if present:
        if 'oxygen_concentration' in ncvar.keys():
            if 'correct_oxygen' in ncvar['oxygen_concentration'].keys():
                ds = utils.oxygen_concentration_correction(ds, ncvar)
            else:
                _log.warning('correct_oxygen not found in oxygen yaml. No '
                             'correction applied')
        ds = ds.assign_coords(longitude=ds.longitude)
        ds = ds.assign_coords(latitude=ds.latitude)
        ds = ds.assign_coords(depth=ds.depth)

        ds = utils.fill_metadata(ds, deployment['metadata'], device_data)

        # somehow this comes out unsorted:
        ds = ds.sortby(ds.time)

        ds.attrs['deployment_start'] = str(ds['time'].values[0])
        ds.attrs['deployment_end'] = str(ds['time'].values[-1])

        if outdir:
            os.makedirs(outdir, exist_ok=True)
        id0 = ds.attrs['deployment_name']
        # os.path.join gives the same name as before when outdir ends in a
        # separator and adds the missing separator when it does not
        outname = os.path.join(outdir, id0 + '.nc')
        _log.info('writing %s', outname)
        # the time units are supplied through `encoding` below, so drop any
        # units/calendar attributes first
        ds.time.attrs.pop('units', None)
        ds.time.attrs.pop('calendar', None)
        if 'ad2cp_time' in ds.data_vars:
            ds.ad2cp_time.attrs.pop('units', None)
        ds.to_netcdf(
            outname,
            'w',
            encoding={'time': {
                'units': 'seconds since 1970-01-01T00:00:00Z'
            }})
    return outname
Beispiel #4
0
def raw_to_L0timeseries(indir,
                        outdir,
                        deploymentyaml,
                        kind='raw',
                        profile_filt_time=100,
                        profile_min_time=300):
    """
    Merge the 4-file version of the data set (nav file plus separate gpctd,
    arod and flbbcd payload files) into a single L0 timeseries netcdf file.

    Parameters
    ----------
    indir : string
        Directory holding ``<glider_name>-rawgli.nc`` and the payload files
        ``<glider_name>-<kind>p_gpctd.nc``, ``...p_arod.nc`` and
        ``...p_flbbcd.nc``.
    outdir : string
        Directory the timeseries file is written to.  It is created if it
        does not exist.
    deploymentyaml : string
        YAML file providing the ``metadata`` and ``netcdf_variables``
        sections.
    kind : string
        Inserted into the payload file names.
    profile_filt_time : float
        Passed to ``utils.get_profiles_new`` as ``filt_time``.
    profile_min_time : float
        Passed to ``utils.get_profiles_new`` as ``profile_min_time``.

    Returns
    -------
    outname : string
        Name of the netcdf file that was written.
    """

    with open(deploymentyaml) as fin:
        deployment = yaml.safe_load(fin)
    metadata = deployment['metadata']
    ncvar = deployment['netcdf_variables']

    glider_id = metadata['glider_name']
    prefix = f'{indir}/{glider_id}'

    # All inputs stay open until the output has been written and are closed
    # on the way out, including when an exception is raised.
    with xr.open_dataset(f'{prefix}-rawgli.nc',
                         decode_times=False) as gli, \
            xr.open_dataset(f'{prefix}-{kind}p_gpctd.nc',
                            decode_times=False) as ctd, \
            xr.open_dataset(f'{prefix}-{kind}p_arod.nc',
                            decode_times=False) as arod, \
            xr.open_dataset(f'{prefix}-{kind}p_flbbcd.nc',
                            decode_times=False) as flb:
        # build a new data set based on info in `deployment.`
        ds = xr.Dataset()
        attr = {key: value for key, value in ncvar['time'].items()
                if key != 'coordinates'}

        # the ctd will be our timebase.  It oversamples the nav data, but
        # mildly undersamples the optics and oxygen....
        indctd = np.where(~np.isnan(ctd.GPCTD_TEMPERATURE))[0]

        _log.debug('TIME %s', ctd['time'])
        ds['time'] = (('time'), ctd['time'].values[indctd], attr)
        _log.debug('%s', ds['time'])
        thenames = list(ncvar.keys())
        _log.debug('%s', thenames)
        thenames.remove('time')

        for name in thenames:
            _log.info('interpolating %s', name)
            if 'method' in ncvar[name].keys():
                # variables carrying a 'method' entry are not read here
                continue
            if 'conversion' in ncvar[name].keys():
                convert = getattr(utils, ncvar[name]['conversion'])
            else:
                convert = utils._passthrough
            sensorname = ncvar[name]['source']
            # look for the source in the payload files in priority order;
            # fall back to the nav file when none of them has it
            for pld in (ctd, arod, flb):
                if sensorname in pld.keys():
                    _log.debug('sensorname %s', sensorname)
                    val = convert(pld[sensorname])
                    val = _interp_pld_to_pld(pld, ds, val, indctd)
                    # NOTE(review): this sets a top-level 'method' key on
                    # ncvar, not on ncvar[name]; kept as in the original --
                    # confirm intent.
                    ncvar['method'] = 'linear fill'
                    break
            else:
                val = gli[sensorname]
                val = convert(val)
                _log.debug('Gli %s', gli)
                val = _interp_gli_to_pld(gli, ds, val, indctd)

            # make the attributes:
            ncvar[name].pop('coordinates', None)
            attrs = ncvar[name]
            attrs = utils.fill_required_attrs(attrs)
            ds[name] = (('time'), val, attrs)

        # fix lon and lat to be linearly interpolated between fixes
        good = np.where(
            np.abs(np.diff(ds.longitude)) +
            np.abs(np.diff(ds.latitude)) > 0)[0] + 1
        ds['longitude'].values = np.interp(ds.time, ds.time[good],
                                           ds.longitude[good])
        ds['latitude'].values = np.interp(ds.time, ds.time[good],
                                          ds.latitude[good])

        # some derived variables:
        ds = utils.get_glider_depth(ds)
        ds = utils.get_distance_over_ground(ds)
        ds = utils.get_profiles_new(ds,
                                    filt_time=profile_filt_time,
                                    profile_min_time=profile_min_time)

        ds = utils.get_derived_eos_raw(ds)

        ds = ds.assign_coords(longitude=ds.longitude)
        ds = ds.assign_coords(latitude=ds.latitude)
        ds = ds.assign_coords(depth=ds.depth)

        ds = utils.fill_metadata(ds, deployment['metadata'])

        # somehow this comes out unsorted:
        ds = ds.sortby(ds.time)

        # time was read with decode_times=False; the values are added as
        # seconds to 1970-01-01 to get the start and end stamps
        epoch = np.datetime64('1970-01-01T00:00:00')
        start = (ds['time'].values[0]).astype('timedelta64[s]') + epoch
        end = (ds['time'].values[-1]).astype('timedelta64[s]') + epoch

        ds.attrs['deployment_start'] = str(start)
        ds.attrs['deployment_end'] = str(end)

        if outdir:
            os.makedirs(outdir, exist_ok=True)
        id0 = ds.attrs['deployment_name']
        # os.path.join gives the same name as before when outdir ends in a
        # separator and adds the missing separator when it does not
        outname = os.path.join(outdir, id0 + '_L0.nc')
        _log.info('writing %s', outname)
        ds.to_netcdf(outname, 'w')

    return outname
Beispiel #5
0
def raw_to_L0timeseries(indir, outdir, deploymentyaml, *,
                        profile_filt_time=100, profile_min_time=300):
    """
    Build per-mission L0 timeseries netcdf files from raw ebd/dbd netcdf
    files, merge them, and find profiles on the merged timeseries.

    Parameters
    ----------
    indir : string
        Directory with raw netcdf files, named
        ``<glider_name><glider_serial>-NNNN-rawebd.nc`` and
        ``...-rawdbd.nc`` for mission numbers 0000 to 0499.  A mission is
        only processed if both files exist and the ebd file has more than
        two time samples.
    outdir : string
        Directory to put the merged timeseries files.  It is created if it
        does not exist.
    deploymentyaml : string
        YAML file providing the ``metadata`` and ``netcdf_variables``
        sections.
    profile_filt_time : float
        time in seconds over which to smooth the pressure time series for
        finding up and down profiles (note, doesn't filter the data that is
        saved)
    profile_min_time : float
        minimum time to consider a profile an actual profile (seconds)

    Returns
    -------
    outname : string
        name of the new merged netcdf file.

    Raises
    ------
    FileNotFoundError
        If no mission produced a timeseries file, so there is nothing to
        merge.
    """

    with open(deploymentyaml) as fin:
        deployment = yaml.safe_load(fin)
    metadata = deployment['metadata']
    ncvar = deployment['netcdf_variables']
    thenames = list(ncvar.keys())
    thenames.remove('time')

    glider_id = metadata['glider_name'] + metadata['glider_serial']

    id0 = None
    for mnum in range(0, 500):
        ebdn = f'{indir}/{glider_id}-{mnum:04d}-rawebd.nc'
        dbdn = f'{indir}/{glider_id}-{mnum:04d}-rawdbd.nc'
        _log.debug('%s %s', ebdn, dbdn)
        if not (os.path.exists(ebdn) and os.path.exists(dbdn)):
            continue
        _log.info('Opening: %s %s', ebdn, dbdn)
        # The raw files stay open until the mission file has been written, so
        # any lazily loaded values are still readable; they are closed on the
        # way out of the ``with`` block, including on ``continue``.
        with xr.open_dataset(ebdn, decode_times=False) as ebd, \
                xr.open_dataset(dbdn, decode_times=False) as dbd:
            _log.debug('DBD %s', dbd)
            if len(ebd.time) <= 2:
                continue
            # build a new data set based on info in `deployment.`
            # The ebd time is the time base; variables not found in ebd are
            # read from dbd and passed through `_dbd2ebd`.
            ds = xr.Dataset()
            attr = {key: value for key, value in ncvar['time'].items()
                    if key != 'coordinates'}
            ds['time'] = (('time'), ebd['time'].values, attr)

            for name in thenames:
                _log.info('working on %s', name)
                if 'method' in ncvar[name].keys():
                    # variables carrying a 'method' entry are not read here
                    continue
                if 'conversion' in ncvar[name].keys():
                    convert = getattr(utils, ncvar[name]['conversion'])
                else:
                    convert = utils._passthrough
                sensorname = ncvar[name]['source']
                _log.info('names: %s %s', name, sensorname)
                if sensorname in ebd.keys():
                    _log.debug('EBD sensorname %s', sensorname)
                    val = ebd[sensorname]
                    val = utils._zero_screen(val)
                    val = convert(val)
                else:
                    _log.debug('DBD sensorname %s', sensorname)
                    val = convert(dbd[sensorname])
                    val = _dbd2ebd(dbd, ds, val)
                    # NOTE(review): this sets a top-level 'method' key on
                    # ncvar, not on ncvar[name]; kept as in the original --
                    # confirm intent.
                    ncvar['method'] = 'linear fill'
                # make the attributes:
                ncvar[name].pop('coordinates', None)
                attrs = ncvar[name]
                attrs = utils.fill_required_attrs(attrs)
                ds[name] = (('time'), val, attrs)

            _log.debug('assembled dataset: %s', ds)

            # some derived variables (profiles are found later, on the
            # merged dataset):
            ds = utils.get_glider_depth(ds)
            ds = utils.get_distance_over_ground(ds)
            ds = utils.get_derived_eos_raw(ds)
            ds = ds.assign_coords(longitude=ds.longitude)
            ds = ds.assign_coords(latitude=ds.latitude)
            ds = ds.assign_coords(depth=ds.depth)

            ds = utils.fill_metadata(ds, deployment['metadata'])
            if outdir:
                os.makedirs(outdir, exist_ok=True)
            outname = (outdir + '/' + ds.attrs['deployment_name'] +
                       f'-M{mnum:04d}_L0.nc')
            _log.info('writing %s', outname)
            ds.to_netcdf(outname, 'w')
            if id0 is None:
                id0 = ds.attrs['deployment_name']

    if id0 is None:
        raise FileNotFoundError(
            f'No usable rawebd/rawdbd file pairs for {glider_id} in {indir}')

    # now merge:
    with xr.open_mfdataset(outdir + '/' + glider_id + '*-M*_L0.nc',
                           decode_times=False, lock=False) as ds:
        _log.debug('merged attributes: %s', ds.attrs)

        # put the real start and end times:
        start = ((ds['time'].values[0]).astype('timedelta64[s]') +
                 np.datetime64('1970-01-01T00:00:00'))
        end = ((ds['time'].values[-1]).astype('timedelta64[s]') +
               np.datetime64('1970-01-01T00:00:00'))

        ds.attrs['deployment_start'] = str(start)
        ds.attrs['deployment_end'] = str(end)
        ds = utils.get_profiles_new(ds,
                                    filt_time=profile_filt_time,
                                    profile_min_time=profile_min_time)

        outname = outdir + '/' + id0 + '.nc'
        _log.info('writing %s', outname)
        ds.to_netcdf(outname)
    return outname