def timeseries_get_profiles(inname, profile_filt_time=100,
                            profile_min_time=400):
    """
    Run profile detection on a timeseries file and append the result to it.

    The file is opened with ``xr.open_dataset``, passed through
    ``utils.get_profiles_new`` and written back to the same path in
    append mode.

    Parameters
    ----------
    inname : string
        path of the netcdf file to read and to append to.

    profile_filt_time : float
        how long a filter to apply to the pressure data in seconds

    profile_min_time : float
        how long a profile must last to be considered a proper profile
        (seconds)

    Returns
    -------
    inname : string
        the path that was passed in.
    """
    with xr.open_dataset(inname) as opened:
        profiled = utils.get_profiles_new(
            opened,
            filt_time=profile_filt_time,
            profile_min_time=profile_min_time,
        )
    # mode='a' appends to the existing file rather than replacing it
    profiled.to_netcdf(inname, mode='a')
    return inname
def raw_to_L1timeseries(indir, outdir, deploymentyaml,
                        profile_filt_len=7, profile_min_nsamples=14):
    """
    Build per-mission L1 timeseries from raw ebd/dbd netcdf files and merge
    them into a single file.

    For mission numbers 0-499, every pair of
    ``<indir>/<glider_name><glider_serial>-NNNN-rawebd.nc`` and
    ``...-rawdbd.nc`` that exists is converted into
    ``L1-timeseries/<deployment_name>-MNNNN_L1.nc``.  The per-mission files
    are then opened together and written to
    ``L1-timeseries/<deployment_name>_L1.nc``.

    Parameters
    ----------
    indir : string
        Directory with the raw ebd/dbd netcdf files.

    outdir : string
        NOTE(review): currently unused; all output goes to the hard-coded
        ``L1-timeseries`` directory relative to the working directory.

    deploymentyaml : string
        Path of the deployment yaml file.  Its ``metadata`` and
        ``netcdf_variables`` entries are used.

    profile_filt_len
        Passed to ``utils.get_profiles_new`` as ``filt_length``.

    profile_min_nsamples
        Passed to ``utils.get_profiles_new`` as ``min_nsamples``.

    Returns
    -------
    outname : string
        name of the merged netcdf file.

    Raises
    ------
    FileNotFoundError
        If no mission file pair produced a timeseries.
    """
    with open(deploymentyaml) as fin:
        deployment = yaml.safe_load(fin)
    metadata = deployment['metadata']
    ncvar = deployment['netcdf_variables']
    thenames = list(ncvar.keys())
    thenames.remove('time')

    glider_id = metadata['glider_name'] + metadata['glider_serial']

    id0 = None
    prev_profile = 0
    for mnum in range(0, 500):
        ebdn = indir + '/' + glider_id + f'-{mnum:04d}-rawebd.nc'
        dbdn = indir + '/' + glider_id + f'-{mnum:04d}-rawdbd.nc'
        if not (os.path.exists(ebdn) and os.path.exists(dbdn)):
            continue
        _log.info('Opening: %s %s', ebdn, dbdn)
        # The context manager closes both files once this mission has been
        # written, so handles do not pile up across the 500 iterations.
        with xr.open_dataset(ebdn, decode_times=False) as ebd, \
                xr.open_dataset(dbdn, decode_times=False) as dbd:
            if len(ebd.time) <= 2:
                # skip files with two or fewer time samples
                continue

            # build a new data set based on info in `deployment`; the ebd
            # time is the time base of the new data set.
            ds = xr.Dataset()
            attr = {key: value for key, value in ncvar['time'].items()
                    if key != 'coordinates'}
            ds['time'] = (('time'), ebd['time'].values, attr)

            for name in thenames:
                _log.info('working on %s', name)
                if 'method' in ncvar[name]:
                    continue
                # variables that are in the data set or can be interpolated
                # from it
                if 'conversion' in ncvar[name]:
                    convert = getattr(utils, ncvar[name]['conversion'])
                else:
                    convert = utils._passthrough
                sensorname = ncvar[name]['source']
                _log.info('names: %s %s', name, sensorname)
                if sensorname in dbd.keys():
                    _log.debug('sensorname %s', sensorname)
                    val = convert(dbd[sensorname])
                    val = _dbd2ebd(dbd, ds, val)
                    # NOTE(review): this sets a top-level key on ``ncvar``,
                    # not on ``ncvar[name]``.  Kept as-is: a per-variable
                    # 'method' entry would make the check above skip this
                    # variable for every later mission file.
                    ncvar['method'] = 'linear fill'
                else:
                    val = ebd[sensorname]
                    val = utils._zero_screen(val)
                    val = convert(val)
                # make the attributes:
                ncvar[name].pop('coordinates', None)
                attrs = utils.fill_required_attrs(ncvar[name])
                ds[name] = (('time'), val, attrs)

            # some derived variables:
            # trim bad times...
            ds = ds.sel(time=slice(1e8, None))
            ds = utils.get_glider_depth(ds)
            ds = utils.get_distance_over_ground(ds)
            ds = utils.get_profiles_new(
                ds, filt_length=profile_filt_len,
                min_nsamples=profile_min_nsamples)
            # keep profile numbers increasing across mission files
            ds.profile_index.values = ds.profile_index.values + prev_profile
            ind = np.where(np.isfinite(ds.profile_index))[0]
            if len(ind) > 0:
                # a mission without any finite profile index leaves the
                # running offset untouched instead of raising IndexError
                prev_profile = ds.profile_index.values[ind][-1]
            ds = utils.get_derived_eos_raw(ds)
            ds = ds.assign_coords(longitude=ds.longitude)
            ds = ds.assign_coords(latitude=ds.latitude)
            ds = ds.assign_coords(depth=ds.depth)
            ds = utils.fill_metadata(ds, deployment['metadata'])

            os.makedirs('L1-timeseries', exist_ok=True)
            outname = ('L1-timeseries/' + ds.attrs['deployment_name'] +
                       f'-M{mnum:04d}_L1.nc')
            _log.info('writing %s', outname)
            ds.to_netcdf(outname, 'w')
            if id0 is None:
                id0 = ds.attrs['deployment_name']

    if id0 is None:
        # without this the merge below fails with a far less helpful error
        raise FileNotFoundError(
            f'no usable rawebd/rawdbd file pairs for {glider_id} '
            f'found in {indir}')

    # now merge:
    with xr.open_mfdataset('L1-timeseries/' + glider_id + '*-M*_L1.nc',
                           decode_times=False) as ds:
        _log.debug('merged attributes: %s', ds.attrs)
        # put the real start and end times:
        times = ds['time'].values
        epoch = np.datetime64('1970-01-01T00:00:00')
        start = times[0].astype('timedelta64[s]') + epoch
        end = times[-1].astype('timedelta64[s]') + epoch
        ds.attrs['deployment_start'] = str(start)
        ds.attrs['deployment_end'] = str(end)
        outname = 'L1-timeseries/' + id0 + '_L1.nc'
        ds.to_netcdf(outname)
    return outname
def raw_to_timeseries(indir, outdir, deploymentyaml, kind='raw',
                      profile_filt_time=100, profile_min_time=300):
    """
    Merge the combined nav and payload netcdf files into one timeseries file.

    Reads ``<indir>/<glider_name>-rawgli.nc`` and
    ``<indir>/<glider_name>-<kind>pld.nc``, puts every variable listed under
    ``netcdf_variables`` in the deployment yaml on a common time base, adds
    derived variables and metadata, and writes
    ``<outdir>/<deployment_name>.nc``.

    Parameters
    ----------
    indir : string
        Directory with the combined nav and payload netcdf files.

    outdir : string
        Directory to put the timeseries file; created if missing.

    deploymentyaml : string
        Path of the deployment yaml file.  Its ``metadata``,
        ``netcdf_variables`` and ``glider_devices`` entries are used.

    kind : string
        Prefix of the payload file name (``<kind>pld.nc``).

    profile_filt_time : float
        Passed to ``utils.get_profiles_new`` as ``filt_time``.

    profile_min_time : float
        Passed to ``utils.get_profiles_new`` as ``profile_min_time``.

    Returns
    -------
    outname : string
        name of the new netcdf file.
    """
    with open(deploymentyaml) as fin:
        deployment = yaml.safe_load(fin)
    metadata = deployment['metadata']
    ncvar = deployment['netcdf_variables']
    device_data = deployment['glider_devices']
    glider_id = metadata['glider_name']

    navfile = f'{indir}/{glider_id}-rawgli.nc'
    pldfile = f'{indir}/{glider_id}-{kind}pld.nc'
    _log.info('Opening combined nav file %s', navfile)
    _log.info('Opening combined payload file %s', pldfile)
    # Both input files are only needed while the new data set is assembled;
    # the context manager closes them afterwards.
    with xr.open_dataset(navfile) as gli, \
            xr.open_dataset(pldfile) as sensor:
        # build a new data set based on info in `deploymentyaml`.
        ds = xr.Dataset()
        attr = {key: value for key, value in ncvar['time'].items()
                if key != 'coordinates'}

        # If present, use the timebase specified in ncvar: timebase in the
        # deployment yaml. Otherwise, the ctd will be our timebase.
        # It oversamples the nav data, but mildly undersamples the optics
        # and oxygen....
        if 'timebase' in ncvar:
            timebase_name = ncvar['timebase']['source']
        elif 'GPCTD_TEMPERATURE' in sensor.variables:
            _log.warning('No timebase specified. Using GPCTD_TEMPERATURE '
                         'as timebase')
            timebase_name = 'GPCTD_TEMPERATURE'
        elif 'LEGATO_TEMPERATURE' in sensor.variables:
            _log.warning('No timebase specified. Using LEGATO_TEMPERATURE '
                         'as timebase')
            timebase_name = 'LEGATO_TEMPERATURE'
        else:
            _log.warning('No gpctd or legato data found. Using NAV_DEPTH '
                         'as timebase')
            timebase_name = 'NAV_DEPTH'
        # indices of the payload samples where the timebase variable is
        # not NaN
        indctd = np.where(~np.isnan(sensor[timebase_name]))[0]
        ds['time'] = (('time'), sensor['time'].values[indctd], attr)

        skip = ('time', 'timebase', 'keep_variables')
        thenames = [name for name in ncvar.keys() if name not in skip]
        for name in thenames:
            _log.info('interpolating %s', name)
            if 'method' in ncvar[name]:
                continue
            # variables that are in the data set or can be interpolated
            # from it
            if 'conversion' in ncvar[name]:
                convert = getattr(utils, ncvar[name]['conversion'])
            else:
                convert = utils._passthrough
            sensorname = ncvar[name]['source']
            if sensorname in sensor.variables:
                _log.debug('sensorname %s', sensorname)
                val = convert(sensor[sensorname])
                if 'coarsen' in ncvar[name]:
                    # smooth oxygen data as originally prescribed
                    coarsen_time = ncvar[name]['coarsen']
                    sensor_sub = sensor.coarsen(time=coarsen_time,
                                                boundary='trim').mean()
                    val2 = sensor_sub[sensorname]
                    val = _interp_gli_to_pld(sensor_sub, sensor, val2,
                                             indctd)
                val = val[indctd]
                # NOTE(review): this sets a top-level key on ``ncvar``, not
                # on ``ncvar[name]``; looks unintended but is kept so the
                # attributes written below do not change -- confirm.
                ncvar['method'] = 'linear fill'
            else:
                val = convert(gli[sensorname])
                # Values from the glider netcdf must be interpolated to
                # match the sensor netcdf
                val = _interp_gli_to_pld(gli, ds, val, indctd)

            # make the attributes:
            ncvar[name].pop('coordinates', None)
            attrs = utils.fill_required_attrs(ncvar[name])
            ds[name] = (('time'), val.data, attrs)

    # fix lon and lat to be linearly interpolated between fixes
    good = np.where(
        np.abs(np.diff(ds.longitude)) + np.abs(np.diff(ds.latitude)) > 0
    )[0] + 1
    ds['longitude'].values = np.interp(ds.time, ds.time[good],
                                       ds.longitude[good])
    ds['latitude'].values = np.interp(ds.time, ds.time[good],
                                      ds.latitude[good])

    # keep only timestamps with data from one of a set of variables
    if 'keep_variables' in ncvar:
        keep = np.zeros(len(ds.longitude), dtype=bool)
        for keep_var in ncvar['keep_variables']:
            keep |= ~np.isnan(ds[keep_var].values)
        ds = ds.where(keep)
        ds = ds.dropna(dim='time', how='all')

    # some derived variables:
    ds = utils.get_glider_depth(ds)
    ds = utils.get_distance_over_ground(ds)
    ds = utils.get_profiles_new(ds, filt_time=profile_filt_time,
                                profile_min_time=profile_min_time)
    ds = utils.get_derived_eos_raw(ds)

    # Correct oxygen if present:
    if 'oxygen_concentration' in ncvar:
        if 'correct_oxygen' in ncvar['oxygen_concentration']:
            ds = utils.oxygen_concentration_correction(ds, ncvar)
        else:
            _log.warning('correct_oxygen not found in oxygen yaml. No '
                         'correction applied')
    ds = ds.assign_coords(longitude=ds.longitude)
    ds = ds.assign_coords(latitude=ds.latitude)
    ds = ds.assign_coords(depth=ds.depth)

    ds = utils.fill_metadata(ds, deployment['metadata'], device_data)

    # somehow this comes out unsorted:
    ds = ds.sortby(ds.time)

    ds.attrs['deployment_start'] = str(ds['time'].values[0])
    ds.attrs['deployment_end'] = str(ds['time'].values[-1])

    if outdir:
        os.makedirs(outdir, exist_ok=True)
    id0 = ds.attrs['deployment_name']
    # os.path.join gives the same path as before when outdir ends with a
    # separator, and no longer glues the file name onto the directory name
    # when it does not.
    outname = os.path.join(outdir, id0 + '.nc')
    _log.info('writing %s', outname)
    # the time units are supplied through ``encoding`` below, so drop any
    # units/calendar attributes first
    ds.time.attrs.pop('units', None)
    ds.time.attrs.pop('calendar', None)
    if 'ad2cp_time' in ds.data_vars:
        ds.ad2cp_time.attrs.pop('units', None)
    ds.to_netcdf(
        outname, 'w',
        encoding={'time': {'units': 'seconds since 1970-01-01T00:00:00Z'}})
    return outname
def raw_to_L0timeseries(indir, outdir, deploymentyaml, kind='raw',
                        profile_filt_time=100, profile_min_time=300):
    """
    Merge the nav file and the three per-sensor payload files into one L0
    timeseries file (the 4-file version of the data set).

    Reads ``<glider_name>-rawgli.nc`` plus ``<glider_name>-<kind>p_gpctd.nc``,
    ``...p_arod.nc`` and ``...p_flbbcd.nc`` from ``indir``, puts every
    variable listed under ``netcdf_variables`` on the time base of the ctd
    samples, adds derived variables and metadata, and writes
    ``<outdir>/<deployment_name>_L0.nc``.

    NOTE(review): another function named ``raw_to_L0timeseries`` is defined
    further down; if both live in the same module the later definition
    replaces this one at import time.

    Parameters
    ----------
    indir : string
        Directory with the raw netcdf files.

    outdir : string
        Directory to put the timeseries file; created if missing.

    deploymentyaml : string
        Path of the deployment yaml file.  Its ``metadata`` and
        ``netcdf_variables`` entries are used.

    kind : string
        Prefix of the payload file names (``<kind>p_gpctd.nc`` etc).

    profile_filt_time : float
        Passed to ``utils.get_profiles_new`` as ``filt_time``.

    profile_min_time : float
        Passed to ``utils.get_profiles_new`` as ``profile_min_time``.

    Returns
    -------
    outname : string
        name of the new netcdf file.
    """
    with open(deploymentyaml) as fin:
        deployment = yaml.safe_load(fin)
    metadata = deployment['metadata']
    ncvar = deployment['netcdf_variables']

    prefix = indir + '/' + metadata['glider_name'] + '-'
    # The four inputs are only needed while the new data set is assembled;
    # the context manager closes them afterwards.
    with xr.open_dataset(prefix + 'rawgli.nc',
                         decode_times=False) as gli, \
            xr.open_dataset(prefix + kind + 'p_gpctd.nc',
                            decode_times=False) as ctd, \
            xr.open_dataset(prefix + kind + 'p_arod.nc',
                            decode_times=False) as arod, \
            xr.open_dataset(prefix + kind + 'p_flbbcd.nc',
                            decode_times=False) as flb:
        # build a new data set based on info in `deployment`.
        ds = xr.Dataset()
        attr = {key: value for key, value in ncvar['time'].items()
                if key != 'coordinates'}

        # the ctd will be our timebase.  It oversamples the nav data, but
        # mildly undersamples the optics and oxygen....
        indctd = np.where(~np.isnan(ctd.GPCTD_TEMPERATURE))[0]
        _log.debug('TIME %s', ctd['time'])
        ds['time'] = (('time'), ctd['time'].values[indctd], attr)
        _log.debug('%s', ds['time'])

        thenames = list(ncvar.keys())
        _log.debug('%s', thenames)
        thenames.remove('time')
        for name in thenames:
            _log.info('interpolating %s', name)
            if 'method' in ncvar[name]:
                continue
            # variables that are in the data set or can be interpolated
            # from it
            if 'conversion' in ncvar[name]:
                convert = getattr(utils, ncvar[name]['conversion'])
            else:
                convert = utils._passthrough
            sensorname = ncvar[name]['source']
            # look in the payload files first, in this order ...
            for pld in (ctd, arod, flb):
                if sensorname in pld.keys():
                    _log.debug('sensorname %s', sensorname)
                    val = convert(pld[sensorname])
                    val = _interp_pld_to_pld(pld, ds, val, indctd)
                    # NOTE(review): this sets a top-level key on ``ncvar``,
                    # not on ``ncvar[name]``; looks unintended but is kept
                    # so the written attributes do not change -- confirm.
                    ncvar['method'] = 'linear fill'
                    break
            else:
                # ... and fall back to the nav file when none has it
                val = convert(gli[sensorname])
                _log.debug('Gli %s', gli)
                val = _interp_gli_to_pld(gli, ds, val, indctd)

            # make the attributes:
            ncvar[name].pop('coordinates', None)
            attrs = utils.fill_required_attrs(ncvar[name])
            ds[name] = (('time'), val, attrs)

    # fix lon and lat to be linearly interpolated between fixes
    good = np.where(
        np.abs(np.diff(ds.longitude)) + np.abs(np.diff(ds.latitude)) > 0
    )[0] + 1
    ds['longitude'].values = np.interp(ds.time, ds.time[good],
                                       ds.longitude[good])
    ds['latitude'].values = np.interp(ds.time, ds.time[good],
                                      ds.latitude[good])

    # some derived variables:
    ds = utils.get_glider_depth(ds)
    ds = utils.get_distance_over_ground(ds)
    ds = utils.get_profiles_new(ds, filt_time=profile_filt_time,
                                profile_min_time=profile_min_time)
    ds = utils.get_derived_eos_raw(ds)
    ds = ds.assign_coords(longitude=ds.longitude)
    ds = ds.assign_coords(latitude=ds.latitude)
    ds = ds.assign_coords(depth=ds.depth)
    ds = utils.fill_metadata(ds, deployment['metadata'])

    # somehow this comes out unsorted:
    ds = ds.sortby(ds.time)

    # time was read with decode_times=False; turn the first and last values
    # into datetimes by adding them, as whole seconds, to 1970-01-01
    times = ds['time'].values
    epoch = np.datetime64('1970-01-01T00:00:00')
    start = times[0].astype('timedelta64[s]') + epoch
    end = times[-1].astype('timedelta64[s]') + epoch
    ds.attrs['deployment_start'] = str(start)
    ds.attrs['deployment_end'] = str(end)

    if outdir:
        os.makedirs(outdir, exist_ok=True)
    id0 = ds.attrs['deployment_name']
    # os.path.join gives the same path as before when outdir ends with a
    # separator, and no longer glues the file name onto the directory name
    # when it does not.
    outname = os.path.join(outdir, id0 + '_L0.nc')
    _log.info('writing %s', outname)
    ds.to_netcdf(outname, 'w')
    return outname
def raw_to_L0timeseries(indir, outdir, deploymentyaml, *,
                        profile_filt_time=100, profile_min_time=300):
    """
    Build per-mission L0 timeseries from raw ebd/dbd netcdf files and merge
    them into a single file with profiles identified.

    For mission numbers 0-499, every pair of
    ``<indir>/<glider_name><glider_serial>-NNNN-rawebd.nc`` and
    ``...-rawdbd.nc`` that exists is converted into
    ``<outdir>/<deployment_name>-MNNNN_L0.nc``.  The per-mission files are
    then opened together, run through ``utils.get_profiles_new`` and written
    to ``<outdir>/<deployment_name>.nc``.

    Parameters
    ----------
    indir : string
        Directory with raw netcdf files.

    outdir : string
        Directory to put the merged timeseries files.

    deploymentyaml : string
        Path of the deployment yaml file.  Its ``metadata`` and
        ``netcdf_variables`` entries are used.

    profile_filt_time : float
        time in seconds over which to smooth the pressure time series for
        finding up and down profiles (note, doesn't filter the data that is
        saved)

    profile_min_time : float
        minimum time to consider a profile an actual profile (seconds)

    Returns
    -------
    outname : string
        name of the new merged netcdf file.

    Raises
    ------
    FileNotFoundError
        If no mission file pair produced a timeseries.
    """
    with open(deploymentyaml) as fin:
        deployment = yaml.safe_load(fin)
    metadata = deployment['metadata']
    ncvar = deployment['netcdf_variables']
    thenames = list(ncvar.keys())
    thenames.remove('time')

    glider_id = metadata['glider_name'] + metadata['glider_serial']

    id0 = None
    for mnum in range(0, 500):
        ebdn = indir + '/' + glider_id + f'-{mnum:04d}-rawebd.nc'
        dbdn = indir + '/' + glider_id + f'-{mnum:04d}-rawdbd.nc'
        _log.debug('%s %s', ebdn, dbdn)
        if not (os.path.exists(ebdn) and os.path.exists(dbdn)):
            continue
        _log.info('Opening: %s %s', ebdn, dbdn)
        # The context manager closes both files once this mission has been
        # written, so handles do not pile up across the 500 iterations.
        with xr.open_dataset(ebdn, decode_times=False) as ebd, \
                xr.open_dataset(dbdn, decode_times=False) as dbd:
            if len(ebd.time) <= 2:
                # skip files with two or fewer time samples
                continue

            # build a new data set based on info in `deployment`; the ebd
            # time is the time base of the new data set.
            ds = xr.Dataset()
            attr = {key: value for key, value in ncvar['time'].items()
                    if key != 'coordinates'}
            ds['time'] = (('time'), ebd['time'].values, attr)

            for name in thenames:
                _log.info('working on %s', name)
                if 'method' in ncvar[name]:
                    continue
                # variables that are in the data set or can be interpolated
                # from it
                if 'conversion' in ncvar[name]:
                    convert = getattr(utils, ncvar[name]['conversion'])
                else:
                    convert = utils._passthrough
                sensorname = ncvar[name]['source']
                _log.info('names: %s %s', name, sensorname)
                if sensorname in ebd.keys():
                    _log.debug('EBD sensorname %s', sensorname)
                    val = ebd[sensorname]
                    val = utils._zero_screen(val)
                    val = convert(val)
                else:
                    _log.debug('DBD sensorname %s', sensorname)
                    val = convert(dbd[sensorname])
                    val = _dbd2ebd(dbd, ds, val)
                    # NOTE(review): this sets a top-level key on ``ncvar``,
                    # not on ``ncvar[name]``.  Kept as-is: a per-variable
                    # 'method' entry would make the check above skip this
                    # variable for every later mission file.
                    ncvar['method'] = 'linear fill'
                # make the attributes:
                ncvar[name].pop('coordinates', None)
                attrs = utils.fill_required_attrs(ncvar[name])
                ds[name] = (('time'), val, attrs)

            # some derived variables (profiles are found after the merge):
            ds = utils.get_glider_depth(ds)
            ds = utils.get_distance_over_ground(ds)
            ds = utils.get_derived_eos_raw(ds)
            ds = ds.assign_coords(longitude=ds.longitude)
            ds = ds.assign_coords(latitude=ds.latitude)
            ds = ds.assign_coords(depth=ds.depth)
            ds = utils.fill_metadata(ds, deployment['metadata'])

            if outdir:
                os.makedirs(outdir, exist_ok=True)
            outname = (outdir + '/' + ds.attrs['deployment_name'] +
                       f'-M{mnum:04d}_L0.nc')
            _log.info('writing %s', outname)
            ds.to_netcdf(outname, 'w')
            if id0 is None:
                id0 = ds.attrs['deployment_name']

    if id0 is None:
        # without this the merge below fails with a far less helpful error
        raise FileNotFoundError(
            f'no usable rawebd/rawdbd file pairs for {glider_id} '
            f'found in {indir}')

    # now merge:
    with xr.open_mfdataset(outdir + '/' + glider_id + '*-M*_L0.nc',
                           decode_times=False, lock=False) as merged:
        _log.debug('merged attributes: %s', merged.attrs)
        # put the real start and end times:
        times = merged['time'].values
        epoch = np.datetime64('1970-01-01T00:00:00')
        start = times[0].astype('timedelta64[s]') + epoch
        end = times[-1].astype('timedelta64[s]') + epoch
        merged.attrs['deployment_start'] = str(start)
        merged.attrs['deployment_end'] = str(end)

        profiled = utils.get_profiles_new(
            merged, filt_time=profile_filt_time,
            profile_min_time=profile_min_time)
        outname = outdir + '/' + id0 + '.nc'
        _log.info('writing %s', outname)
        profiled.to_netcdf(outname)
    return outname