def proc_ctd(fname, compression='gzip', below_water=True):
    """Quick `proc_ctd` function.

    Load the CNV file `fname`, take the first element returned by
    `split()`, and run the numbered processing steps below on it.

    Parameters
    ----------
    fname : str
        Path of the CNV file.  The part of its base name before the first
        dot is stored as `name` on the returned object.
    compression : str, optional
        Forwarded unchanged to `DataFrame.from_cnv`.
    below_water : bool, optional
        Forwarded unchanged to `DataFrame.from_cnv`.

    Returns
    -------
    The object returned by `derive_cnv`, with its `name` attribute set.
    """
    # 00-Split, clean 'bad pump' data, and apply flag.
    cast = DataFrame.from_cnv(fname, compression=compression,
                              below_water=below_water).split()[0]
    cast = cast[cast['pumps']]
    cast = cast[~cast['flag']]  # True for bad values.
    name = os.path.basename(fname).split('.')[0]

    # Remove unwanted columns.
    keep = {
        'altM', 'c0S/m', 'dz/dtM', 'wetCDOM', 'latitude', 'longitude',
        'sbeox0Mm/Kg', 'sbeox1Mm/Kg', 'oxsolMm/Kg', 'oxsatMm/Kg', 'par',
        'pla', 'sva', 't090C', 't190C', 'tsa', 'sbeox0V',
    }
    # Drop only columns that exist in this cast and are not wanted.  A
    # symmetric difference would also list `keep` names that the cast does
    # not have, and dropping those raises KeyError.
    drop = [col for col in cast.columns if col not in keep]
    cast = cast.drop(drop, axis=1)

    # Smooth velocity with a 2 seconds window (48 samples at 24 Hz).
    cast['dz/dtM'] = movingaverage(cast['dz/dtM'], window_size=48)

    # 01-Filter pressure.
    cast.index = lp_filter(cast.index, sample_rate=24.0, time_constant=0.15)

    # 02-Remove pressure reversals.
    cast = cast.press_check()
    cast = cast.dropna()

    # 03-Loop Edit.
    cast = cast[cast['dz/dtM'] >= 0.25]  # Threshold velocity.

    # 04-Remove spikes.
    cast = cast.apply(Series.despike, n1=2, n2=20, block=100)

    # 05-Bin-average.
    cast = cast.apply(Series.bindata, delta=1.)

    # 06-interpolate.
    cast = cast.apply(Series.interpolate)

    # 07-Smooth is disabled.  It used `Series.smooth` with a 'hanning'
    # window of 21, 11 or 5 points for a maximum index value of >= 500,
    # >= 100, or less, respectively.

    # 08-Derive.  Mean position is attached before `derive_cnv` is called.
    cast.lat = cast['latitude'].mean()
    cast.lon = cast['longitude'].mean()
    cast = derive_cnv(cast)
    cast.name = name
    return cast
def proc_ctd(fname, compression='gzip', below_water=True):
    """
    Quick `proc_ctd` function.

    Reads the CNV file `fname` (forwarding `compression` and `below_water`
    to `DataFrame.from_cnv`), keeps the first element of `split()`, runs
    the numbered steps below, and returns the result of `derive_cnv` with
    `name` set to the file's base name up to its first dot.

    """
    # 00-Split, clean 'bad pump' data, and apply flag.
    loaded = DataFrame.from_cnv(fname, compression=compression,
                                below_water=below_water)
    profile = loaded.split()[0]
    label = os.path.basename(fname).split('.')[0]
    pumps_on = profile['pumps']
    profile = profile[pumps_on]
    flagged = profile['flag']  # True for bad values.
    profile = profile[~flagged]

    # Smooth velocity with a 2 seconds window.
    profile['dz/dtM'] = movingaverage(profile['dz/dtM'], window_size=48)

    # 01-Filter pressure.
    profile.index = lp_filter(profile.index,
                              sample_rate=24.0, time_constant=0.15)

    # 02-Remove pressure reversals.
    profile = profile.press_check().dropna()

    # 03-Loop Edit.
    fast_enough = profile['dz/dtM'] >= 0.25  # Threshold velocity.
    profile = profile[fast_enough]

    # 04-Remove spikes.
    profile = profile.apply(Series.despike, n1=2, n2=20, block=100)

    # 05-Bin-average.
    profile = profile.apply(Series.bindata, delta=1.)

    # 06-interpolate.
    profile = profile.apply(Series.interpolate)

    if False:
        # 07-Smooth (never executed).
        deepest = max(profile.index)
        window_len = 21 if deepest >= 500. else 11 if deepest >= 100. else 5
        profile = profile.apply(Series.smooth,
                                window_len=window_len, window='hanning')

    # 08-Derive.
    profile.lat = profile['latitude'].mean()
    profile.lon = profile['longitude'].mean()
    profile = derive_cnv(profile)
    profile.name = label
    return profile
def proc_ctd(fname, compression="gzip", below_water=True):
    """Quick `proc_ctd` function.

    Load the CNV file `fname`, take the first element returned by
    `split()`, and run the numbered processing steps below on it.

    Parameters
    ----------
    fname : str
        Path of the CNV file.  The part of its base name before the first
        dot is stored as `name` on the returned object.
    compression : str, optional
        Forwarded unchanged to `DataFrame.from_cnv`.
    below_water : bool, optional
        Forwarded unchanged to `DataFrame.from_cnv`.

    Returns
    -------
    The object returned by `derive_cnv`, with its `name` attribute set.
    """
    # 00-Split, clean 'bad pump' data, and apply flag.
    cast = DataFrame.from_cnv(
        fname, compression=compression, below_water=below_water
    ).split()[0]
    cast = cast[cast["pumps"]]
    cast = cast[~cast["flag"]]  # True for bad values.
    name = os.path.basename(fname).split(".")[0]

    # Remove unwanted columns.
    keep = {
        "altM",
        "c0S/m",
        "dz/dtM",
        "wetCDOM",
        "latitude",
        "longitude",
        "sbeox0Mm/Kg",
        "sbeox1Mm/Kg",
        "oxsolMm/Kg",
        "oxsatMm/Kg",
        "par",
        "pla",
        "sva",
        "t090C",
        "t190C",
        "tsa",
        "sbeox0V",
    }
    # Drop eagerly: a bare `map(cast.pop, ...)` is a lazy iterator on
    # Python 3 and removes nothing unless it is consumed.  Only columns
    # that are present and not in `keep` are dropped, so a `keep` entry
    # missing from this cast cannot raise KeyError.
    unwanted = [col for col in cast.columns if col not in keep]
    cast = cast.drop(unwanted, axis=1)

    # Smooth velocity with a 2 seconds window (48 samples at 24 Hz).
    cast["dz/dtM"] = movingaverage(cast["dz/dtM"], window_size=48)

    # 01-Filter pressure.
    cast.index = lp_filter(cast.index, sample_rate=24.0, time_constant=0.15)

    # 02-Remove pressure reversals.
    cast = cast.press_check()
    cast = cast.dropna()

    # 03-Loop Edit.
    cast = cast[cast["dz/dtM"] >= 0.25]  # Threshold velocity.

    # 04-Remove spikes.
    cast = cast.apply(Series.despike, n1=2, n2=20, block=100)

    # 05-Bin-average.
    cast = cast.apply(Series.bindata, delta=1.0)

    # 06-interpolate.
    cast = cast.apply(Series.interpolate)

    # 07-Smooth is disabled.  It used `Series.smooth` with a "hanning"
    # window of 21, 11 or 5 points for a maximum index value of >= 500,
    # >= 100, or less, respectively.

    # 08-Derive.  Mean position is attached before `derive_cnv` is called.
    cast.lat = cast["latitude"].mean()
    cast.lon = cast["longitude"].mean()
    cast = derive_cnv(cast)
    cast.name = name
    return cast
def proc_ctd(fname, below_water=True):
    """
    Quick `proc_ctd` function.

    Reads the CNV file `fname` (forwarding `below_water` to
    `DataFrame.from_cnv`), keeps the first element of `split()`, runs the
    numbered steps below, and returns the result of `derive_cnv` with
    `name` set to `Path(fname).stem`.

    """
    # 00-Split, clean 'bad pump' data, and apply flag.
    parts = DataFrame.from_cnv(fname, below_water=below_water).split()
    data = parts[0]
    stem = Path(fname).stem
    data = data[data['pumps']]
    is_bad = data['flag']  # True for bad values.
    data = data[~is_bad]

    # Smooth velocity with a 2 seconds window.
    smoothed_velocity = movingaverage(data['dz/dtM'], window_size=48)
    data['dz/dtM'] = smoothed_velocity

    # 01-Filter pressure.
    filter_opts = dict(sample_rate=24.0, time_constant=0.15)
    data.index = lp_filter(data.index, **filter_opts)

    # 02-Remove pressure reversals.
    data = data.press_check().dropna()

    # 03-Loop Edit.
    data = data[data['dz/dtM'] >= 0.25]  # Threshold velocity.

    # 04-Remove spikes.
    despike_opts = dict(n1=2, n2=20, block=100)
    data = data.apply(Series.despike, **despike_opts)

    # 05-Bin-average.
    data = data.apply(Series.bindata, delta=1.)

    # 06-interpolate.
    data = data.apply(Series.interpolate)

    if False:
        # 07-Smooth (never executed).
        pmax = max(data.index)
        if pmax < 100.:
            window_len = 5
        elif pmax < 500.:
            window_len = 11
        else:
            window_len = 21
        smooth_opts = dict(window_len=window_len, window='hanning')
        data = data.apply(Series.smooth, **smooth_opts)

    # 08-Derive.
    data.lat = data['latitude'].mean()
    data.lon = data['longitude'].mean()
    data = derive_cnv(data)
    data.name = stem
    return data
def _clean_downcast_series(series, block):
    """Run the shared downcast clean-up chain on one variable.

    The chain is: `remove_above_water`, `despike` (n1=2, n2=20, with the
    given `block`), index-based interpolation restricted to interior gaps,
    and a 2-point 'hanning' `smooth`.
    """
    return (
        series
        .remove_above_water()
        .despike(n1=2, n2=20, block=block)
        .interpolate(method='index', limit_direction='both',
                     limit_area='inside')
        .smooth(window_len=2, window='hanning')
    )


def qualityCheckStation(filename, dateObject, station, CTDConfig):
    """Quality-check one station file and return its binned profile.

    The file is read with `ctd.from_saiv` and split into downcast and
    upcast.  `CTDConfig.useDowncast` selects which of the two is used.
    The selected cast is filtered on 'dz/dtM' >= 0.05, each variable is
    despiked, linearly interpolated and binned with `delta=1`, and the
    results are handed to `station.addData`.

    Parameters
    ----------
    filename
        Path passed to `ctd.from_saiv`.
    dateObject
        Date of the cast; its `year` is read, and it is passed to
        `date2num` together with `CTDConfig.refdate`.
    station
        Object providing `name` and `addData(...)`.
    CTDConfig
        Configuration providing `useDowncast`, `showStats`, `refdate` and
        the column names `tempName`, `saltName`, `oxName`, `oxsatName`,
        `ftuName`.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by the binned salinity index with columns "Depth",
        "Temperature", "Salinity", "Oxygen", "Oxsat" and "FTU".  "FTU" is
        left unfilled for stations 'SJON1' and 'SJON2'.  None is returned
        when the selected cast is empty.

    Raises
    ------
    ValueError
        If the downcast has neither an 'OxMgL' nor an 'OxMlL' column.
    """
    cast, _metadata = ctd.from_saiv(filename)
    downcast, upcast = cast.split()

    # For OKS2 in 2019 the downcast stands in for the upcast.
    if station.name in ["OKS2"] and dateObject.year == 2019:
        upcast = downcast

    selected = downcast if CTDConfig.useDowncast else upcast
    if selected.empty:
        # NOTE(review): nothing is added to `station` for an empty cast;
        # confirm callers handle a None return.
        return None

    if CTDConfig.useDowncast:
        # Map infinite velocities to 0.5 so they pass the threshold, the
        # same way the upcast branch does.  `replace` returns a new Series,
        # so its result has to be used.  NaN velocities fail the comparison
        # below and are dropped without any extra handling.
        velocity = downcast['dz/dtM'].replace([np.inf, -np.inf], 0.5)
        downcast = downcast[velocity >= 0.05]  # Threshold velocity.

        # The despike block size is fixed for downcasts.
        window = 10

        temperature = _clean_downcast_series(
            downcast[CTDConfig.tempName], window)
        salinity = _clean_downcast_series(
            downcast[CTDConfig.saltName], window)

        # Make sure that oxygen is in ml O2/L
        if 'OxMgL' in downcast.columns:
            oxygen_frame = downcast.astype({'OxMgL': float})
            oxygen_frame['OxMgL'] = oxygen_frame.OxMgL.values / 1.42905
            oxygen_frame = sm.to_rename_columns(
                oxygen_frame, 'OxMgL', 'OxMlL')
        elif 'OxMlL' in downcast.columns:
            oxygen_frame = downcast.astype({'OxMlL': float})
        else:
            raise ValueError(
                "Unable to find oxygen in dataformat: {}".format(
                    downcast.columns))

        oxygen = _clean_downcast_series(oxygen_frame["OxMlL"], window)
        oxsat = _clean_downcast_series(
            downcast[CTDConfig.oxsatName], window)
        ftu = _clean_downcast_series(downcast[CTDConfig.ftuName], window)

        if CTDConfig.showStats:
            print("=> STATS FOR DOWNCAST TEMP at %s:\n %s" % (
                station.name, downcast[[CTDConfig.tempName]].describe()))
            print("=> STATS FOR DOWNCAST SALT at %s:\n %s" % (
                station.name, downcast[[CTDConfig.saltName]].describe()))
            if 'OxMgL' in downcast.columns:
                print("=> STATS FOR DOWNCAST OXYGEN at %s:\n %s" % (
                    station.name, downcast[["OxMgL"]].describe()))
            else:
                print("=> STATS FOR DOWNCAST OXYGEN at %s:\n %s" % (
                    station.name, downcast[["OxMlL"]].describe()))
            print("=> STATS FOR DOWNCAST FTU at %s:\n %s" % (
                station.name, downcast[[CTDConfig.ftuName]].describe()))
    else:
        upcast['dz/dtM'] = movingaverage(upcast['dz/dtM'], window_size=2)
        upcast['dz/dtM'] = upcast['dz/dtM'].replace([np.inf, -np.inf], 0.5)
        upcast = upcast[upcast['dz/dtM'] >= 0.05]  # Threshold velocity.

        window = okokyst_tools.findMaximumWindow(upcast, CTDConfig.tempName)
        temperature = upcast[CTDConfig.tempName].despike(
            n1=1, n2=20, block=window)
        salinity = upcast[CTDConfig.saltName].despike(
            n1=1, n2=20, block=window)
        oxygen = upcast[CTDConfig.oxName].despike(
            n1=1, n2=20, block=window)
        oxsat = upcast[CTDConfig.oxsatName].despike(
            n1=1, n2=20, block=window)
        ftu = upcast[CTDConfig.ftuName].despike(n1=2, n2=10, block=window)

        if CTDConfig.showStats:
            print("=> STATS FOR UPCAST TEMP at %s:\n %s" % (
                station.name, upcast[[CTDConfig.tempName]].describe()))
            print("=> STATS FOR UPCAST SALT at %s:\n %s" % (
                station.name, upcast[[CTDConfig.saltName]].describe()))
            print("=> STATS FOR UPCAST OXYGEN at %s:\n %s" % (
                station.name, upcast[[CTDConfig.oxName]].describe()))
            print("=> STATS FOR UPCAST FTU at %s:\n %s" % (
                station.name, upcast[[CTDConfig.ftuName]].describe()))

    # Bin size passed to `bindata`.
    delta = 1
    # FTU is interpolated but not binned or stored in the frame for these
    # two stations.
    include_ftu = station.name not in ['SJON1', 'SJON2']

    # Interpolation (the additional Hanning smoothing step is disabled).
    temperature = temperature.interpolate(method='linear')
    oxygen = oxygen.interpolate(method='linear')
    oxsat = oxsat.interpolate(method='linear')
    ftu = ftu.interpolate(method='linear')
    salinity = salinity.interpolate(method='linear')

    # Bin the data to delta intervals
    temperature = temperature.bindata(delta=delta, method='interpolate')
    salinity = salinity.bindata(delta=delta, method='interpolate')
    oxygen = oxygen.bindata(delta=delta, method='interpolate')
    oxsat = oxsat.bindata(delta=delta, method='interpolate')
    if include_ftu:
        ftu = ftu.bindata(delta=delta, method='interpolate')

    df = pd.DataFrame(index=salinity.index, columns=[
        "Depth", "Temperature", "Salinity", "Oxygen", "Oxsat", "FTU"
    ])
    df["Depth"] = salinity.index
    df["Temperature"] = temperature
    df["Salinity"] = salinity
    df["Oxygen"] = oxygen
    df["Oxsat"] = oxsat
    if include_ftu:
        df["FTU"] = ftu

    # Add data to station object for later
    station.addData(
        salinity, temperature, oxygen, oxsat, ftu, salinity.index,
        date2num(dateObject, CTDConfig.refdate, calendar="standard"))

    return df