Example #1
0
def proc_ctd(fname, compression='gzip', below_water=True):
    """Quick `proc_ctd` function.

    Load a CNV file, keep the first element returned by ``split()``, and
    run the numbered processing steps below (pump/flag filtering, column
    pruning, velocity smoothing, pressure filtering, reversal removal,
    loop edit, despike, bin-average, interpolate, derive).

    Parameters
    ----------
    fname : str
        Path of the CNV file.  Its base name up to the first dot is
        stored as ``name`` on the returned object.
    compression : str, optional
        Forwarded unchanged to `DataFrame.from_cnv`.
    below_water : bool, optional
        Forwarded unchanged to `DataFrame.from_cnv`.

    Returns
    -------
    The object returned by `derive_cnv`, with its ``name`` attribute set.
    """
    # 00-Split, clean 'bad pump' data, and apply flag.
    cast = DataFrame.from_cnv(fname, compression=compression,
                              below_water=below_water).split()[0]
    cast = cast[cast['pumps']]
    cast = cast[~cast['flag']]  # True for bad values.
    name = os.path.basename(fname).split('.')[0]

    # Remove unwanted columns.
    keep = {'altM', 'c0S/m', 'dz/dtM', 'wetCDOM', 'latitude',
            'longitude', 'sbeox0Mm/Kg', 'sbeox1Mm/Kg', 'oxsolMm/Kg',
            'oxsatMm/Kg', 'par', 'pla', 'sva', 't090C', 't190C', 'tsa',
            'sbeox0V'}
    # Drop only the columns that are present AND unwanted.  A symmetric
    # difference would also list wanted columns that this particular cast
    # does not have, and asking `drop` to remove those raises KeyError.
    drop = [column for column in cast.columns if column not in keep]

    cast.drop(drop, axis=1, inplace=True)

    # Smooth velocity with a 2 seconds windows.
    cast['dz/dtM'] = movingaverage(cast['dz/dtM'], window_size=48)

    # 01-Filter pressure.
    kw = dict(sample_rate=24.0, time_constant=0.15)
    cast.index = lp_filter(cast.index, **kw)

    # 02-Remove pressure reversals.
    cast = cast.press_check()
    cast = cast.dropna()

    # 03-Loop Edit.
    cast = cast[cast['dz/dtM'] >= 0.25]  # Threshold velocity.

    # 04-Remove spikes.
    kw = dict(n1=2, n2=20, block=100)
    cast = cast.apply(Series.despike, **kw)

    # 05-Bin-average.
    cast = cast.apply(Series.bindata, **dict(delta=1.))

    # 06-interpolate.
    cast = cast.apply(Series.interpolate)

    # Step 07 is deliberately switched off by the constant condition; the
    # code is kept so it can be re-enabled by hand.
    if False:
        # 07-Smooth.
        pmax = max(cast.index)
        if pmax >= 500.:
            window_len = 21
        elif pmax >= 100.:
            window_len = 11
        else:
            window_len = 5
        kw = dict(window_len=window_len, window='hanning')
        cast = cast.apply(Series.smooth, **kw)

    # 08-Derive.
    # The mean position is attached as plain attributes before calling
    # `derive_cnv` (presumably read there -- confirm against derive_cnv).
    cast.lat = cast['latitude'].mean()
    cast.lon = cast['longitude'].mean()
    cast = derive_cnv(cast)
    cast.name = name
    return cast
Example #2
0
def proc_ctd(fname, compression='gzip', below_water=True):
    """
    Read a CNV file and run the quick processing chain on its first cast.

    `fname` is the file to load; `compression` and `below_water` are handed
    straight to `DataFrame.from_cnv`.  The value returned is the output of
    `derive_cnv`, tagged with a `name` attribute taken from the file's base
    name up to the first dot.

    """
    # 00-Load, split, and keep the first piece of the split.
    pieces = DataFrame.from_cnv(
        fname, compression=compression, below_water=below_water
    ).split()
    cast = pieces[0]

    name = os.path.basename(fname).partition('.')[0]

    # Keep rows selected by 'pumps', then discard rows marked in 'flag'.
    pump_mask = cast['pumps']
    cast = cast[pump_mask]
    flagged = cast['flag']  # True for bad values.
    cast = cast[~flagged]

    # Smooth velocity with a 2 seconds windows.
    smoothed_velocity = movingaverage(cast['dz/dtM'], window_size=48)
    cast['dz/dtM'] = smoothed_velocity

    # 01-Filter pressure (the pressure lives in the index).
    cast.index = lp_filter(cast.index, sample_rate=24.0, time_constant=0.15)

    # 02-Remove pressure reversals, then the rows left empty by that step.
    cast = cast.press_check().dropna()

    # 03-Loop Edit: keep only rows at or above the threshold velocity.
    above_threshold = cast['dz/dtM'] >= 0.25
    cast = cast[above_threshold]

    # 04-Remove spikes.
    cast = cast.apply(Series.despike, n1=2, n2=20, block=100)

    # 05-Bin-average.
    cast = cast.apply(Series.bindata, delta=1.)

    # 06-interpolate.
    cast = cast.apply(Series.interpolate)

    # 07-Smooth.  Switched off by a constant; the code is retained as-is.
    smoothing_enabled = False
    if smoothing_enabled:
        deepest = max(cast.index)
        window_len = 21 if deepest >= 500. else (11 if deepest >= 100. else 5)
        cast = cast.apply(Series.smooth, window_len=window_len,
                          window='hanning')

    # 08-Derive.
    cast.lat = cast['latitude'].mean()
    cast.lon = cast['longitude'].mean()
    derived = derive_cnv(cast)
    derived.name = name
    return derived
Example #3
0
def proc_ctd(fname, compression="gzip", below_water=True):
    """Quick `proc_ctd` function.

    Load a CNV file, keep the first element returned by ``split()``, and
    run the numbered processing steps below (pump/flag filtering, column
    pruning, velocity smoothing, pressure filtering, reversal removal,
    loop edit, despike, bin-average, interpolate, derive).

    Parameters
    ----------
    fname : str
        Path of the CNV file.  Its base name up to the first dot is
        stored as ``name`` on the returned object.
    compression : str, optional
        Forwarded unchanged to `DataFrame.from_cnv`.
    below_water : bool, optional
        Forwarded unchanged to `DataFrame.from_cnv`.

    Returns
    -------
    The object returned by `derive_cnv`, with its ``name`` attribute set.
    """
    # 00-Split, clean 'bad pump' data, and apply flag.
    cast = DataFrame.from_cnv(fname, compression=compression, below_water=below_water).split()[0]
    cast = cast[cast["pumps"]]
    cast = cast[~cast["flag"]]  # True for bad values.
    name = os.path.basename(fname).split(".")[0]

    # Remove unwanted columns.
    keep = {
        "altM",
        "c0S/m",
        "dz/dtM",
        "wetCDOM",
        "latitude",
        "longitude",
        "sbeox0Mm/Kg",
        "sbeox1Mm/Kg",
        "oxsolMm/Kg",
        "oxsatMm/Kg",
        "par",
        "pla",
        "sva",
        "t090C",
        "t190C",
        "tsa",
        "sbeox0V",
    }

    # The previous `map(cast.pop, ...)` never ran on Python 3 (map is lazy
    # and the iterator was deleted unconsumed), so nothing was removed.  It
    # also used a symmetric difference, which lists wanted columns missing
    # from the cast and would raise KeyError once actually evaluated.
    # Drop exactly the columns that are present and not wanted.
    unwanted = [column for column in cast.columns if column not in keep]
    cast = cast.drop(unwanted, axis=1)

    # Smooth velocity with a 2 seconds windows.
    cast["dz/dtM"] = movingaverage(cast["dz/dtM"], window_size=48)

    # 01-Filter pressure.
    kw = dict(sample_rate=24.0, time_constant=0.15)
    cast.index = lp_filter(cast.index, **kw)

    # 02-Remove pressure reversals.
    cast = cast.press_check()
    cast = cast.dropna()

    # 03-Loop Edit.
    cast = cast[cast["dz/dtM"] >= 0.25]  # Threshold velocity.

    # 04-Remove spikes.
    kw = dict(n1=2, n2=20, block=100)
    cast = cast.apply(Series.despike, **kw)

    # 05-Bin-average.
    cast = cast.apply(Series.bindata, **dict(delta=1.0))

    # 06-interpolate.
    cast = cast.apply(Series.interpolate)

    # Step 07 is deliberately switched off by the constant condition; the
    # code is kept so it can be re-enabled by hand.
    if False:
        # 07-Smooth.
        pmax = max(cast.index)
        if pmax >= 500.0:
            window_len = 21
        elif pmax >= 100.0:
            window_len = 11
        else:
            window_len = 5
        kw = dict(window_len=window_len, window="hanning")
        cast = cast.apply(Series.smooth, **kw)

    # 08-Derive.
    # The mean position is attached as plain attributes before calling
    # `derive_cnv` (presumably read there -- confirm against derive_cnv).
    cast.lat = cast["latitude"].mean()
    cast.lon = cast["longitude"].mean()
    cast = derive_cnv(cast)
    cast.name = name
    return cast
Example #4
0
def proc_ctd(fname, below_water=True, smooth=False):
    """
    Quick `proc_ctd` function.

    Load a CNV file, keep the first element returned by ``split()``, and
    run the numbered processing steps below.

    Parameters
    ----------
    fname : str or path-like
        Path of the CNV file.  ``Path(fname).stem`` is stored as ``name``
        on the returned object.
    below_water : bool, optional
        Forwarded unchanged to `DataFrame.from_cnv`.
    smooth : bool, optional
        When true, run step 07 (`Series.smooth` on every column) with a
        window length picked from the largest index value.  Defaults to
        False, which matches the previous behaviour where this step was
        hard-wired off.

    Returns
    -------
    The object returned by `derive_cnv`, with its ``name`` attribute set.

    """
    # 00-Split, clean 'bad pump' data, and apply flag.

    cast = DataFrame.from_cnv(
        fname,
        below_water=below_water
        ).split()[0]

    name = Path(fname).stem
    cast = cast[cast['pumps']]
    cast = cast[~cast['flag']]  # True for bad values.

    # Smooth velocity with a 2 seconds windows.
    cast['dz/dtM'] = movingaverage(cast['dz/dtM'], window_size=48)

    # 01-Filter pressure.
    kw = {
        'sample_rate': 24.0,
        'time_constant': 0.15
    }
    cast.index = lp_filter(cast.index, **kw)

    # 02-Remove pressure reversals.
    cast = cast.press_check()
    cast = cast.dropna()

    # 03-Loop Edit.
    cast = cast[cast['dz/dtM'] >= 0.25]  # Threshold velocity.

    # 04-Remove spikes.
    kw = {
        'n1': 2,
        'n2': 20,
        'block': 100
    }
    cast = cast.apply(Series.despike, **kw)

    # 05-Bin-average.
    cast = cast.apply(Series.bindata, **{'delta': 1.})

    # 06-interpolate.
    cast = cast.apply(Series.interpolate)

    if smooth:
        # 07-Smooth.
        pmax = max(cast.index)
        if pmax >= 500.:
            window_len = 21
        elif pmax >= 100.:
            window_len = 11
        else:
            window_len = 5
        # Build the smoothing options for EVERY branch.  Previously this
        # dict was only created in the `else`, so the two deeper cases
        # would have reused the despike options (n1/n2/block) here.
        kw = {
            'window_len': window_len,
            'window': 'hanning'
        }
        cast = cast.apply(Series.smooth, **kw)

    # 08-Derive.
    # The mean position is attached as plain attributes before calling
    # `derive_cnv` (presumably read there -- confirm against derive_cnv).
    cast.lat = cast['latitude'].mean()
    cast.lon = cast['longitude'].mean()
    cast = derive_cnv(cast)
    cast.name = name
    return cast
Example #5
0
def _clean_downcast_series(series, window):
    """Apply the downcast cleaning chain to one variable.

    The chain is: ``remove_above_water`` -> ``despike`` (n1=2, n2=20,
    block=`window`) -> index-based interpolation restricted to interior
    gaps -> ``smooth`` with a 2-point hanning window.
    """
    return (
        series.remove_above_water()
        .despike(n1=2, n2=20, block=window)
        .interpolate(method='index',
                     limit_direction='both',
                     limit_area='inside')
        .smooth(window_len=2, window='hanning')
    )


def _print_cast_stats(direction, station_name, frame, labelled_columns):
    """Print ``describe()`` for each ``(label, column)`` pair of `frame`."""
    for label, column in labelled_columns:
        print("=> STATS FOR %s %s at %s:\n %s" %
              (direction, label, station_name, frame[[column]].describe()))


def qualityCheckStation(filename, dateObject, station, CTDConfig):
    """Quality-control one SAIV cast and store the result on `station`.

    The file is read with ``ctd.from_saiv`` and split into downcast and
    upcast.  Depending on ``CTDConfig.useDowncast`` one of the two is
    velocity-filtered, despiked, linearly interpolated and binned to 1
    unit of the index.  The cleaned series are handed to
    ``station.addData`` and also returned as a DataFrame.

    Parameters
    ----------
    filename :
        Passed to ``ctd.from_saiv``.
    dateObject :
        Its ``year`` is inspected, and the object is converted with
        ``date2num`` using ``CTDConfig.refdate``.
    station :
        Must provide ``name`` and ``addData(...)``.
    CTDConfig :
        Configuration object.  Attributes read here: ``useDowncast``,
        ``tempName``, ``saltName``, ``oxName`` (upcast only),
        ``oxsatName``, ``ftuName``, ``showStats`` and ``refdate``.

    Returns
    -------
    pandas.DataFrame or None
        Columns "Depth", "Temperature", "Salinity", "Oxygen", "Oxsat" and
        "FTU", indexed like the binned salinity.  ``None`` when the
        selected cast is empty (nothing is added to `station` then).

    Raises
    ------
    ValueError
        Downcast only: neither an ``OxMgL`` nor an ``OxMlL`` column exists.
    """
    cast, _metadata = ctd.from_saiv(filename)
    downcast, upcast = cast.split()

    # Special case kept from the original: for this station/year the
    # downcast is used in place of the upcast.
    if station.name in ["OKS2"] and dateObject.year == 2019:
        upcast = downcast

    selected = downcast if CTDConfig.useDowncast else upcast
    if selected.empty:
        return None

    if CTDConfig.useDowncast:
        # Threshold velocity.  Infinite velocities are treated as 0.5 so
        # they pass the threshold, as in the upcast branch.  The previous
        # code called .fillna()/.replace() here but threw the results
        # away.  NaN velocities fail the comparison and are dropped.
        velocity = downcast['dz/dtM'].replace([np.inf, -np.inf], 0.5)
        downcast = downcast[velocity >= 0.05]

        # Fixed despike block size for the downcast.  The original also
        # called okokyst_tools.findMaximumWindow() here but overwrote its
        # result with this constant straight away.
        window = 10

        temperature = _clean_downcast_series(downcast[CTDConfig.tempName],
                                             window)
        salinity = _clean_downcast_series(downcast[CTDConfig.saltName],
                                          window)

        # Make sure that oxygen is in ml O2/L
        if 'OxMgL' in downcast.columns:
            oxygen_frame = downcast.astype({'OxMgL': float})
            oxygen_frame['OxMgL'] = oxygen_frame.OxMgL.values / 1.42905
            oxygen_frame = sm.to_rename_columns(oxygen_frame, 'OxMgL',
                                                'OxMlL')
            raw_oxygen_column = 'OxMgL'
        elif 'OxMlL' in downcast.columns:
            oxygen_frame = downcast.astype({'OxMlL': float})
            raw_oxygen_column = 'OxMlL'
        else:
            raise ValueError(
                "Unable to find oxygen in dataformat: {}".format(
                    downcast.columns))

        oxygen = _clean_downcast_series(oxygen_frame["OxMlL"], window)
        oxsat = _clean_downcast_series(downcast[CTDConfig.oxsatName], window)
        ftu = _clean_downcast_series(downcast[CTDConfig.ftuName], window)

        if CTDConfig.showStats:
            # Statistics describe the filtered but otherwise raw columns
            # (oxygen in whichever unit the file delivered).
            _print_cast_stats("DOWNCAST", station.name, downcast, [
                ("TEMP", CTDConfig.tempName),
                ("SALT", CTDConfig.saltName),
                ("OXYGEN", raw_oxygen_column),
                ("FTU", CTDConfig.ftuName),
            ])
    else:
        upcast['dz/dtM'] = movingaverage(upcast['dz/dtM'], window_size=2)
        upcast['dz/dtM'] = upcast['dz/dtM'].replace([np.inf, -np.inf], 0.5)
        upcast = upcast[upcast['dz/dtM'] >= 0.05]  # Threshold velocity.

        window = okokyst_tools.findMaximumWindow(upcast, CTDConfig.tempName)

        temperature = upcast[CTDConfig.tempName].despike(
            n1=1, n2=20, block=window)
        salinity = upcast[CTDConfig.saltName].despike(
            n1=1, n2=20, block=window)
        oxygen = upcast[CTDConfig.oxName].despike(
            n1=1, n2=20, block=window)
        oxsat = upcast[CTDConfig.oxsatName].despike(
            n1=1, n2=20, block=window)
        # FTU uses different despike settings from the other variables.
        ftu = upcast[CTDConfig.ftuName].despike(n1=2, n2=10, block=window)

        if CTDConfig.showStats:
            _print_cast_stats("UPCAST", station.name, upcast, [
                ("TEMP", CTDConfig.tempName),
                ("SALT", CTDConfig.saltName),
                ("OXYGEN", CTDConfig.oxName),
                ("FTU", CTDConfig.ftuName),
            ])

    # Bin width, in units of the series index.
    delta = 1

    # Fill remaining gaps before binning; no extra smoothing is applied.
    temperature = temperature.interpolate(method='linear')
    oxygen = oxygen.interpolate(method='linear')
    oxsat = oxsat.interpolate(method='linear')
    ftu = ftu.interpolate(method='linear')
    salinity = salinity.interpolate(method='linear')

    # Bin the data to delta intervals
    temperature = temperature.bindata(delta=delta, method='interpolate')
    salinity = salinity.bindata(delta=delta, method='interpolate')
    oxygen = oxygen.bindata(delta=delta, method='interpolate')
    oxsat = oxsat.bindata(delta=delta, method='interpolate')

    # For these two stations FTU is neither binned nor written to the
    # returned frame; the unbinned series is still passed to addData.
    include_ftu = station.name not in ['SJON1', 'SJON2']
    if include_ftu:
        ftu = ftu.bindata(delta=delta, method='interpolate')

    df = pd.DataFrame(index=salinity.index,
                      columns=[
                          "Depth", "Temperature", "Salinity", "Oxygen",
                          "Oxsat", "FTU"
                      ])
    df["Depth"] = salinity.index
    df["Temperature"] = temperature
    df["Salinity"] = salinity
    df["Oxygen"] = oxygen
    df["Oxsat"] = oxsat
    if include_ftu:
        df["FTU"] = ftu

    # Add data to station object for later
    station.addData(
        salinity, temperature, oxygen, oxsat, ftu, salinity.index,
        date2num(dateObject, CTDConfig.refdate, calendar="standard"))
    return df