Example #1
    def test_interpolate_gps(self):
        est_lat, est_lon = interpolate_gps(timestamps=masked_epoch(self.df.t),
                                           latitude=self.df.y,
                                           longitude=self.df.x)
        assert len(est_lat) == len(est_lon)
        assert len(est_lat) == self.df.y.size
        assert len(est_lon) == self.df.x.size
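Both this test and the next depend on masked_epoch to turn the time column into something interpolate_gps can interpolate over. A minimal sketch of the behavior the tests assume (datetimes in, float epoch seconds out, NaT masked to NaN); the actual GUTILS helper may differ:

import numpy as np
import pandas as pd

def masked_epoch(timestamps):
    # Convert a datetime Series to float epoch seconds; NaT entries
    # become NaN so downstream interpolation can skip them
    s = pd.to_datetime(pd.Series(timestamps))
    return (s - pd.Timestamp('1970-01-01')).dt.total_seconds().values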
Example #2
    def test_density(self):
        sr = SlocumReader(ctd_filepath)
        df = sr.standardize()

        salinity = calculate_practical_salinity(
            sr.data.sci_water_cond,
            sr.data.sci_water_temp,
            sr.data.sci_water_pressure,
        )
        assert sr.data.sci_m_present_time.size == salinity.size

        est_lat, est_lon = interpolate_gps(timestamps=masked_epoch(df.t),
                                           latitude=df.y,
                                           longitude=df.x)

        density = calculate_density(sr.data.sci_water_temp,
                                    sr.data.sci_water_pressure, salinity,
                                    est_lat, est_lon)
        assert sr.data.sci_m_present_time.size == density.size
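calculate_practical_salinity and calculate_density are GUTILS wrappers around TEOS-10 routines. For orientation, the density step can be sketched directly against the gsw package; this is an illustrative assumption about what the wrapper does, not the GUTILS implementation itself:

import gsw

def density_sketch(temperature, pressure_dbar, salinity, lat, lon):
    # Practical Salinity -> Absolute Salinity -> Conservative Temperature,
    # then in-situ density, all via the TEOS-10 gsw library
    sa = gsw.SA_from_SP(salinity, pressure_dbar, lon, lat)
    ct = gsw.CT_from_t(sa, temperature, pressure_dbar)
    return gsw.rho(sa, ct, pressure_dbar)

Note that raw Slocum sci_water_pressure is reported in bar; standardize() in Example #3 multiplies by 10 to get the dbar that gsw expects.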
Example #3
    def standardize(self, gps_prefix=None):

        df = self.data.copy()

        # Convert NMEA coordinates to decimal degrees
        for col in df.columns:
            # Treat the default masterdata fill values (lat > 9000, lon >= 18000) as missing
            if col.endswith('_lat'):
                df[col] = df[col].map(lambda x: get_decimal_degrees(x)
                                      if x <= 9000 else np.nan)
            elif col.endswith('_lon'):
                df[col] = df[col].map(lambda x: get_decimal_degrees(x)
                                      if x < 18000 else np.nan)

        # Standardize 'time' to the 't' column
        for t in self.TIMESTAMP_SENSORS:
            if t in df.columns:
                df['t'] = pd.to_datetime(df[t], unit='s')
                break

        # Interpolate GPS coordinates
        if 'm_gps_lat' in df.columns and 'm_gps_lon' in df.columns:

            df['drv_m_gps_lat'] = df.m_gps_lat.copy()
            df['drv_m_gps_lon'] = df.m_gps_lon.copy()

            # Fill in data with nulls where the value is the default masterdata value
            masterdatas = (df.drv_m_gps_lon >= 18000) | (df.drv_m_gps_lat > 9000)
            df.loc[masterdatas, 'drv_m_gps_lat'] = np.nan
            df.loc[masterdatas, 'drv_m_gps_lon'] = np.nan

            try:
                # Interpolate the filled in 'x' and 'y'
                y_interp, x_interp = interpolate_gps(masked_epoch(df.t),
                                                     df.drv_m_gps_lat,
                                                     df.drv_m_gps_lon)
            except (ValueError, IndexError):
                L.warning("Raw GPS values not found!")
                y_interp = np.full(df.drv_m_gps_lat.size, np.nan)
                x_interp = np.full(df.drv_m_gps_lon.size, np.nan)

            df['y'] = y_interp
            df['x'] = x_interp
        """
        ---- Option 1: Always calculate Z from pressure ----
        It's really a matter of data provider preference and varies from one provider to another.
        That being said, typically the sci_water_pressure or m_water_pressure variables, if present
        in the raw data files, will typically have more non-NaN values than m_depth.  For example,
        all MARACOOS gliders typically have both m_depth and sci_water_pressure contained in them.
        However, m_depth is typically heavily decimated while sci_water_pressure contains a more
        complete pressure record.  So, while we transmit both m_depth and sci_water_pressure, I
        calculate depth from pressure & (interpolated) latitude and use that as my NetCDF depth
        variable. - Kerfoot
        """
        # Search for a 'pressure' column
        for p in self.PRESSURE_SENSORS:
            if p in df.columns:
                # Convert bar to dbar here
                df['pressure'] = df[p].copy() * 10
                # Calculate depth from pressure and latitude
                # Negate the results so that increasing values denote increasing depths
                df['z'] = -z_from_p(df.pressure, df.y)
                break

        if 'z' not in df and 'pressure' not in df:
            # Search for a 'z' column
            for p in self.DEPTH_SENSORS:
                if p in df.columns:
                    df['z'] = df[p].copy()
                    # Calculate pressure from depth and latitude
                    # Negate the results so that increasing values denote increasing depth
                    df['pressure'] = -p_from_z(df.z, df.y)
                    break
        # End Option 1
        """
        ---- Option 2: Use raw pressure/depth data that was sent across ----
        # Standardize to the 'pressure' column
        for p in self.PRESSURE_SENSORS:
            if p in df.columns:
                # Convert bar to dbar here
                df['pressure'] = df[p].copy() * 10
                break

        # Standardize to the 'z' column
        for p in self.DEPTH_SENSORS:
            if p in df.columns:
                df['z'] = df[p].copy()
                break

        # Don't calculate Z from pressure if a metered depth column exists already
        if 'pressure' in df and 'z' not in df:
            # Calculate depth from pressure and latitude
            # Negate the results so that increasing values denote increasing depths
            df['z'] = -z_from_p(df.pressure, df.y)

        if 'z' in df and 'pressure' not in df:
            # Calculate pressure from depth and latitude
            # Negate the results so that increasing values denote increasing depth
            df['pressure'] = -p_from_z(df.z, df.y)
        # End Option 2
        """

        rename_columns = {
            'm_water_vx': 'u_orig',
            'm_water_vy': 'v_orig',
        }

        # These need to be standardized so we can compute salinity and density!
        for vname in self.TEMPERATURE_SENSORS:
            if vname in df.columns:
                rename_columns[vname] = 'temperature'
                break
        for vname in self.CONDUCTIVITY_SENSORS:
            if vname in df.columns:
                rename_columns[vname] = 'conductivity'
                break

        # Standardize columns
        df = df.rename(columns=rename_columns)

        # Compute additional columns
        df = self.compute(df)

        return df
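standardize() leans on get_decimal_degrees to turn NMEA-packed coordinates (ddmm.mmmm, so 4530.5 means 45 degrees 30.5 minutes) into decimal degrees. A sign-preserving sketch of that standard conversion; the real GUTILS function may handle edge cases differently:

import numpy as np

def get_decimal_degrees_sketch(nmea_value):
    # NMEA packs whole degrees and decimal minutes together: ddmm.mmmm
    # e.g. 4530.5 -> 45 degrees + 30.5 minutes -> 45.50833...
    if np.isnan(nmea_value):
        return np.nan
    sign = -1 if nmea_value < 0 else 1
    value = abs(nmea_value)
    degrees = int(value // 100)
    minutes = value - degrees * 100
    return sign * (degrees + minutes / 60)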
Example #4
File: yo.py Project: kerfoot/GUTILS
def assign_profiles(df, tsint=1):
    profile_df = df.copy()
    profile_df['profile'] = np.nan  # Fill profile with nans
    tmp_df = df.copy()

    if tsint is None:
        tsint = 1

    # Make 't' epochs and not a DateTimeIndex
    tmp_df['t'] = masked_epoch(tmp_df.t)
    # Set non-positive depth values to NaN
    tmp_df.loc[tmp_df.z <= 0, 'z'] = np.nan

    # Remove any rows where time or z is NaN
    tmp_df = tmp_df.dropna(subset=['t', 'z'], how='any')

    if len(tmp_df) < 2:
        return None

    # Create the fixed timestamp array from the min timestamp to the max timestamp
    # spaced by tsint intervals
    ts = np.arange(tmp_df.t.min(), tmp_df.t.max(), tsint)
    # Stretch estimated values for interpolation to span entire dataset
    interp_z = np.interp(ts,
                         tmp_df.t,
                         tmp_df.z,
                         left=tmp_df.z.iloc[0],
                         right=tmp_df.z.iloc[-1])

    del tmp_df

    if len(interp_z) < 2:
        return None

    filtered_z = boxcar_smooth_dataset(interp_z, max(tsint // 2, 1))
    delta_depth = calculate_delta_depth(filtered_z)

    # Find where the depth indexes (-1 and 1) flip
    inflections = np.where(np.diff(delta_depth) != 0)[0]
    # Do we have any profiles?
    if inflections.size < 1:
        return profile_df

    # Prepend a zero at the beginning to start the series of profiles
    p_inds = np.insert(inflections, 0, 0)
    # Append the size of the time array to end the series of profiles
    p_inds = np.append(p_inds, ts.size - 1)
    # Zip up neighbors to get the ranges of each profile in interpolated space
    p_inds = list(zip(p_inds[0:-1], p_inds[1:]))
    # Convert the profile indexes into datetime objects
    p_inds = [(pd.to_datetime(ts[int(p0)], unit='s'),
               pd.to_datetime(ts[int(p1)], unit='s'))
              for p0, p1 in p_inds]

    # We have the profiles in interpolated space, now associate this
    # space with the actual data using the datetimes.

    # Iterate through the profile start/stop indices
    for profile_index, (min_time, max_time) in enumerate(p_inds):

        # Get rows between the min and max time
        time_between = profile_df.t.between(min_time, max_time, inclusive='both')

        # Get indexes of the between rows since we can't assign by the range due to NaT values
        ixs = profile_df.loc[time_between].index.tolist()

        # Set the rows profile column to the profile id
        if len(ixs) > 1:
            profile_df.loc[ixs[0]:ixs[-1], 'profile'] = profile_index
        elif len(ixs) == 1:
            profile_df.loc[ixs[0], 'profile'] = profile_index
        else:
            L.debug('No data rows matched the time range of this profile; skipping.')

    # Remove rows that were not assigned a profile
    # profile_df = profile_df.loc[~profile_df.profile.isnull()]

    return profile_df
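assign_profiles relies on two helpers that do the actual profile detection. A plausible minimal sketch of each, assuming boxcar_smooth_dataset is a moving average and calculate_delta_depth is the sign of the depth change; the real GUTILS implementations may differ:

import numpy as np

def boxcar_smooth_dataset(dataset, window_size):
    # Moving average via convolution with a normalized boxcar window
    window = np.ones(window_size) / window_size
    return np.convolve(dataset, window, mode='same')

def calculate_delta_depth(interp_z):
    # Sign of the depth change between consecutive samples:
    # +1 while the glider dives, -1 while it climbs
    return np.sign(np.diff(interp_z))

With these definitions, np.diff(delta_depth) != 0 flags exactly the samples where the glider turns around, which is what the inflection logic above counts on.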
Example #5
File: yo.py Project: zeroYXX/GUTILS
def assign_profiles(df, tsint=None):
    """Returns the start and stop timestamps for every profile indexed from the
    depth timeseries
    Parameters:
        time, depth
    Returns:
        A Nx2 array of the start and stop timestamps indexed from the yo
    Use filter_yo_extrema to remove invalid/incomplete profiles
    """

    profile_df = df.copy()
    profile_df['profile'] = np.nan  # Fill profile with nans
    tmp_df = df.copy()

    if tsint is None:
        tsint = 2

    # Make 't' epochs and not a DateTimeIndex
    tmp_df['t'] = masked_epoch(tmp_df.t)
    # Set non-positive depth values to NaN
    tmp_df.loc[tmp_df.z <= 0, 'z'] = np.nan

    # Remove NaN rows
    tmp_df = tmp_df.dropna(subset=['t', 'z'], how='any')

    if len(tmp_df) < 2:
        return None

    # Create the fixed timestamp array from the min timestamp to the max timestamp
    # spaced by tsint intervals
    ts = np.arange(tmp_df.t.min(), tmp_df.t.max(), tsint)
    # Stretch estimated values for interpolation to span entire dataset
    interp_z = np.interp(ts,
                         tmp_df.t,
                         tmp_df.z,
                         left=tmp_df.z.iloc[0],
                         right=tmp_df.z.iloc[-1])

    del tmp_df

    if len(interp_z) < 2:
        return None

    filtered_z = boxcar_smooth_dataset(interp_z, max(tsint // 2, 1))
    delta_depth = calculate_delta_depth(filtered_z)

    p_inds = np.empty((0, 2))
    inflections = np.where(np.diff(delta_depth) != 0)[0]
    if inflections.size < 1:
        return profile_df
    p_inds = np.append(p_inds, [[0, inflections[0]]], axis=0)

    for p in range(len(inflections) - 1):
        p_inds = np.append(p_inds, [[inflections[p], inflections[p + 1]]],
                           axis=0)
    p_inds = np.append(p_inds, [[inflections[-1], len(ts) - 1]], axis=0)

    # Start profile index
    profile_index = 0
    ts_window = tsint * 2

    # Iterate through the profile start/stop indices
    for p0, p1 in p_inds:

        min_time = pd.to_datetime(ts[int(p0)] - ts_window, unit='s')
        max_time = pd.to_datetime(ts[int(p1)] + ts_window, unit='s')

        # Get rows between the min and max time
        time_between = profile_df.t.between(min_time, max_time, inclusive='both')

        # Get indexes of the between rows since we can't assign by the range due to NaT values
        ixs = profile_df.loc[time_between].index.tolist()

        # Set the rows profile column to the profile id
        if len(ixs) > 1:
            profile_df.loc[ixs[0]:ixs[-1], 'profile'] = profile_index
        elif len(ixs) == 1:
            profile_df.loc[ixs[0], 'profile'] = profile_index
        else:
            L.debug('No data rows matched the time range of this profile; skipping.')

        # Increment the profile index
        profile_index += 1

    # Remove rows that were not assigned a profile
    # profile_df = profile_df.loc[~profile_df.profile.isnull()]

    # L.info(
    #     list(zip(
    #         profile_df.t,
    #         profile_df.profile,
    #         profile_df.z,
    #     ))[0:20]
    # )
    return profile_df
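Either version can be exercised with a synthetic yo: a triangle-wave depth series should come back with one profile id per dive or climb leg. A hedged, self-contained usage sketch:

import numpy as np
import pandas as pd

# Three straight legs sampled once per second: dive, climb, dive
t = pd.date_range('2020-01-01', periods=300, freq='s')
z = np.concatenate([
    np.linspace(1, 50, 100),   # dive
    np.linspace(50, 1, 100),   # climb
    np.linspace(1, 50, 100),   # dive again
])
df = pd.DataFrame({'t': t, 'z': z})

profiles = assign_profiles(df, tsint=2)
print(profiles.profile.dropna().unique())  # expect roughly [0., 1., 2.]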