def calculate_delta_depth(interp_data):
    """ Figure out when the interpolated Z data turns a corner """
    delta_depth = np.diff(interp_data)
    delta_depth = binarize_diff(delta_depth)
    delta_depth = boxcar_smooth_dataset(delta_depth, 2)

    return delta_depth
def calculate_delta_depth(interp_data): """ Figure out when the interpolated Z data turns a corner """ delta_depth = np.diff(interp_data) delta_depth[delta_depth <= 0] = -1 delta_depth[delta_depth >= 0] = 1 delta_depth = boxcar_smooth_dataset(delta_depth, 2) delta_depth[delta_depth <= 0] = -1 delta_depth[delta_depth >= 0] = 1 return delta_depth
def find_yo_extrema(timestamps, depth):
    """Returns timestamps, row indices, and depth and time bounds
    corresponding to glider yo profiles

    Returns the timestamps and row indices corresponding to the peaks and
    valleys (profile starts/stops) found in the time-depth array, tz. All
    indices are returned.

    Parameters:
        timestamps, depth

    Returns:
        A Nx3 array of timestamps, depths, and profile ids

    Use filter_yo_extrema to remove invalid/incomplete profiles
    """
    validate_glider_args(timestamps, depth)

    est_data = np.column_stack((timestamps, depth))

    # Set negative depth values to NaN
    est_data[est_data[:, DATA_DIM] <= 0] = float('nan')

    est_data = clean_dataset(est_data)

    # Stretch estimated values for interpolation to span entire dataset
    interp_data = np.interp(
        timestamps,
        est_data[:, 0],
        est_data[:, 1],
        left=est_data[0, 1],
        right=est_data[-1, 1]
    )

    interp_data = boxcar_smooth_dataset(interp_data, 5)

    delta_depth = calculate_delta_depth(interp_data)

    interp_indices = np.argwhere(delta_depth == 0).flatten()

    profiled_dataset = np.zeros((len(timestamps), 3))
    profiled_dataset[:, TIME_DIM] = timestamps
    profiled_dataset[:, DATA_DIM] = interp_data

    # Assign a profile id to each block of rows between inflection indices
    start_index = 0
    for profile_id, end_index in enumerate(interp_indices):
        profiled_dataset[start_index:end_index, 2] = profile_id
        start_index = end_index

    # Assign the trailing rows to the final profile
    if start_index < len(profiled_dataset):
        profiled_dataset[start_index:, 2] = len(interp_indices) - 1

    return profiled_dataset
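# --- Illustrative usage sketch (assumptions noted below) ---
# Hypothetical call showing how the Nx3 return value (timestamp, interpolated
# depth, profile id) can be split per profile. Assumes this module's helpers
# (validate_glider_args, clean_dataset, boxcar_smooth_dataset) are available
# and that TIME_DIM == 0 and DATA_DIM == 1; the synthetic yo is made up.
import numpy as np

timestamps = np.arange(0, 3600, 10.0)                      # epoch seconds (made up)
depth = 50 - 45 * np.cos(2 * np.pi * timestamps / 1200.0)  # synthetic yo, 5-95 m

profiled = find_yo_extrema(timestamps, depth)

# Group rows by the profile id stored in column 2
for profile_id in np.unique(profiled[:, 2]):
    rows = profiled[profiled[:, 2] == profile_id]
    print(int(profile_id), rows[0, 0], rows[-1, 0])  # id, first and last timestamp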
def find_yo_extrema(timestamps, depth, tsint=10):
    """Returns the start and stop timestamps for every profile indexed from
    the depth timeseries

    Parameters:
        timestamps, depth: the raw yo timeseries
        tsint: spacing (seconds) of the regularly sampled timestamp array used
            for interpolation

    Returns:
        A Nx2 array of the start and stop timestamps indexed from the yo

    Use filter_yo_extrema to remove invalid/incomplete profiles
    """
    validate_glider_args(timestamps, depth)

    est_data = np.column_stack((timestamps, depth))

    # Set negative depth values to NaN
    est_data[est_data[:, DATA_DIM] <= 0] = float('nan')

    est_data = clean_dataset(est_data)

    # Create the fixed timestamp array from the min timestamp to the max
    # timestamp spaced by tsint intervals
    ts = np.arange(est_data[:, 0].min(), est_data[:, 0].max(), tsint)

    # Stretch estimated values for interpolation to span entire dataset
    interp_z = np.interp(
        ts,
        est_data[:, 0],
        est_data[:, 1],
        left=est_data[0, 1],
        right=est_data[-1, 1]
    )

    filtered_z = boxcar_smooth_dataset(interp_z, tsint / 2)

    delta_depth = calculate_delta_depth(filtered_z)

    # interp_indices = np.argwhere(delta_depth == 0).flatten()

    # Find where the direction of travel flips and build the list of profile
    # start/stop index pairs in interpolated space
    p_inds = np.empty((0, 2))
    inflections = np.where(np.diff(delta_depth) != 0)[0]

    p_inds = np.append(p_inds, [[0, inflections[0]]], axis=0)
    for p in range(len(inflections) - 1):
        p_inds = np.append(p_inds, [[inflections[p], inflections[p + 1]]], axis=0)
    p_inds = np.append(p_inds, [[inflections[-1], len(ts) - 1]], axis=0)

    # profile_timestamps = np.empty((0, 2))
    ts_window = tsint * 2

    # Create orig GUTILS return value - lindemuth method
    # Initialize an nx3 numpy array of nans
    profiled_dataset = np.full((len(timestamps), 3), np.nan)
    # Replace TIME_DIM column with the original timestamps
    profiled_dataset[:, TIME_DIM] = timestamps
    # Replace DATA_DIM column with the original depths
    profiled_dataset[:, DATA_DIM] = depth

    # Create Nx2 numpy array of profile start/stop times - kerfoot method
    profile_times = np.full((p_inds.shape[0], 2), np.nan)

    # Start profile index
    profile_ind = 0
    # Iterate through the profile start/stop indices
    for p in p_inds:
        # Profile start row
        p0 = int(p[0])
        # Profile end row
        p1 = int(p[1])
        # Find all rows in the original yo that fall between the interpolated timestamps
        profile_i = np.flatnonzero(
            np.logical_and(
                profiled_dataset[:, TIME_DIM] >= ts[p0] - ts_window,
                profiled_dataset[:, TIME_DIM] <= ts[p1] + ts_window
            )
        )
        # Slice out the profile
        pro = profiled_dataset[profile_i]
        # Find the row index corresponding to the minimum depth
        try:
            min_i = np.nanargmin(pro[:, 1])
        except ValueError as e:
            logger.warning(e)
            continue
        # Find the row index corresponding to the maximum depth
        try:
            max_i = np.nanargmax(pro[:, 1])
        except ValueError as e:
            logger.warning(e)
            continue

        # Sort the min/max indices in ascending order
        sorted_i = np.sort([min_i, max_i])

        # Set the profile index
        profiled_dataset[profile_i[sorted_i[0]]:profile_i[sorted_i[1]], 2] = profile_ind

        # kerfoot method
        profile_times[profile_ind, :] = [
            timestamps[profile_i[sorted_i[0]]],
            timestamps[profile_i[sorted_i[1]]]
        ]
        # Increment the profile index
        profile_ind += 1

        # profile_timestamps = np.append(profile_timestamps, [[est_data[profile_i[0][0], 0], est_data[profile_i[0][-1], 0]]], axis=0)

    # return profiled_dataset
    return profile_times
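# --- Illustrative usage sketch (assumptions noted below) ---
# Hypothetical call against the Nx2 start/stop times returned by this version.
# Assumes the module helpers (validate_glider_args, clean_dataset,
# boxcar_smooth_dataset) and the module-level logger exist, and that the
# timestamps are epoch seconds; the synthetic yo is made up.
import numpy as np

timestamps = np.arange(0, 7200, 8.0)                        # epoch seconds (made up)
depth = 60 - 55 * np.cos(2 * np.pi * timestamps / 1800.0)   # synthetic yo, 5-115 m

profile_times = find_yo_extrema(timestamps, depth, tsint=10)

# Slice the original yo into per-profile chunks using the returned bounds
for start_t, stop_t in profile_times:
    in_profile = (timestamps >= start_t) & (timestamps <= stop_t)
    print(start_t, stop_t, int(in_profile.sum()), 'rows')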
def assign_profiles(df, tsint=1):
    """Assigns a profile index to each row of a glider depth timeseries

    Parameters:
        df: DataFrame with a datetime column 't' and a depth column 'z'
        tsint: spacing (seconds) of the interpolated timeseries

    Returns:
        A copy of df with an added 'profile' column, or None if there is not
        enough valid data
    """
    profile_df = df.copy()
    profile_df['profile'] = np.nan  # Fill profile with nans
    tmp_df = df.copy()

    if tsint is None:
        tsint = 1

    # Make 't' epochs and not a DateTimeIndex
    tmp_df['t'] = masked_epoch(tmp_df.t)
    # Set negative depth values to NaN
    tmp_df.loc[tmp_df.z <= 0, 'z'] = np.nan

    # Remove any rows where time or z is NaN
    tmp_df = tmp_df.dropna(subset=['t', 'z'], how='any')

    if len(tmp_df) < 2:
        return None

    # Create the fixed timestamp array from the min timestamp to the max
    # timestamp spaced by tsint intervals
    ts = np.arange(tmp_df.t.min(), tmp_df.t.max(), tsint)

    # Stretch estimated values for interpolation to span entire dataset
    interp_z = np.interp(
        ts,
        tmp_df.t,
        tmp_df.z,
        left=tmp_df.z.iloc[0],
        right=tmp_df.z.iloc[-1]
    )
    del tmp_df

    if len(interp_z) < 2:
        return None

    filtered_z = boxcar_smooth_dataset(interp_z, max(tsint // 2, 1))
    delta_depth = calculate_delta_depth(filtered_z)

    # Find where the depth indexes (-1 and 1) flip
    inflections = np.where(np.diff(delta_depth) != 0)[0]

    # Do we have any profiles?
    if inflections.size < 1:
        return profile_df

    # Prepend a zero at the beginning to start the series of profiles
    p_inds = np.insert(inflections, 0, 0)
    # Append the size of the time array to end the series of profiles
    p_inds = np.append(p_inds, ts.size - 1)
    # Zip up neighbors to get the ranges of each profile in interpolated space
    p_inds = list(zip(p_inds[0:-1], p_inds[1:]))
    # Convert the profile indexes into datetime objects
    p_inds = [
        (
            pd.to_datetime(ts[int(p0)], unit='s'),
            pd.to_datetime(ts[int(p1)], unit='s')
        )
        for p0, p1 in p_inds
    ]

    # We have the profiles in interpolated space, now associate this
    # space with the actual data using the datetimes.

    # Iterate through the profile start/stop indices
    for profile_index, (min_time, max_time) in enumerate(p_inds):
        # Get rows between the min and max time
        time_between = profile_df.t.between(min_time, max_time, inclusive=True)
        # Get indexes of the between rows since we can't assign by the range
        # due to NaT values
        ixs = profile_df.loc[time_between].index.tolist()
        # Set the rows' profile column to the profile id
        if len(ixs) > 1:
            profile_df.loc[ixs[0]:ixs[-1], 'profile'] = profile_index
        elif len(ixs) == 1:
            profile_df.loc[ixs[0], 'profile'] = profile_index
        else:
            L.debug(
                'No data rows matched the time range of this profile, skipping.'
            )

    # Remove rows that were not assigned a profile
    # profile_df = profile_df.loc[~profile_df.profile.isnull()]

    return profile_df
def assign_profiles(df, tsint=None):
    """Assigns a profile index to each row of a glider depth timeseries

    Parameters:
        df: DataFrame with a datetime column 't' and a depth column 'z'
        tsint: spacing (seconds) of the interpolated timeseries (defaults to 2)

    Returns:
        A copy of df with an added 'profile' column, or None if there is not
        enough valid data

    Use filter_yo_extrema to remove invalid/incomplete profiles
    """
    profile_df = df.copy()
    profile_df['profile'] = np.nan  # Fill profile with nans
    tmp_df = df.copy()

    if tsint is None:
        tsint = 2

    # Make 't' epochs and not a DateTimeIndex
    tmp_df['t'] = masked_epoch(tmp_df.t)
    # Set negative depth values to NaN
    tmp_df.loc[tmp_df.z <= 0, 'z'] = np.nan

    # Remove NaN rows
    tmp_df = tmp_df.dropna(subset=['t', 'z'], how='any')

    if len(tmp_df) < 2:
        return None

    # Create the fixed timestamp array from the min timestamp to the max
    # timestamp spaced by tsint intervals
    ts = np.arange(tmp_df.t.min(), tmp_df.t.max(), tsint)

    # Stretch estimated values for interpolation to span entire dataset
    interp_z = np.interp(
        ts,
        tmp_df.t,
        tmp_df.z,
        left=tmp_df.z.iloc[0],
        right=tmp_df.z.iloc[-1]
    )
    del tmp_df

    if len(interp_z) < 2:
        return None

    filtered_z = boxcar_smooth_dataset(interp_z, max(tsint // 2, 1))
    delta_depth = calculate_delta_depth(filtered_z)

    # Find where the direction of travel flips and build the list of profile
    # start/stop index pairs in interpolated space
    p_inds = np.empty((0, 2))
    inflections = np.where(np.diff(delta_depth) != 0)[0]

    if inflections.size < 1:
        return profile_df

    p_inds = np.append(p_inds, [[0, inflections[0]]], axis=0)
    for p in range(len(inflections) - 1):
        p_inds = np.append(p_inds, [[inflections[p], inflections[p + 1]]], axis=0)
    p_inds = np.append(p_inds, [[inflections[-1], len(ts) - 1]], axis=0)

    # Start profile index
    profile_index = 0
    ts_window = tsint * 2

    # Iterate through the profile start/stop indices
    for p0, p1 in p_inds:
        min_time = pd.to_datetime(ts[int(p0)] - ts_window, unit='s')
        max_time = pd.to_datetime(ts[int(p1)] + ts_window, unit='s')

        # Get rows between the min and max time
        time_between = profile_df.t.between(min_time, max_time, inclusive=True)
        # Get indexes of the between rows since we can't assign by the range
        # due to NaT values
        ixs = profile_df.loc[time_between].index.tolist()
        # Set the rows' profile column to the profile id
        if len(ixs) > 1:
            profile_df.loc[ixs[0]:ixs[-1], 'profile'] = profile_index
        elif len(ixs) == 1:
            profile_df.loc[ixs[0], 'profile'] = profile_index
        else:
            L.debug(
                'No data rows matched the time range of this profile, skipping.'
            )

        # Increment the profile index
        profile_index += 1

    # Remove rows that were not assigned a profile
    # profile_df = profile_df.loc[~profile_df.profile.isnull()]

    # L.info(
    #     list(zip(
    #         profile_df.t,
    #         profile_df.profile,
    #         profile_df.z,
    #     ))[0:20]
    # )

    return profile_df
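# --- Illustrative usage sketch (assumptions noted below) ---
# Hypothetical call to assign_profiles. Assumes the module helpers
# masked_epoch and boxcar_smooth_dataset are available, and that the input
# frame carries datetimes in 't' and depths in 'z'; the synthetic yo is made up.
import numpy as np
import pandas as pd

seconds = np.arange(0, 3600, 5.0)
df = pd.DataFrame({
    't': pd.to_datetime(seconds, unit='s', origin='2020-01-01'),
    'z': 40 - 35 * np.cos(2 * np.pi * seconds / 1200.0),  # synthetic yo, 5-75 m
})

profiles = assign_profiles(df, tsint=2)

# Each row now carries a profile id ('profile' is NaN where nothing matched)
print(profiles.groupby('profile').agg(
    start=('t', 'min'), end=('t', 'max'), max_depth=('z', 'max')))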