def filter_no_data_at_profile_start(profile_data, threshold=1):
    """Profile filter that will remove a profile if there is no science data
    at the beginning (defined as the first 10%) of the profile with extra
    emphasis on pressure from the CTD.

    This tries to eliminate the case where a short profile that should have
    no data, turns on data sampling just before the inflection point and has
    enough science data to pass the other filters.  The rationale being that
    if there is no data at the start of a profile, it was not intended to be
    sampled.

    Note: a profile not removed by this filter might still be removed by
    another active filter.

    :param profile_data: profile data object providing `source_file` and
        `getdata` (assumed GliderData-like -- TODO confirm against caller)
    :param threshold: The minimum number of minutes at the start of the
        profile that requires data to occur in.
    :return: bool value if the profile is to be removed or not.
    """
    remove_profile = False
    # skip real-time ("rtime") source files; their data are too sparse for
    # this start-of-profile heuristic
    if 'rtime' in profile_data.source_file:
        return remove_profile
    timestamps = profile_data.getdata(TIMESENSOR)
    # ToDo: change explicit pressure here to a PRESSURESENSOR variable
    pres = profile_data.getdata('sci_water_pressure')

    # indices covering the first 10% of the profile's records
    first_portion_of_dive = list(range(int(len(timestamps) / 10)))

    # use the amount of time that is greater, the first 10% of the dive,
    # or at least `threshold` minutes.
    # Guard: for very short profiles (< 10 records) the 10% slice is empty
    # and indexing [-1] would raise IndexError; treat elapsed time as 0 so
    # we fall back to the `threshold`-minute window below.
    if first_portion_of_dive:
        time_len = (timestamps[first_portion_of_dive][-1]
                    - timestamps[first_portion_of_dive][0])
    else:
        time_len = 0.0
    if time_len / 60. < threshold:
        first_portion_of_dive = np.flatnonzero(
            timestamps < timestamps[0] + 60 * threshold)

    data_indices = processing.all_sci_indices(profile_data)
    pressure_ii = np.flatnonzero(np.isfinite(pres))
    # remove the profile when neither finite pressure nor any science data
    # record falls inside the starting window
    if len(np.intersect1d(pressure_ii, first_portion_of_dive)) == 0:
        remove_profile = True
    elif len(np.intersect1d(data_indices, first_portion_of_dive)) == 0:
        remove_profile = True
    return remove_profile
def filter_datatime_lessthan(profile_data, threshold=1, data_pts_threshold=4):
    """Profile filter that will remove a profile if the elapsed time for the
    data collected in a profile is less than `threshold` minutes.

    Note: a profile not removed by this filter might still be removed by
    another active filter.

    :param profile_data: profile data object providing `getdata` (assumed
        GliderData-like -- TODO confirm against caller)
    :param threshold: minimum number of minutes of science data required to
        keep the profile.
    :param data_pts_threshold: minimum number of science data records
        required to keep the profile.
    :return: bool value if the profile is to be removed or not.
    """
    timestamps = profile_data.getdata(TIMESENSOR)
    data_indices = processing.all_sci_indices(profile_data)
    # Too few science records: the profile is removed regardless of elapsed
    # time, so return early instead of computing a time sum over a possibly
    # empty index set (the original flag was never reset once set).
    if len(data_indices) < data_pts_threshold:
        return True
    sci_time = timestamps[data_indices]
    minutes_of_data = cum_data_time_sum(sci_time) / 60.
    return minutes_of_data < threshold
def find_profiles_by_depth(
        self, depth_sensor='m_depth',
        tsint=2, winsize=10):
    """Discovery of profiles in a glider segment using depth and time.

    Profiles are discovered by smoothing the depth timeseries and using
    the derivative of depth vs time to find the inflection points to
    break the segment into profiles.  Profiles are truncated to where
    science data exists; science sensors for the glider are configured in
    Configuration.py.  Depth can be `m_depth` or CTD pressure
    (Default m_depth).  For smoothing a filtered depth is created at
    regular time intervals `tsint` (default 2 secs) and boxcar filtered
    with window `winsize` (default 10 points).  The smoothing is affected
    by both choice of `tsint` and `winsize`, but usually still returns
    similar profiles.  After profiles are discovered, they should be
    filtered with the `filter_profiles` method of this class, which
    removes profiles that are not true profiles.

    :param depth_sensor: The Depth sensor to use for profile discovery.
        Should be either m_depth, sci_water_pressure, or a derivative of
        those 2.  Default is `m_depth`
    :param tsint: Time interval in seconds for filtered depth.
        This affects filtering.  Default is 2.
    :param winsize: Window size for boxcar smoothing filter.
    :return: output is a list of profile indices in self.indices
    """
    self._indices = []
    depth = self.dba.getdata(depth_sensor)
    time_ = self.dba.getdata('m_present_time')

    # Set negative depth values to NaN; using this method to avoid numpy
    # warnings from < when nans are in the array
    depth_ii = np.flatnonzero(np.isfinite(depth))  # non-nan indices
    neg_depths = np.flatnonzero(depth[depth_ii] <= 0)  # indices to depth_ii
    depth[depth_ii[neg_depths]] = np.nan

    # Remove NaN depths and truncate to when science data begins being
    # recorded and ends
    depth_ii = np.flatnonzero(np.isfinite(depth))

    sci_indices = all_sci_indices(self.dba)  # from ooidac.processing
    if len(sci_indices) > 0:
        starting_index = sci_indices[0]
        ending_index = sci_indices[-1]
    else:
        return  # no science_indices, then we don't care to finish

    # keep only finite-depth indices inside the science data span
    depth_ii = depth_ii[
        np.logical_and(
            depth_ii >= starting_index,
            depth_ii <= ending_index)
    ]

    # ---Create a smoothed depth timeseries for finding inflections ------#

    # Find start and end times first adding winsize * tsint timesteps
    # onto the start and end to account for filter edge effects
    itime_start = np.ceil(time_[depth_ii].min()) - winsize * tsint
    itime_end = np.floor(time_[depth_ii].max()) + (winsize + 1) * tsint
    itime = np.arange(itime_start, itime_end, tsint)
    # interpolate depth onto the regular time grid; edges are clamped to
    # the first/last finite depth value
    idepth = np.interp(itime, time_[depth_ii], depth[depth_ii],
                       left=depth[depth_ii[0]], right=depth[depth_ii[-1]])
    fz = boxcar_smooth_dataset(idepth, winsize)
    # remove the extra points with filter edge effects
    fz = fz[winsize:-winsize]
    itime = itime[winsize:-winsize]
    idepth = idepth[winsize:-winsize]

    # Zero crossings of the time derivative of filtered depth are the
    # inflection points.  Differential time is midway between the
    # filtered timestamps.
    # Originally, this used scipy's fsolver to locate the exact zero
    # crossing, but only the timestamp before the zero crossing is needed
    # to be the last in a profile and the timestamp after the zero
    # crossing to be the first in the next profile.
    dz_dt = np.diff(fz) / np.diff(itime)
    dtime = itime[:-1] + np.diff(itime) / 2  # differential time

    # Get the time point just after a zero crossing.  The flatnonzero
    # statement below gets the point before a zero crossing.
    zero_crossings_ii = np.flatnonzero(abs(np.diff(np.sign(dz_dt))))
    # midpoint between the differential timestamps bracketing the crossing
    zc_times = dtime[zero_crossings_ii] + (
        dtime[zero_crossings_ii + 1] - dtime[zero_crossings_ii]) / 2.

    # keep only switch times strictly inside the science data time span
    profile_switch_times = zc_times[np.logical_and(
        zc_times > time_[starting_index],
        zc_times < time_[ending_index]
    )]
    # insert the timestamp of the first science data point at the start
    # and the last data point at the end.
    profile_switch_times = np.insert(
        profile_switch_times, [0, len(profile_switch_times)],
        [time_[starting_index], time_[ending_index]])
    self.inflection_times = profile_switch_times

    # NOTE(review): adjust_inflections re-derives the switch times from the
    # raw depth/time series; the inserted boundary times above feed into it
    # via self.inflection_times -- confirm against that method.
    profile_switch_times = self.adjust_inflections(depth, time_)

    # use the time range to gather indices for each profile
    for ii in range(len(profile_switch_times)-1):
        pstart = profile_switch_times[ii]
        pend = profile_switch_times[ii+1]
        profile_ii = np.flatnonzero(
            np.logical_and(
                time_ >= pstart,
                time_ <= pend))  # inclusive since before the inflection
        if len(profile_ii) == 0:
            continue
        self._indices.append(profile_ii)
def orig_find_profiles_by_depth(self, tsint=2, filter_winsize=10):
    """Returns the start and stop timestamps for every profile indexed from
    the depth timeseries

    Parameters:
        time, depth

    Returns:
        A Nx2 array of the start and stop timestamps indexed from the yo

    Use filter_yo_extrema to remove invalid/incomplete profiles

    :param tsint: time interval in seconds for the regular interpolation
        grid used before smoothing.  Default is 2.
    :param filter_winsize: window size (points) for the boxcar smoothing
        filter.  Default is 10.
    """
    # Create list of profile indices - pearce / kerfoot method
    self._indices = []

    # prefer the lat/lon-corrected time sensor when present
    if 'llat_time' in self.dba.sensor_names:
        timestamps = self.dba['llat_time']
    else:
        timestamps = self.dba['m_present_time']
    timestamps = timestamps['data']

    if 'm_depth' not in self.dba.sensor_names:
        logging.warning(
            ('m_depth not found in dba {:s} for profiles, '
             'trying pressure/depth instead').format(self.dba.source_file)
        )
        if 'llat_depth' not in self.dba.sensor_names:
            logging.warning('no depth source found in dba {:s}'.format(
                self.dba.source_file)
            )
            return
        depth = self.dba['llat_depth']['data']
    else:
        depth = self.dba['m_depth']['data']

    # validate_glider_args(timestamps, depth)

    # Set negative depth values to NaN; using this method to avoid numpy
    # warnings from < when nans are in the array
    depth_ii = np.flatnonzero(np.isfinite(depth))  # non-nan indices
    neg_depths = np.flatnonzero(depth[depth_ii] <= 0)  # indices to depth_ii
    depth[depth_ii[neg_depths]] = np.nan

    # Remove NaN depths and truncate to when science data begins being
    # recorded and ends
    depth_ii = np.flatnonzero(np.isfinite(depth))

    # sci_indices = all_sci_indices(self.dba)  # from ooidac.processing
    # if len(sci_indices) > 0:
    #     starting_index = sci_indices[0]
    #     ending_index = sci_indices[-1]
    # else:
    #     starting_index = 0
    #     ending_index = len(timestamps) - 1
    #
    # depth_ii = depth_ii[
    #     np.logical_and(
    #         depth_ii > starting_index,
    #         depth_ii < ending_index)
    # ]

    depth = depth[depth_ii]
    ts = timestamps[depth_ii]

    if len(depth) < 2:
        logger.debug('Skipping segment that contains < 2 rows of depth')
        return

    # Create the fixed timestamp array from the min timestamp to the max
    # timestamp spaced by tsint intervals
    min_ts = ts.min()
    max_ts = ts.max()
    if max_ts - min_ts < tsint:
        logger.warning('Not enough timestamps for depth interpolation '
                       'for profile discovery')
        return

    interp_ts = np.arange(min_ts, max_ts, tsint)
    # Stretch estimated values for interpolation to span entire dataset
    interp_z = np.interp(
        interp_ts, ts, depth, left=depth[0], right=depth[-1]
    )

    filtered_z = boxcar_smooth_dataset(interp_z, filter_winsize)

    # truncate the filtered depths by science start and end times
    sci_indices = all_sci_indices(self.dba)  # from ooidac.processing
    if len(sci_indices) > 0:
        sci_start_time = timestamps[sci_indices[0]]
        sci_end_time = timestamps[sci_indices[-1]]
    else:
        sci_start_time = timestamps[0]
        sci_end_time = timestamps[-1]

    # pick the interpolated grid point closest to the science start/end
    start_minimizer = abs(interp_ts - sci_start_time)
    end_minimizer = abs(interp_ts - sci_end_time)
    sci_start_ii = np.flatnonzero(
        start_minimizer == np.min(start_minimizer))
    # NOTE(review): only an exactly-two-way tie picks the later point; a
    # single match stays a length-1 array and relies on int() below --
    # a >2-way tie would make int() fail.  Confirm whether ties beyond 2
    # can occur on a regular tsint grid.
    if len(sci_start_ii) == 2:
        sci_start_ii = sci_start_ii[1]
    sci_end_ii = np.flatnonzero(end_minimizer == np.min(end_minimizer))
    if len(sci_end_ii) == 2:
        sci_end_ii = sci_end_ii[1]
    sci_truncate_indices = slice(int(sci_start_ii), int(sci_end_ii + 1))
    interp_ts = interp_ts[sci_truncate_indices]
    filtered_z = filtered_z[sci_truncate_indices]

    delta_depth = calculate_delta_depth(filtered_z)

    # sign changes in the depth delta mark inflection points
    inflections = np.where(np.diff(delta_depth) != 0)[0] + 1
    if not inflections.any():
        return

    # inflection_times = ddts[inflections]
    inflection_times = interp_ts[inflections]  # for some reason this
    # works better

    self.inflection_times = inflection_times

    # get the first profile indices manually so that it gets all of the
    # data up to the first inflection including the inflection
    profile_i = np.flatnonzero(timestamps <= inflection_times[0])
    self._indices.append(profile_i)

    # then iterate over the inflection times and get each profile indices
    # excluding the preceding inflection and including the ending inflection
    for ii in range(1, len(inflection_times)):
        # Find all rows in the original yo that fall between the
        # interpolated timestamps
        profile_i = np.flatnonzero(
            np.logical_and(
                timestamps > inflection_times[ii - 1],
                timestamps <= inflection_times[ii]
            )
        )
        if len(profile_i) == 0:
            continue
        self._indices.append(profile_i)

    # lastly get the last profile manually again from the last inflection
    # time to the end of the dataset
    profile_i = np.flatnonzero(timestamps > inflection_times[-1])
    self._indices.append(profile_i)