def return_to_wet_date(wet_season_filter_data, broad_filter_data, wet_threshold_perc, peak_detect_perc, peak_sensitivity_wet, column_number):
    """Find the index at which flow returns to wet-season levels.

    A starting peak is picked from the peaks of ``wet_season_filter_data``
    and the series is then walked backwards from that peak until the
    relative flow ``(value - min) / (max - min)`` drops below
    ``wet_threshold_perc``.

    Args:
        wet_season_filter_data: Smoothed flow series used for peak detection
            and for the backwards search.
        broad_filter_data: Smoothed flow series providing the reference
            maximum (taken from index 20 onwards) and the minimum of the
            samples that precede that maximum.
        wet_threshold_perc: Relative-flow level below which a sample is
            accepted as the return date.
        peak_detect_perc: Relative-flow level a peak must exceed to be used
            as the search start when more than one peak is detected.
        peak_sensitivity_wet: Second argument handed to ``peakdet``.
        column_number: Not used by this function; kept so existing callers
            keep working.

    Returns:
        The index ``search_index - offset`` of the first qualifying sample
        found while walking backwards, or ``None`` when no start peak or no
        qualifying sample exists.
    """
    max_wet_peak_mag = max(broad_filter_data[20:])
    max_wet_peak_index = find_index(broad_filter_data, max_wet_peak_mag)

    # min() raises on an empty sequence, so bail out when nothing precedes
    # the reference maximum.
    pre_peak = broad_filter_data[:max_wet_peak_index]
    if len(pre_peak) == 0:
        return None
    min_wet_peak_mag = min(pre_peak)

    # A flat series has no meaningful relative flow (division by zero).
    flow_range = max_wet_peak_mag - min_wet_peak_mag
    if flow_range == 0:
        return None

    maxarray_wet, _ = peakdet(wet_season_filter_data, peak_sensitivity_wet)

    # Choose the peak from which the backwards search starts.
    search_index = None
    if len(maxarray_wet) == 1:
        only_peak_index = maxarray_wet[0][0]
        # A lone peak at index 0 is replaced by the broad-filter maximum.
        if only_peak_index == 0:
            search_index = max_wet_peak_index
        else:
            search_index = int(only_peak_index)
    else:
        for peak in maxarray_wet:
            if (peak[1] - min_wet_peak_mag) / flow_range > peak_detect_perc:
                search_index = int(peak[0])
                break

    if not search_index:
        return None

    # Walk backwards from the start peak towards the beginning of the series.
    search_window = wet_season_filter_data[:search_index]
    last_offset = len(search_window) - 1
    for offset, value in enumerate(reversed(search_window)):
        # Reaching the oldest sample means no return date was found. The
        # length is taken first and 1 subtracted afterwards; subtracting
        # inside len() left this guard unreachable.
        if offset == last_offset:
            return None
        if (value - min_wet_peak_mag) / flow_range < wet_threshold_perc:
            return search_index - offset
    return None
def return_to_wet_date(flow_data, wet_season_filter_data, broad_filter_data, slope_detection_data, wet_threshold_perc, peak_detect_perc, peak_sensitivity_wet, column_number, slope_sensitivity):
    """Find the index at which flow returns to wet-season levels.

    A starting peak is picked from the peaks of ``wet_season_filter_data``
    and the series is then walked backwards from that peak. A sample is
    accepted when its relative flow ``(value - min) / (max - min)`` is below
    ``wet_threshold_perc`` and the absolute first derivative of a cubic
    spline fitted to ``slope_detection_data`` is below
    ``max / slope_sensitivity`` at that index.

    Args:
        flow_data: Not used by this function; kept so existing callers keep
            working.
        wet_season_filter_data: Smoothed flow series used for peak detection
            and for the backwards search.
        broad_filter_data: Smoothed flow series providing the reference
            maximum (taken from index 20 onwards) and the minimum of the
            samples that precede that maximum.
        slope_detection_data: Series the spline is fitted to for the
            rate-of-change requirement.
        wet_threshold_perc: Relative-flow level below which a sample can be
            accepted as the return date.
        peak_detect_perc: Relative-flow level a peak must exceed to be used
            as the search start when more than one peak is detected.
        peak_sensitivity_wet: Second argument handed to ``peakdet``.
        column_number: Not used by this function; kept so existing callers
            keep working.
        slope_sensitivity: Divisor applied to the reference maximum to get
            the slope limit.

    Returns:
        The index ``search_index - offset`` of the first qualifying sample
        found while walking backwards, or ``None`` when no start peak or no
        qualifying sample exists.
    """
    max_wet_peak_mag = max(broad_filter_data[20:])
    max_wet_peak_index = find_index(broad_filter_data, max_wet_peak_mag)

    # min() raises on an empty sequence, so bail out when nothing precedes
    # the reference maximum.
    pre_peak = broad_filter_data[:max_wet_peak_index]
    if len(pre_peak) == 0:
        return None
    min_wet_peak_mag = min(pre_peak)

    # A flat series has no meaningful relative flow (division by zero).
    flow_range = max_wet_peak_mag - min_wet_peak_mag
    if flow_range == 0:
        return None

    maxarray_wet, _ = peakdet(wet_season_filter_data, peak_sensitivity_wet)

    # Derivative of the smoothed data for the rate-of-change requirement.
    x_axis = list(range(len(slope_detection_data)))
    spl = ip.UnivariateSpline(x_axis, slope_detection_data, k=3, s=3)
    spl_first = spl.derivative(n=1)
    slope_limit = max_wet_peak_mag / slope_sensitivity

    # Choose the peak from which the backwards search starts.
    search_index = None
    if len(maxarray_wet) == 1:
        only_peak_index = maxarray_wet[0][0]
        # A lone peak at index 0 is replaced by the broad-filter maximum.
        if only_peak_index == 0:
            search_index = max_wet_peak_index
        else:
            search_index = int(only_peak_index)
    else:
        for peak in maxarray_wet:
            if (peak[1] - min_wet_peak_mag) / flow_range > peak_detect_perc:
                search_index = int(peak[0])
                break

    if not search_index:
        return None

    # Walk backwards from the start peak towards the beginning of the series.
    search_window = wet_season_filter_data[:search_index]
    last_offset = len(search_window) - 1
    for offset, value in enumerate(reversed(search_window)):
        # Reaching the oldest sample means no return date was found. The
        # length is taken first and 1 subtracted afterwards; subtracting
        # inside len() left this guard unreachable.
        if offset == last_offset:
            return None
        candidate = search_index - offset
        below_threshold = (value - min_wet_peak_mag) / flow_range < wet_threshold_perc
        if below_threshold and abs(spl_first(candidate)) < slope_limit:
            return candidate
    return None
def return_to_wet_date(wet_season_filter_data, broad_filter_data, wet_threshold_perc, peak_detect_perc, peak_sensitivity_wet, column_number):
    """Find the index at which flow returns to wet-season levels.

    A starting peak is picked from the peaks of ``wet_season_filter_data``
    and the series is then walked backwards from that peak until the
    relative flow ``(value - min) / (max - min)`` drops below
    ``wet_threshold_perc``.

    Args:
        wet_season_filter_data: Smoothed flow series used for peak detection
            and for the backwards search.
        broad_filter_data: Smoothed flow series providing the reference
            maximum (taken from index 20 onwards) and the minimum of the
            samples that precede that maximum.
        wet_threshold_perc: Relative-flow level below which a sample is
            accepted as the return date.
        peak_detect_perc: Relative-flow level a peak must exceed to be used
            as the search start when more than one peak is detected.
        peak_sensitivity_wet: Second argument handed to ``peakdet``.
        column_number: Not used by this function; kept so existing callers
            keep working.

    Returns:
        The index ``search_index - offset`` of the first qualifying sample
        found while walking backwards, or ``None`` when no start peak or no
        qualifying sample exists.
    """
    max_wet_peak_mag = max(broad_filter_data[20:])
    max_wet_peak_index = find_index(broad_filter_data, max_wet_peak_mag)

    # Nothing precedes the reference maximum: no minimum can be taken.
    pre_peak = broad_filter_data[:max_wet_peak_index]
    if len(pre_peak) == 0:
        return None
    min_wet_peak_mag = min(pre_peak)

    # A flat series has no meaningful relative flow (division by zero).
    flow_range = max_wet_peak_mag - min_wet_peak_mag
    if flow_range == 0:
        return None

    maxarray_wet, _ = peakdet(wet_season_filter_data, peak_sensitivity_wet)

    # Choose the peak from which the backwards search starts.
    search_index = None
    if len(maxarray_wet) == 1:
        only_peak_index = maxarray_wet[0][0]
        # A lone peak at index 0 is replaced by the broad-filter maximum.
        if only_peak_index == 0:
            search_index = max_wet_peak_index
        else:
            search_index = int(only_peak_index)
    else:
        for peak in maxarray_wet:
            if (peak[1] - min_wet_peak_mag) / flow_range > peak_detect_perc:
                search_index = int(peak[0])
                break

    if not search_index:
        return None

    # Walk backwards from the start peak towards the beginning of the series.
    search_window = wet_season_filter_data[:search_index]
    last_offset = len(search_window) - 1
    for offset, value in enumerate(reversed(search_window)):
        # Reaching the oldest sample means no return date was found. The
        # length is taken first and 1 subtracted afterwards; subtracting
        # inside len() left this guard unreachable.
        if offset == last_offset:
            return None
        if (value - min_wet_peak_mag) / flow_range < wet_threshold_perc:
            return search_index - offset
    return None
def calc_fall_flush_timings_durations(flow_matrix, summer_timings):
    """Detect fall flush timing, magnitude, duration and wet-season start per column.

    Each column of ``flow_matrix`` is processed independently. Columns with
    too many NaNs or zeros, or whose maximum is below ``min_flow_rate``, are
    skipped and keep ``None`` in every output list. All tuning values are
    read from the module-level ``fall_params`` mapping.

    Args:
        flow_matrix: 2-D array indexed as ``flow_matrix[row, column]``; one
            column is processed per loop iteration.
        summer_timings: Per-column start indices used to slice the previous
            column when building the baseflow sample.

    Returns:
        Tuple ``(start_dates, mags, wet_dates, durations)`` of lists with one
        entry per column.
    """
    max_zero_allowed_per_year = fall_params['max_zero_allowed_per_year']
    max_nan_allowed_per_year = fall_params['max_nan_allowed_per_year']
    min_flow_rate = fall_params['min_flow_rate']
    sigma = fall_params['sigma']  # Smaller filter to find fall flush peak
    wet_sigma = fall_params['wet_sigma']  # Larger filter to find wet season peak
    peak_sensitivity = fall_params['peak_sensitivity']  # smaller is more peak
    # Maximum duration from start to end, for fall flush peak
    max_flush_duration = fall_params['max_flush_duration']
    # Return to wet season flow must be certain percentage of that year's max flow
    wet_threshold_perc = fall_params['wet_threshold_perc']
    # Size of flush peak, from rising limb to top of peak, has great enough change
    flush_threshold_perc = fall_params['flush_threshold_perc']
    min_flush_threshold = fall_params['min_flush_threshold']
    # Latest accepted date for fall flush, in Julian Date counting from
    # Oct 1st = 0. (i.e. Dec 15th = 75)
    date_cutoff = fall_params['date_cutoff']

    # Only test duration for first half of fall flush peak
    half_duration = int(max_flush_duration / 2)

    start_dates = []
    wet_dates = []
    durations = []
    mags = []

    for column_number, _ in enumerate(flow_matrix[0]):
        start_dates.append(None)
        wet_dates.append(None)
        durations.append(None)
        mags.append(None)

        # Skip columns with more than the allowed NaNs/zeros or too little flow.
        year_flow = flow_matrix[:, column_number]
        if (np.isnan(year_flow).sum() > max_nan_allowed_per_year
                or np.count_nonzero(year_flow == 0) > max_zero_allowed_per_year
                or max(year_flow) < min_flow_rate):
            continue

        # Fill NaN values via replace_nan.
        flow_data = replace_nan(year_flow)

        # Return to wet season.
        wet_filter_data = gaussian_filter1d(flow_data, wet_sigma)
        # NOTE(review): this two-argument call does not match any
        # return_to_wet_date signature visible in this part of the file --
        # confirm which definition is meant to be used here.
        return_date = return_to_wet_date(wet_filter_data, wet_threshold_perc)
        # The search may find nothing; only offset an actual date.
        if return_date is not None:
            wet_dates[-1] = return_date + 10

        # Filter noise with the small sigma to find the fall flush hump,
        # then fit a spline over it.
        filter_data = gaussian_filter1d(flow_data, sigma)
        x_axis = list(range(len(filter_data)))
        spl = ip.UnivariateSpline(x_axis, filter_data, k=3, s=3)

        # Peaks and valleys of the splined data.
        mean_flow = np.nanmean(filter_data)
        maxarray, minarray = peakdet(spl(x_axis), mean_flow * peak_sensitivity)

        # Max of the filtered data (from index 20 on) and min of the wet
        # filter data before that max.
        max_flow = max(filter_data[20:])
        max_flow_index = find_index(filter_data[20:], max_flow) + 20
        min_flow = min(wet_filter_data[:max_flow_index])

        # Nothing to test without peaks and valleys, or when the first
        # valley lies after the max flow.
        if len(maxarray) == 0 or len(minarray) == 0 or minarray[0][0] > max_flow_index:
            continue

        # Flow magnitude threshold from the previous summer's baseflow.
        # bs_med is recomputed for every column so a value from an earlier
        # column is never reused.
        wet_date = wet_dates[column_number]
        if column_number == 0:
            baseflow = list(flow_matrix[:wet_date, column_number])
        else:
            summer_date = summer_timings[column_number - 1]
            baseflow = list(flow_matrix[summer_date:, column_number - 1])
            if wet_date is not None:
                if wet_date > 20:
                    wet_date = wet_date - 20
                baseflow = baseflow + list(flow_matrix[:wet_date, column_number])
        bs_med = np.nanpercentile(baseflow, 50)

        if bs_med > 25:
            # if median baseflow is large (>25), magnitude threshold is 50%
            # above median baseflow of previous summer
            min_flush_magnitude = bs_med * 1.5
        else:
            # otherwise magnitude threshold is 100% above median baseflow of
            # previous summer
            min_flush_magnitude = bs_med * 2
        if min_flush_magnitude < min_flush_threshold:
            min_flush_magnitude = min_flush_threshold

        # Get the fall flush peak: first peak that passes the tests wins.
        for counter, peak in enumerate(maxarray):
            peak_day, peak_mag = peak[0], peak[1]
            # Shared requirements: above the wet filter curve, above the
            # magnitude threshold and not later than the cutoff date. The
            # cutoff is applied to every peak, including the first one.
            qualifies = (peak_mag > wet_filter_data[int(peak_day)]
                         and peak_mag > min_flush_magnitude
                         and peak_day <= date_cutoff)

            if counter == 0:
                # First peak: before the half duration allowed (and not at
                # index 0), or the flow half a duration to the left is
                # sufficiently lower, or the first valley follows within
                # half a duration.
                accepted = qualifies and (
                    (peak_day < half_duration and peak_day != 0)
                    or (peak_mag - spl(peak_day - half_duration)) / peak_mag > flush_threshold_perc
                    or minarray[0][0] - peak_day < half_duration)
            elif counter == len(minarray):
                # No matching valley left to compare against.
                start_dates[-1] = None
                mags[-1] = None
                break
            else:
                # Valley and peak are closer than half a duration on either
                # side, or the flow on both sides of the peak (half a
                # duration away) falls below flush_threshold_perc.
                accepted = qualifies and (
                    minarray[counter][0] - peak_day < half_duration
                    or peak_day - minarray[counter - 1][0] < half_duration
                    or ((spl(peak_day - half_duration) - min_flow) / (peak_mag - min_flow) < flush_threshold_perc
                        and (spl(peak_day + half_duration) - min_flow) / (peak_mag - min_flow) < flush_threshold_perc))

            if accepted:
                start_dates[-1] = int(peak_day)
                mags[-1] = peak_mag
                break

        # Discard a flush that falls after the wet-season start; compare
        # only when a wet date exists.
        if wet_dates[-1] and (start_dates[-1] is None or start_dates[-1] > wet_dates[-1]):
            start_dates[-1] = None
            mags[-1] = None

        # Duration of the fall flush.
        current_duration, left, right = calc_fall_flush_durations_2(
            filter_data, start_dates[-1])
        durations[-1] = current_duration
        _plotter(x_axis, flow_data, filter_data, wet_filter_data, start_dates,
                 wet_dates, column_number, left, right, maxarray, minarray,
                 min_flush_magnitude)

    return start_dates, mags, wet_dates, durations
def calc_fall_flush_durations_2(filter_data, date):
    """Estimate the duration of a flush around the peak index ``date``.

    The nearest valley on each side of ``date`` (from ``peakdet``) gives
    initial ``left``/``right`` bounds. When a bound lies more than 10
    samples from ``date`` it is refined using the first derivative of a
    cubic spline fitted to that limb, together with a flow-percentile test.

    Args:
        filter_data: Smoothed flow series.
        date: Index of the flush peak, or ``None`` when no flush was found.

    Returns:
        Tuple ``(duration, left, right)``. ``duration`` is ``None`` when
        ``date`` is missing, ``date - left`` when ``left`` is non-zero,
        otherwise ``right - date`` when ``right`` is non-zero, otherwise 0.
    """
    # Left side sharp: slope of rising limb (i.e. derivative) must be "sharp"
    der_percent_threshold_left = 50
    flow_percent_threshold_left = 80
    # Right side mellow: slope of falling limb has a lower requirement to be
    # part of flush duration
    der_percent_threshold_right = 30
    flow_percent_threshold_right = 80

    left = 0
    right = 0

    # Nothing to measure without a peak index (0 is a valid index).
    if not (date or date == 0):
        return None, left, right

    date = int(date)
    _, valleys_before = peakdet(filter_data[:date], 0.01)
    _, valleys_after = peakdet(filter_data[date:], 0.01)

    if list(valleys_before):
        left = int(valleys_before[-1][0])
    if list(valleys_after):
        right = int(date - 2 + valleys_after[0][0])

    if date - left > 10:
        # Spline over the rising limb and its first derivative.
        rising_limb = filter_data[left:date]
        x_axis_left = list(range(len(rising_limb)))
        spl_left = ip.UnivariateSpline(x_axis_left, rising_limb, k=3, s=3)
        rising_slopes = spl_left.derivative(1)(x_axis_left)
        # Derivative cutoff, and flow cutoff that avoids the rounded peak.
        slope_cutoff_left = np.nanpercentile(
            rising_slopes, der_percent_threshold_left)
        flow_cutoff_left = np.nanpercentile(
            list(set(rising_limb)), flow_percent_threshold_left)
        for index_left, der in enumerate(reversed(rising_slopes)):
            if (der < slope_cutoff_left
                    and filter_data[date - index_left] < flow_cutoff_left):
                left = date - index_left
                break

    if right - date > 10:
        # Same procedure on the falling limb, using absolute slopes.
        falling_limb = filter_data[date:right]
        x_axis_right = list(range(len(falling_limb)))
        spl_right = ip.UnivariateSpline(x_axis_right, falling_limb, k=3, s=3)
        falling_slopes = spl_right.derivative(1)(x_axis_right)
        slope_cutoff_right = abs(np.nanpercentile(
            falling_slopes, der_percent_threshold_right))
        flow_cutoff_right = np.nanpercentile(
            list(set(falling_limb)), flow_percent_threshold_right)
        for index_right, der in enumerate(falling_slopes):
            if (abs(der) < slope_cutoff_right
                    and filter_data[date + index_right] < flow_cutoff_right):
                right = date + index_right
                break

    if left:
        duration = int(date - left)
    elif right:
        duration = int(right - date)
    else:
        duration = 0

    return duration, left, right
def calc_fall_flush_timings_durations(flow_matrix, summer_timings, class_number, fall_params=def_fall_params):
    """Detect fall flush timing, magnitude, duration and wet-season start per column.

    Each column of ``flow_matrix`` is processed independently. Columns with
    too many NaNs or zeros, or whose maximum is below ``min_flow_rate``, are
    skipped and keep ``None`` in every output list.

    Args:
        flow_matrix: 2-D array indexed as ``flow_matrix[row, column]``; one
            column is processed per loop iteration.
        summer_timings: Per-column start indices used to slice the previous
            column when building the baseflow sample.
        class_number: Selects the smoothing sigmas used for the wet-season
            and slope-detection series.
        fall_params: Parameter overrides, merged with ``def_fall_params`` by
            ``set_user_params``.

    Returns:
        Tuple ``(start_dates, mags, wet_dates, durations)`` of lists with one
        entry per column.
    """
    params = set_user_params(fall_params, def_fall_params)
    # Positional unpacking: depends on the order in which ``params`` yields
    # its values.
    (max_zero_allowed_per_year, max_nan_allowed_per_year, min_flow_rate,
     sigma, broad_sigma, wet_season_sigma, peak_sensitivity,
     peak_sensitivity_wet, max_flush_duration, min_flush_percentage,
     wet_threshold_perc, peak_detect_perc, flush_threshold_perc,
     min_flush_threshold, date_cutoff, slope_sensitivity) = params.values()

    # Classes that get fixed sigmas (6 for the wet-season series, 1 for the
    # slope-detection series) instead of the configured ones.
    fixed_sigma_classes = (3, 4, 5, 6, 7, 8)

    start_dates = []
    wet_dates = []
    durations = []
    mags = []

    for column_number in range(len(flow_matrix[0])):
        start_dates.append(None)
        wet_dates.append(None)
        durations.append(None)
        mags.append(None)

        # Skip columns with more than the allowed NaNs/zeros or too little flow.
        year_flow = flow_matrix[:, column_number]
        if (np.isnan(year_flow).sum() > max_nan_allowed_per_year
                or np.count_nonzero(year_flow == 0) > max_zero_allowed_per_year
                or max(year_flow) < min_flow_rate):
            continue

        # Fill NaN values via replace_nan.
        flow_data = replace_nan(year_flow)

        # Return to wet season: build the three smoothed series it needs.
        if class_number in fixed_sigma_classes:
            wet_season_filter_data = gaussian_filter1d(flow_data, 6)
        else:
            wet_season_filter_data = gaussian_filter1d(flow_data, wet_season_sigma)

        broad_filter_data = gaussian_filter1d(flow_data, broad_sigma)

        if class_number in (1, 2, 9):
            slope_sigma = 7
        elif class_number in fixed_sigma_classes:
            slope_sigma = 1
        else:
            slope_sigma = 4
        slope_detection_data = gaussian_filter1d(flow_data, slope_sigma)

        return_date = return_to_wet_date(
            flow_data, wet_season_filter_data, broad_filter_data,
            slope_detection_data, wet_threshold_perc, peak_detect_perc,
            peak_sensitivity_wet, column_number, slope_sensitivity)
        if return_date:
            wet_dates[-1] = return_date

        # Filter noise with the small sigma to find the fall flush hump,
        # then fit a spline over it.
        filter_data = gaussian_filter1d(flow_data, sigma)
        x_axis = list(range(len(filter_data)))
        spl = ip.UnivariateSpline(x_axis, filter_data, k=3, s=3)

        # Peaks and valleys of the splined data.
        mean_flow = np.nanmean(filter_data)
        maxarray, minarray = peakdet(spl(x_axis), mean_flow * peak_sensitivity)

        # Max of the filtered data (from index 20 on) and min of the broad
        # filter data before that max.
        max_flow = max(filter_data[20:])
        max_flow_index = find_index(filter_data[20:], max_flow) + 20
        min_flow = min(broad_filter_data[:max_flow_index])

        # Nothing to test without peaks and valleys, or when the first
        # valley lies after the max flow.
        if not list(maxarray) or not list(minarray) or minarray[0][0] > max_flow_index:
            continue

        # Flow magnitude threshold from the previous summer's baseflow.
        if column_number == 0:
            wet_date = wet_dates[0]
            baseflow = list(flow_matrix[:wet_date, column_number])
        else:
            summer_date = summer_timings[column_number - 1]
            baseflow = list(flow_matrix[summer_date:, column_number - 1])
            current_wet_date = wet_dates[column_number]
            if current_wet_date:
                if current_wet_date > 20:
                    wet_date = current_wet_date - 20
                else:
                    wet_date = current_wet_date
                baseflow = baseflow + list(flow_matrix[:wet_date, column_number])
        bs_med = np.nanpercentile(baseflow, 50)

        # Only test duration for first half of fall flush peak
        half_duration = int(max_flush_duration / 2)
        if bs_med > 25:
            # if median baseflow is large (>25), magnitude threshold is 50%
            # above median baseflow of previous summer
            min_flush_magnitude = bs_med * 1.5
        else:
            # otherwise magnitude threshold is 100% above median baseflow of
            # previous summer
            min_flush_magnitude = bs_med * 2
        if min_flush_magnitude < min_flush_threshold:
            min_flush_magnitude = min_flush_threshold

        # Get the fall flush peak: first peak that passes the tests wins.
        for counter, peak in enumerate(maxarray):
            peak_day, peak_mag = peak[0], peak[1]
            # Requirements shared by every acceptance rule.
            passes_filters = (peak_mag > broad_filter_data[int(peak_day)]
                              and peak_mag > min_flush_magnitude
                              and peak_day <= date_cutoff)

            if counter == 0:
                # First peak: before the half duration allowed (and not at
                # index 0), or the flow half a duration to the left is
                # sufficiently lower, or the first valley follows within
                # half a duration.
                accepted = passes_filters and (
                    (peak_day < half_duration and peak_day != 0)
                    or (peak_mag - spl(peak_day - half_duration)) / peak_mag > flush_threshold_perc
                    or minarray[0][0] - peak_day < half_duration)
            elif counter == len(minarray):
                # No matching valley left to compare against.
                start_dates[-1] = None
                mags[-1] = None
                break
            else:
                # Valley and peak are closer than half a duration on either
                # side, or the flow on both sides of the peak (half a
                # duration away) falls below flush_threshold_perc.
                accepted = passes_filters and (
                    minarray[counter][0] - peak_day < half_duration
                    or peak_day - minarray[counter - 1][0] < half_duration
                    or ((spl(peak_day - half_duration) - min_flow) / (peak_mag - min_flow) < flush_threshold_perc
                        and (spl(peak_day + half_duration) - min_flow) / (peak_mag - min_flow) < flush_threshold_perc))

            if accepted:
                start_dates[-1] = int(peak_day)
                mags[-1] = peak_mag
                break

        # Discard a flush that falls after the wet-season start.
        if wet_dates[-1] and (start_dates[-1] is None or start_dates[-1] > wet_dates[-1]):
            start_dates[-1] = None
            mags[-1] = None

        # Duration of the fall flush (the bounds are only needed for plotting).
        current_duration, _, _ = calc_fall_flush_durations_2(
            filter_data, start_dates[-1])
        durations[-1] = current_duration

    return start_dates, mags, wet_dates, durations
def calc_start_of_summer(matrix):
    """Detect the start-of-summer index for every column of ``matrix``.

    For each column the flow is extended with the first 100 rows of the next
    column (except for the last column), smoothed, and searched after the
    last qualifying peak for the first index where the spline derivative is
    small and the smoothed flow is below a magnitude threshold. All tuning
    values are read from the module-level ``summer_params`` mapping.

    Args:
        matrix: 2-D array indexed as ``matrix[row, column]``.

    Returns:
        List with one entry per column: the detected index, or ``None`` when
        the column was skipped or nothing qualified.
    """
    # Adjustable parameters for start of summer date detection.
    max_zero_allowed_per_year = summer_params['max_zero_allowed_per_year']
    max_nan_allowed_per_year = summer_params['max_nan_allowed_per_year']
    # Read from the parameters, but the smoothing below uses the fixed
    # sigmas 4 and 12.
    sigma = summer_params['sigma']
    # increased sensitivity returns smaller threshold for derivative
    sensitivity = summer_params['sensitivity']
    # identifies last major peak after which to search for start date
    peak_sensitivity = summer_params['peak_sensitivity']
    # max search date for the peak flow date
    max_peak_flow_date = summer_params['max_peak_flow_date']
    # require that summer start is below this flow threshold
    min_summer_flow_percent = summer_params['min_summer_flow_percent']

    column_count = len(matrix[0])
    start_dates = []

    for column_number in range(column_count):
        start_dates.append(None)

        # Too many zeros or NaNs: skip to the next column.
        year_flow = matrix[:, column_number]
        too_many_nans = np.isnan(year_flow).sum() > max_nan_allowed_per_year
        too_many_zeros = np.count_nonzero(year_flow == 0) > max_zero_allowed_per_year
        if too_many_nans or too_many_zeros:
            continue

        # Extend the column with the first 100 rows of the next column,
        # except for the last column.
        if column_number == column_count - 1:
            flow_data = year_flow
        else:
            flow_data = list(year_flow) + list(matrix[:100, column_number + 1])

        # Fill NaN values via replace_nan.
        flow_data = replace_nan(flow_data)

        # Smooth the series; the second, heavier smoothing is only passed to
        # the plot helper.
        smooth_data = gaussian_filter1d(flow_data, 4)
        smooth_data2 = gaussian_filter1d(flow_data, 12)
        x_axis = list(range(len(smooth_data)))

        # Spline fit of the smoothed series and its first derivative.
        spl = ip.UnivariateSpline(x_axis, smooth_data, k=3, s=3)
        spl_first = spl.derivative(1)

        max_flow_data = max(smooth_data[:366])
        max_flow_index = find_index(smooth_data, max_flow_data)

        # Major peaks of the smoothed data.
        mean_flow = np.nanmean(flow_data)
        maxarray, minarray = peakdet(smooth_data, mean_flow * peak_sensitivity)

        # Start the search after the last peak before max_peak_flow_date.
        for peak in reversed(maxarray):
            peak_day = int(peak[0])
            if peak_day < max_peak_flow_date:
                max_flow_index = peak_day
                break

        # Magnitude threshold below which start of summer can begin.
        min_flow_data = min(smooth_data[max_flow_index:366])
        threshold = min_flow_data + (
            smooth_data[max_flow_index] - min_flow_data) * min_summer_flow_percent
        slope_limit = max_flow_data * (1 / sensitivity)

        # Search criteria: derivative under the rate-of-change limit, index
        # after the last major peak, flow below the threshold. The last two
        # samples are never examined.
        for index in range(max_flow_index + 1, len(smooth_data) - 2):
            if abs(spl_first(index)) < slope_limit and smooth_data[index] < threshold:
                start_dates[-1] = index
                break

        _summer_baseflow_plot(x_axis, column_number, flow_data, spl, spl_first,
                              start_dates, threshold, max_flow_index, maxarray,
                              smooth_data, smooth_data2)

    return start_dates
def calc_spring_transition_timing_magnitude(flow_matrix):
    """Detect spring transition timing and magnitude for every column.

    Each column of ``flow_matrix`` is smoothed heavily to find the rightmost
    qualifying peak, a search window is placed around that peak, and the
    window is searched (right to left) for a zero crossing of the spline
    derivative preceded by a sufficiently steep rise. All tuning values are
    read from the module-level ``spring_params`` mapping.

    Args:
        flow_matrix: 2-D array indexed as ``flow_matrix[row, column]``; one
            column is processed per loop iteration.

    Returns:
        Tuple ``(timings, magnitudes)`` of lists with one entry per column;
        entries stay ``None`` for skipped columns.
    """
    max_zero_allowed_per_year = spring_params['max_zero_allowed_per_year']
    max_nan_allowed_per_year = spring_params['max_nan_allowed_per_year']
    # max search date for the peak flow date
    max_peak_flow_date = spring_params['max_peak_flow_date']
    # left side of search window set around max peak
    search_window_left = spring_params['search_window_left']
    # right side of search window set around max peak
    search_window_right = spring_params['search_window_right']
    # smaller => more peaks detection
    peak_sensitivity = spring_params['peak_sensitivity']
    # Relative flow (Q-Qmin) of start of spring must be certain percentage of
    # peak relative flow (Qmax-Qmin)
    peak_filter_percentage = spring_params['peak_filter_percentage']
    min_max_flow_rate = spring_params['min_max_flow_rate']
    # Heavy filter to identify major peaks in entire water year
    window_sigma = spring_params['window_sigma']
    # Smaller filter to identify small peaks in windowed data (smaller sigma
    # val => less filter)
    fit_sigma = spring_params['fit_sigma']
    # 0.1 - 10, 0.1 being the most sensitive
    sensitivity = spring_params['sensitivity']
    # the detected date's flow has to be a certain percentage of the max flow
    # in that region
    min_percentage_of_max_flow = spring_params['min_percentage_of_max_flow']
    lag_time = spring_params['lag_time']

    current_sensitivity = sensitivity / 1000

    timings = []
    magnitudes = []

    for column_number, _ in enumerate(flow_matrix[0]):
        timings.append(None)
        magnitudes.append(None)

        # Skip columns with more than the allowed NaNs or zeros.
        year_flow = flow_matrix[:, column_number]
        if (np.isnan(year_flow).sum() > max_nan_allowed_per_year
                or np.count_nonzero(year_flow == 0) > max_zero_allowed_per_year):
            continue

        # Fill NaN values via replace_nan.
        flow_data = replace_nan(year_flow)

        # Gaussian with heavy sigma to smooth the curve.
        filter_data = gaussian_filter1d(flow_data, window_sigma)

        # Index and flow magnitude of each peak and valley.
        mean_flow = np.nanmean(filter_data)
        maxarray, minarray = peakdet(filter_data, mean_flow * peak_sensitivity)

        # Max flow in the curve and the flow range requirements.
        max_flow = np.nanmax(filter_data)
        max_flow_index = find_index(filter_data, max_flow)
        min_flow = np.nanmin(filter_data)
        flow_range = max_flow - min_flow

        # Rightmost peak that fulfills date and magnitude requirements.
        for peak in reversed(maxarray):
            if (int(peak[0]) < max_peak_flow_date
                    and (peak[1] - min_flow) / flow_range > peak_filter_percentage):
                max_flow_index = int(peak[0])
                break

        if max_flow < min_max_flow_rate:
            # Annual max flow below the threshold: use the index of the raw
            # max flow. This is used for extremely low flows where data
            # appears to be stepwise.
            max_filter_data = np.nanmax(flow_data)
            timings[-1] = find_index(flow_data, max_filter_data)
            magnitudes[-1] = max_filter_data
            continue

        # Clip the search window for THIS column only. The configured window
        # sizes must stay untouched, otherwise one clipped year would shrink
        # the window of every following year.
        if max_flow_index < search_window_left:
            window_left = 0
        else:
            window_left = search_window_left
        if max_flow_index > 366 - search_window_right:
            window_right = 366 - max_flow_index
        else:
            window_right = search_window_right
        window_start = max_flow_index - window_left
        window_end = max_flow_index + window_right

        # Default result: the max raw flow inside the window.
        max_flow_index_window = max(flow_data[window_start:window_end])
        timings[-1] = find_index(flow_data, max_flow_index_window)
        magnitudes[-1] = max_flow_index_window

        # Gaussian filter again on the windowed data (smaller filter this time).
        x_axis_window = list(range(window_start, window_end))
        flow_data_window = gaussian_filter1d(
            flow_data[window_start:window_end], fit_sigma)

        # Windows shorter than 50 samples keep the default result.
        if len(flow_data_window) < 50:
            continue

        # Spline on top of the Gaussian curve and its first derivative.
        spl = ip.UnivariateSpline(x_axis_window, flow_data_window, k=3, s=3)
        spl_first_deriv = spl.derivative(1)

        # Indices where the derivative crosses zero, offset back to column
        # coordinates.
        index_zeros = crossings_nonzero_all(spl_first_deriv(x_axis_window))
        new_index = [window_start + index for index in index_zeros]

        # These only depend on the window, so compute them once.
        threshold = max(spl_first_deriv(x_axis_window))
        spline_values = spl(x_axis_window)
        min_flow_window = min(spline_values)
        range_window = max(spline_values) - min_flow_window

        # Loop through the indices where derivative=0, from right to left.
        for i in reversed(new_index):
            # Each of the four preceding daily rises must exceed a limit that
            # grows with the distance from i, and the flow at i must be high
            # enough relative to the window range.
            steep_rise = all(
                spl(i - step) - spl(i - step - 1)
                > threshold * current_sensitivity * (step + 1)
                for step in range(4))
            if steep_rise and (spl(i) - min_flow_window) / range_window > min_percentage_of_max_flow:
                timings[-1] = i
                break

        # A timing before the max flow index is replaced by that index plus
        # the lag time.
        if timings[-1] < max_flow_index:
            timings[-1] = max_flow_index + lag_time

        # Find max flow 4 days before and 7 days ahead. Assign as new start
        # date.
        nearby_flow = flow_data[timings[-1] - 4:timings[-1] + 7]
        if len(nearby_flow) > 10:
            max_flow_window_new = max(nearby_flow)
            new_timings = find_index(nearby_flow, max_flow_window_new)
            timings[-1] = timings[-1] - 4 + new_timings + lag_time
            magnitudes[-1] = max_flow_window_new

    return timings, magnitudes
def calc_spring_transition_timing_magnitude(flow_matrix, class_number, summer_timings, spring_params=def_spring_params):
    """Detect the spring transition timing and magnitude for each water year.

    For every column of ``flow_matrix`` the flow is smoothed with a heavy
    Gaussian filter, the rightmost qualifying peak is located, and a spline
    fitted to a lightly smoothed window around that peak is searched from
    right to left for a turning point that follows a sustained rise.

    Args:
        flow_matrix: 2-D array indexed as ``[day, water_year]`` (columns are
            iterated with ``flow_matrix[:, column_number]``).
        class_number: Stream class identifier. Classes 4, 6, 7 and 8 are
            treated as rain-dominated and receive specialized parameters.
        summer_timings: Per-year start-of-summer index (or ``None``), indexed
            by column number. A spring timing is only accepted before it.
        spring_params: Mapping holding the tunable detection parameters read
            at the top of this function.

    Returns:
        A ``(timings, magnitudes)`` tuple of lists with one entry per column.
        An entry stays ``None`` when the year is skipped for too many
        NaN/zero values or too little flow; the timing is also reset to
        ``None`` when it would fall after the start of summer.
    """
    max_zero_allowed_per_year = spring_params['max_zero_allowed_per_year']
    max_nan_allowed_per_year = spring_params['max_nan_allowed_per_year']
    # Max search date for the peak flow date
    max_peak_flow_date = spring_params['max_peak_flow_date']
    # Left / right side of the search window set around the max peak
    search_window_left = spring_params['search_window_left']
    search_window_right = spring_params['search_window_right']
    # Smaller => more peaks detected
    peak_sensitivity = spring_params['peak_sensitivity']
    # Relative flow (Q-Qmin) of start of spring must be a certain percentage
    # of peak relative flow (Qmax-Qmin)
    peak_filter_percentage = spring_params['peak_filter_percentage']
    min_max_flow_rate = spring_params['min_max_flow_rate']
    # Heavy filter to identify major peaks in the entire water year
    window_sigma = spring_params['window_sigma']
    # Smaller filter to identify small peaks in windowed data
    # (smaller sigma value => less filtering)
    fit_sigma = spring_params['fit_sigma']
    # 0.1 - 10, 0.1 being the most sensitive
    sensitivity = spring_params['sensitivity']
    # The detected date's flow has to be a certain percentage of the max flow
    # in that region
    min_percentage_of_max_flow = spring_params['min_percentage_of_max_flow']
    lag_time = spring_params['lag_time']
    timing_cutoff = spring_params['timing_cutoff']
    # Don't calculate flow metrics if max flow is below this value
    min_flow_rate = spring_params['min_flow_rate']

    # Rain-dominated classes: (smoothing sigma, relative magnitude requirement).
    # The relative requirement is applied both to peak selection and to the
    # detected date's flow.
    rain_dominated_overrides = {
        4: (2.5, 0.10),
        6: (2.5, 0.12),
        7: (2, 0.05),
        8: (2.5, 0.15),
    }
    # Membership test on purpose: a chain such as `x == 4 or 6 or 7 or 8` is
    # always truthy and would apply these overrides to every class.
    if class_number in rain_dominated_overrides:
        max_peak_flow_date = 255
        window_sigma, relative_requirement = rain_dominated_overrides[class_number]
        peak_filter_percentage = relative_requirement
        min_percentage_of_max_flow = relative_requirement

    current_sensitivity = sensitivity / 1000

    timings = []
    magnitudes = []

    for column_number, _ in enumerate(flow_matrix[0]):
        timings.append(None)
        magnitudes.append(None)

        # Skip water years with too many NaN/zero values or too little flow
        year_flow = flow_matrix[:, column_number]
        if (np.isnan(year_flow).sum() > max_nan_allowed_per_year
                or np.count_nonzero(year_flow == 0) > max_zero_allowed_per_year
                or max(year_flow) < min_flow_rate):
            continue

        # Get flow data and fill the remaining NaN values
        flow_data = replace_nan(year_flow)

        # Smooth the curve with the heavy Gaussian filter
        filter_data = gaussian_filter1d(flow_data, window_sigma)

        # Peaks of the filtered data, as (index, magnitude) rows
        mean_flow = np.nanmean(filter_data)
        maxarray, _ = peakdet(filter_data, mean_flow * peak_sensitivity)

        # Max flow of the curve and the flow range used for relative checks
        max_flow = np.nanmax(filter_data)
        max_flow_index = find_index(filter_data, max_flow)
        min_flow = np.nanmin(filter_data)
        flow_range = max_flow - min_flow

        # Identify the rightmost peak that fulfills the date and magnitude
        # requirements. `peak_rank` starts at -1 so that it is always bound,
        # also when no peaks were detected.
        peak_rank = -1
        for peak_rank, peak in enumerate(reversed(maxarray)):
            if (int(peak[0]) < max_peak_flow_date
                    and (peak[1] - min_flow) / flow_range > peak_filter_percentage):
                max_flow_index = int(peak[0])
                break

        if max_flow < min_max_flow_rate:
            # Extremely low flows, where seasonal timings are harder to find:
            # use the annual max of the unfiltered data directly.
            max_raw_flow = np.nanmax(flow_data)
            timings[-1] = find_index(flow_data, max_raw_flow)
            magnitudes[-1] = max_raw_flow
        else:
            # Clip the search window at the edges of the water year
            if max_flow_index < search_window_left:
                current_search_window_left = 0
            else:
                current_search_window_left = search_window_left
            if max_flow_index > 366 - search_window_right:
                current_search_window_right = 366 - max_flow_index
            else:
                current_search_window_right = search_window_right
            window_start = max_flow_index - current_search_window_left
            window_end = max_flow_index + current_search_window_right

            # Default result: the max flow inside the window
            window_max_flow = max(flow_data[window_start:window_end])
            timings[-1] = find_index(flow_data, window_max_flow)
            magnitudes[-1] = window_max_flow

            # Gaussian filter again on the windowed data (smaller filter)
            x_axis_window = list(range(window_start, window_end))
            flow_data_window = gaussian_filter1d(
                flow_data[window_start:window_end], fit_sigma)

            # A window that is too small is only refined when the chosen peak
            # was the last candidate examined; otherwise this year keeps the
            # window-max result assigned above.
            if len(flow_data_window) < 50 and peak_rank != len(maxarray) - 1:
                continue

            # Fit a spline on top of the Gaussian curve and take its derivative
            spl = ip.UnivariateSpline(x_axis_window, flow_data_window, k=3, s=3)
            spl_first_deriv = spl.derivative(1)

            # Indices (offset to the water year) where the derivative crosses zero
            index_zeros = crossings_nonzero_all(spl_first_deriv(x_axis_window))
            new_index = [window_start + index for index in index_zeros]

            # Window statistics do not depend on the candidate: compute once
            spline_values = spl(x_axis_window)
            threshold = max(spl_first_deriv(x_axis_window))
            min_flow_window = min(spline_values)
            range_window = max(spline_values) - min_flow_window

            summer_timing = summer_timings[column_number]

            # Loop through the indices where derivative=0, from right to left
            for i in reversed(new_index):
                # Candidate must lie between the cutoff date and start of summer
                if summer_timing is None or not timing_cutoff < i < summer_timing:
                    continue
                # The four preceding daily rises must each exceed a threshold
                # that grows the further back the day is
                sustained_rise = all(
                    spl(i - step) - spl(i - step - 1)
                    > threshold * current_sensitivity * (step + 1)
                    for step in range(4))
                if (sustained_rise
                        and (spl(i) - min_flow_window) / range_window
                        > min_percentage_of_max_flow):
                    timings[-1] = i
                    break

            # A timing before the max flow index is replaced by that index plus lag
            if timings[-1] < max_flow_index:
                timings[-1] = max_flow_index + lag_time

            # Find max flow 4 days before and 7 days ahead; assign as new start date
            nearby_flow = flow_data[timings[-1] - 4:timings[-1] + 7]
            if len(nearby_flow) > 10:
                max_flow_window_new = max(nearby_flow)
                new_timings = find_index(nearby_flow, max_flow_window_new)
                timings[-1] = timings[-1] - 4 + new_timings + lag_time
                magnitudes[-1] = max_flow_window_new

            # Spring transition cannot occur after the start of summer
            if summer_timing is not None and timings[-1] > summer_timing:
                timings[-1] = None

        # _spring_transition_plotter(list(range(len(flow_data))), flow_data, filter_data, x_axis_window, spl_first_deriv, new_index, max_flow_index, timings, current_search_window_left, current_search_window_right, spl, column_number, maxarray)

    return timings, magnitudes
if this < mx - delta: maxtab.append((mxpos, mx)) mn = this mnpos = x[i] lookformax = False else: if this > mn + delta: mintab.append((mnpos, mn)) mx = this mxpos = x[i] lookformax = True return array(maxtab), array(mintab) #inconsistent use of tabs and spaces in indentation- NOT WORKING maxarray, minarray = peakdet(smooth_df, mean_flow * peak_sensitivity) def low_flow(df): for flow_index in range(1, len(df)): #going to slow things down a lot... if int(i[0]) < flow_threshold: max_flow_index = int(flow_index[0]) #if (df<flow_threshold) & ((df[i+1]-df[i-1])/2 <0.5) #ix=(df<flow_threshold) & ((df[i+1]-df[i-1])/2 <0.5) #doesn't work currently return max_flow_index #want start date #%% ''' Create df of only Dry Season''' drystartm = 6 #will want to reference a summer start function later on drystartd = 1
def calc_start_of_summer(matrix, class_number, summer_params=def_summer_params):
    """Detect the start-of-summer date for each water year.

    Each column of ``matrix`` is extended with the first 100 rows of the next
    column (when there is one), smoothed with a Gaussian filter and fitted
    with a spline. The start of summer is the first index after the last
    major smoothed peak where the spline derivative is under the rate of
    change threshold and the smoothed flow is below the magnitude threshold.

    Args:
        matrix: 2-D array indexed as ``[day, water_year]`` (columns are read
            with ``matrix[:, column_number]``).
        class_number: Stream class identifier. Classes 4, 6, 7 and 8 are
            treated as rain-dominated and receive specialized parameters.
        summer_params: User parameters, merged with ``def_summer_params`` by
            ``set_user_params``.

    Returns:
        A list with one entry per column: the detected index, or ``None``
        when the year is skipped or no index fulfills the criteria.
    """
    params = set_user_params(summer_params, def_summer_params)

    # NOTE(review): positional unpacking relies on the ordering of `params`
    # matching this exact sequence - confirm against set_user_params.
    (max_zero_allowed_per_year, max_nan_allowed_per_year, sigma, sensitivity,
     peak_sensitivity, max_peak_flow_date, min_summer_flow_percent,
     min_flow_rate) = params.values()

    # Set specific parameters for rain-dominated classes. Membership test on
    # purpose: a chain such as `x == 4 or 6 or 7 or 8` is always truthy and
    # would force these values on every class.
    if class_number in (4, 6, 7, 8):
        sensitivity = 1100
        peak_sensitivity = .1
        sigma = 4

    current_sensitivity = 1 / sensitivity
    last_column = len(matrix[0]) - 1

    start_dates = []
    for column_number, _ in enumerate(matrix[0]):
        start_dates.append(None)

        # Skip water years with too many NaN/zero values or too little flow
        year_flow = matrix[:, column_number]
        if (pd.isnull(year_flow).sum() > max_nan_allowed_per_year
                or np.count_nonzero(year_flow == 0) > max_zero_allowed_per_year
                or max(year_flow) < min_flow_rate):
            continue

        # Append the first 100 days of the next column, except for the last column
        if column_number != last_column:
            flow_data = list(year_flow) + list(matrix[:100, column_number + 1])
        else:
            flow_data = year_flow

        # Fill any NaN values
        flow_data = replace_nan(flow_data)

        # Smooth out the timeseries
        smooth_data = gaussian_filter1d(flow_data, sigma)
        x_axis = list(range(len(smooth_data)))

        # Spline fit of the smoothed timeseries and its first derivative
        spl = ip.UnivariateSpline(x_axis, smooth_data, k=3, s=3)
        spl_first = spl.derivative(1)

        max_flow_data = max(smooth_data[:366])
        max_flow_index = find_index(smooth_data, max_flow_data)

        # Find the major peaks of the filtered data, as (index, magnitude) rows
        mean_flow = np.nanmean(flow_data)
        maxarray, _ = peakdet(smooth_data, mean_flow * peak_sensitivity)

        # Start the search after the last smoothed peak before the cutoff date
        for peak in reversed(maxarray):
            if int(peak[0]) < max_peak_flow_date:
                max_flow_index = int(peak[0])
                break

        # Magnitude threshold below which start of summer can begin
        min_flow_data = min(smooth_data[max_flow_index:366])
        threshold = min_flow_data + \
            (smooth_data[max_flow_index] - min_flow_data) * \
            min_summer_flow_percent
        rate_of_change_limit = max_flow_data * current_sensitivity

        # Search criteria: date is after the last major peak, derivative is
        # under the rate of change threshold, and flow is below the magnitude
        # threshold. Only indices after the peak can qualify, so the scan
        # starts there; the last two indices are never examined.
        for index in range(max_flow_index + 1, len(smooth_data) - 2):
            if (abs(spl_first(index)) < rate_of_change_limit
                    and smooth_data[index] < threshold):
                start_dates[-1] = index
                break

        # _summer_baseflow_plot(x_axis, column_number, flow_data, spl, spl_first, start_dates, threshold, max_flow_index, maxarray)

    return start_dates